// Find documents in `collectionName` that share the same `name` and delete
// every copy except one per group. Intended to be run in the MongoDB shell
// (relies on the shell-provided `db` global).
//
// Steps:
//   1. Group documents by `name` and collect the `_id`s in each group.
//   2. Keep only groups with more than one member (the duplicates).
//   3. For each group, spare one `_id` and queue the rest for deletion.
//   4. Delete all queued `_id`s with a single command.

// `var` is kept so the snippet stays re-runnable when pasted into the same
// shell session more than once.
var duplicates = [];

db.collectionName.aggregate(
  [
    {
      $match: {
        // Skip documents whose name is the empty string.
        // NOTE: documents that have no `name` field at all also pass this
        // filter and end up in one group, so all but one of them would be
        // deleted. Add `$exists: true` here if that is not intended.
        name: { $ne: '' }
      }
    },
    {
      $group: {
        _id: { name: '$name' }, // add more fields here to group on several properties
        dups: { $addToSet: '$_id' },
        count: { $sum: 1 }
      }
    },
    {
      $match: {
        count: { $gt: 1 } // a group with more than one member contains duplicates
      }
    }
  ],
  // Lets pipeline stages write to temporary files instead of failing when
  // they exceed the aggregation memory limit on large collections.
  { allowDiskUse: true }
) // Stop here (drop the .forEach) to only inspect the duplicate groups.
  .forEach(function (group) {
    // The first `_id` of each group is the survivor. `$addToSet` does not
    // guarantee element order, so which copy is kept is arbitrary.
    group.dups.slice(1).forEach(function (dupId) {
      duplicates.push(dupId);
    });
  });

// Optional: show every `_id` that is about to be deleted.
printjson(duplicates);

// Remove all duplicates in one go. `deleteMany` replaces the deprecated
// `remove`; an empty `$in` list matches nothing, so this is a no-op when no
// duplicates were found.
// NOTE: for a very large number of duplicates the `$in` list can exceed the
// 16 MB BSON document limit; delete in batches in that case.
db.collectionName.deleteMany({ _id: { $in: duplicates } });