const fs = require("fs"); const { leads } = JSON.parse(fs.readFileSync("./leads.json")); // index records by ID for easy lookup const leadsById = {}; // index IDs by email for easy lookup const leadIdssByEmail = {}; const collisions = []; for (const currentLead of leads) { const { _id, email } = currentLead; const collidingLeadIdByEmail = leadIdssByEmail[email]; const collidingLead = collidingLeadIdByEmail ? leadsById[collidingLeadIdByEmail] : leadsById[_id]; if (collidingLead) { const collision = { left: collidingLead, right: currentLead, collidingField: collidingLeadIdByEmail ? "email" : "_id", }; collisions.push(collision); const lDate = new Date(collision.left.entryDate); const rDate = new Date(collision.right.entryDate); if (lDate > rDate) { // existing lead is newer than current lead // discard current lead by doing nothing with it collision.took = "left"; } else { // current lead is newer than existing lead, or both leads have the same date // either way, take the current lead over the existing one collision.took = "right"; if (collision.collidingField === "_id") { // colliding ID - replace ID index, delete old email in email index delete leadIdssByEmail[collision.left.email]; leadIdssByEmail[email] = _id; leadsById[_id] = currentLead; } else { // colliding email - replace ID in email index, delete old ID index leadIdssByEmail[email] = _id; delete leadsById[collision.left._id]; leadsById[_id] = currentLead; } } } else { // no collision leadsById[currentLead._id] = currentLead; leadIdssByEmail[currentLead.email] = currentLead._id; } } console.log("collisions", collisions); console.log("leadsById", leadsById); console.log('records processed:', leads.length) console.log('collisions:', collisions.length) console.log('output leads:', Object.keys(leadsById).length)