diff --git a/.gitignore b/.gitignore index 496ee2c..175f857 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -.DS_Store \ No newline at end of file +.DS_Store +deduplicatedLeads.json \ No newline at end of file diff --git a/index.js b/index.js index d51a185..19b7ecf 100644 --- a/index.js +++ b/index.js @@ -10,6 +10,7 @@ const collisions = []; for (const currentLead of leads) { const { _id, email } = currentLead; + const collidingLeadIdByEmail = leadIdssByEmail[email]; const collidingLead = collidingLeadIdByEmail ? leadsById[collidingLeadIdByEmail] @@ -28,20 +29,39 @@ for (const currentLead of leads) { // existing lead is newer than current lead // discard current lead by doing nothing with it collision.took = "left"; + + const discardedValues = [...(collision.right.discardedValues || [])]; + delete collision.right.discardedValues; + discardedValues.push(collision.right); + const lead = { + ...collidingLead, + discardedValues, + }; + leadsById[lead._id] = lead; } else { // current lead is newer than existing lead, or both leads have the same date // either way, take the current lead over the existing one collision.took = "right"; + + const discardedValues = [...(collision.left.discardedValues || [])]; + delete collision.left.discardedValues; + discardedValues.push(collision.left); + const lead = { + ...currentLead, + discardedValues, + }; + + // rewrite indices by which field collides if (collision.collidingField === "_id") { // colliding ID - replace ID index, delete old email in email index delete leadIdssByEmail[collision.left.email]; leadIdssByEmail[email] = _id; - leadsById[_id] = currentLead; + leadsById[_id] = lead; } else { // colliding email - replace ID in email index, delete old ID index leadIdssByEmail[email] = _id; delete leadsById[collision.left._id]; - leadsById[_id] = currentLead; + leadsById[_id] = lead; } } } else { @@ -51,9 +71,13 @@ for (const currentLead of leads) { } } -console.log("collisions", collisions); -console.log("leadsById", leadsById); +fs.writeFileSync( + "./deduplicatedLeads.json", + JSON.stringify(Object.values(leadsById), null, 2) +); -console.log('records processed:', leads.length) -console.log('collisions:', collisions.length) -console.log('output leads:', Object.keys(leadsById).length) \ No newline at end of file +console.log("records processed:", leads.length); +console.log("collisions:", collisions.length); +console.log("output leads:", Object.keys(leadsById).length); + +console.log("collisions", collisions);