Store discarded values on leads and write to file
This commit is contained in:
parent
d4ce905d1a
commit
7453de7257
3
.gitignore
vendored
3
.gitignore
vendored
@ -1 +1,2 @@
|
||||
.DS_Store
|
||||
.DS_Store
|
||||
deduplicatedLeads.json
|
38
index.js
38
index.js
@ -10,6 +10,7 @@ const collisions = [];
|
||||
|
||||
for (const currentLead of leads) {
|
||||
const { _id, email } = currentLead;
|
||||
|
||||
const collidingLeadIdByEmail = leadIdssByEmail[email];
|
||||
const collidingLead = collidingLeadIdByEmail
|
||||
? leadsById[collidingLeadIdByEmail]
|
||||
@ -28,20 +29,39 @@ for (const currentLead of leads) {
|
||||
// existing lead is newer than current lead
|
||||
// discard current lead by doing nothing with it
|
||||
collision.took = "left";
|
||||
|
||||
const discardedValues = [...(collision.right.discardedValues || [])];
|
||||
delete collision.right.discardedValues;
|
||||
discardedValues.push(collision.right);
|
||||
const lead = {
|
||||
...collidingLead,
|
||||
discardedValues,
|
||||
};
|
||||
leadsById[lead._id] = lead;
|
||||
} else {
|
||||
// current lead is newer than existing lead, or both leads have the same date
|
||||
// either way, take the current lead over the existing one
|
||||
collision.took = "right";
|
||||
|
||||
const discardedValues = [...(collision.left.discardedValues || [])];
|
||||
delete collision.left.discardedValues;
|
||||
discardedValues.push(collision.left);
|
||||
const lead = {
|
||||
...currentLead,
|
||||
discardedValues,
|
||||
};
|
||||
|
||||
// rewrite indices by which field collides
|
||||
if (collision.collidingField === "_id") {
|
||||
// colliding ID - replace ID index, delete old email in email index
|
||||
delete leadIdssByEmail[collision.left.email];
|
||||
leadIdssByEmail[email] = _id;
|
||||
leadsById[_id] = currentLead;
|
||||
leadsById[_id] = lead;
|
||||
} else {
|
||||
// colliding email - replace ID in email index, delete old ID index
|
||||
leadIdssByEmail[email] = _id;
|
||||
delete leadsById[collision.left._id];
|
||||
leadsById[_id] = currentLead;
|
||||
leadsById[_id] = lead;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -51,9 +71,13 @@ for (const currentLead of leads) {
|
||||
}
|
||||
}
|
||||
|
||||
console.log("collisions", collisions);
|
||||
console.log("leadsById", leadsById);
|
||||
fs.writeFileSync(
|
||||
"./deduplicatedLeads.json",
|
||||
JSON.stringify(Object.values(leadsById), null, 2)
|
||||
);
|
||||
|
||||
console.log('records processed:', leads.length)
|
||||
console.log('collisions:', collisions.length)
|
||||
console.log('output leads:', Object.keys(leadsById).length)
|
||||
console.log("records processed:", leads.length);
|
||||
console.log("collisions:", collisions.length);
|
||||
console.log("output leads:", Object.keys(leadsById).length);
|
||||
|
||||
console.log("collisions", collisions);
|
||||
|
Loading…
x
Reference in New Issue
Block a user