Store discarded values on leads and write to file
This commit is contained in:
parent
d4ce905d1a
commit
7453de7257
3
.gitignore
vendored
3
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
|
deduplicatedLeads.json
|
38
index.js
38
index.js
@ -10,6 +10,7 @@ const collisions = [];
|
|||||||
|
|
||||||
for (const currentLead of leads) {
|
for (const currentLead of leads) {
|
||||||
const { _id, email } = currentLead;
|
const { _id, email } = currentLead;
|
||||||
|
|
||||||
const collidingLeadIdByEmail = leadIdssByEmail[email];
|
const collidingLeadIdByEmail = leadIdssByEmail[email];
|
||||||
const collidingLead = collidingLeadIdByEmail
|
const collidingLead = collidingLeadIdByEmail
|
||||||
? leadsById[collidingLeadIdByEmail]
|
? leadsById[collidingLeadIdByEmail]
|
||||||
@ -28,20 +29,39 @@ for (const currentLead of leads) {
|
|||||||
// existing lead is newer than current lead
|
// existing lead is newer than current lead
|
||||||
// discard current lead by doing nothing with it
|
// discard current lead by doing nothing with it
|
||||||
collision.took = "left";
|
collision.took = "left";
|
||||||
|
|
||||||
|
const discardedValues = [...(collision.right.discardedValues || [])];
|
||||||
|
delete collision.right.discardedValues;
|
||||||
|
discardedValues.push(collision.right);
|
||||||
|
const lead = {
|
||||||
|
...collidingLead,
|
||||||
|
discardedValues,
|
||||||
|
};
|
||||||
|
leadsById[lead._id] = lead;
|
||||||
} else {
|
} else {
|
||||||
// current lead is newer than existing lead, or both leads have the same date
|
// current lead is newer than existing lead, or both leads have the same date
|
||||||
// either way, take the current lead over the existing one
|
// either way, take the current lead over the existing one
|
||||||
collision.took = "right";
|
collision.took = "right";
|
||||||
|
|
||||||
|
const discardedValues = [...(collision.left.discardedValues || [])];
|
||||||
|
delete collision.left.discardedValues;
|
||||||
|
discardedValues.push(collision.left);
|
||||||
|
const lead = {
|
||||||
|
...currentLead,
|
||||||
|
discardedValues,
|
||||||
|
};
|
||||||
|
|
||||||
|
// rewrite indices by which field collides
|
||||||
if (collision.collidingField === "_id") {
|
if (collision.collidingField === "_id") {
|
||||||
// colliding ID - replace ID index, delete old email in email index
|
// colliding ID - replace ID index, delete old email in email index
|
||||||
delete leadIdssByEmail[collision.left.email];
|
delete leadIdssByEmail[collision.left.email];
|
||||||
leadIdssByEmail[email] = _id;
|
leadIdssByEmail[email] = _id;
|
||||||
leadsById[_id] = currentLead;
|
leadsById[_id] = lead;
|
||||||
} else {
|
} else {
|
||||||
// colliding email - replace ID in email index, delete old ID index
|
// colliding email - replace ID in email index, delete old ID index
|
||||||
leadIdssByEmail[email] = _id;
|
leadIdssByEmail[email] = _id;
|
||||||
delete leadsById[collision.left._id];
|
delete leadsById[collision.left._id];
|
||||||
leadsById[_id] = currentLead;
|
leadsById[_id] = lead;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -51,9 +71,13 @@ for (const currentLead of leads) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log("collisions", collisions);
|
fs.writeFileSync(
|
||||||
console.log("leadsById", leadsById);
|
"./deduplicatedLeads.json",
|
||||||
|
JSON.stringify(Object.values(leadsById), null, 2)
|
||||||
|
);
|
||||||
|
|
||||||
console.log('records processed:', leads.length)
|
console.log("records processed:", leads.length);
|
||||||
console.log('collisions:', collisions.length)
|
console.log("collisions:", collisions.length);
|
||||||
console.log('output leads:', Object.keys(leadsById).length)
|
console.log("output leads:", Object.keys(leadsById).length);
|
||||||
|
|
||||||
|
console.log("collisions", collisions);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user