initial deduplication implementation
This commit is contained in:
commit
d4ce905d1a
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.DS_Store
|
59
index.js
Normal file
59
index.js
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
const fs = require("fs");
|
||||||
|
/**
 * Tells whether `existing` has a strictly later `entryDate` than `incoming`.
 *
 * Equal dates, and dates that cannot be parsed (the comparison then involves
 * NaN), yield `false`, which means the incoming lead is preferred.
 *
 * @param {{entryDate: string}} existing - lead already kept
 * @param {{entryDate: string}} incoming - lead currently being processed
 * @returns {boolean} true only when `existing` is strictly newer
 */
function isStrictlyNewer(existing, incoming) {
  return new Date(existing.entryDate) > new Date(incoming.entryDate);
}

/**
 * Deduplicates leads so that every `_id` and every `email` appears at most
 * once in the result.
 *
 * Leads are processed in input order. An incoming lead may collide with a
 * kept lead on `email`, on `_id`, or with two different kept leads (one per
 * field). The incoming lead replaces everything it collides with unless one
 * of those kept leads has a strictly newer `entryDate`; on a date tie the
 * incoming (later in the list) lead wins.
 *
 * @param {Array<{_id: string, email: string, entryDate: string}>} leads
 * @returns {{leadsById: Object, collisions: Array<{left: Object, right: Object, collidingField: string, took: string}>}}
 *   `leadsById` maps each surviving `_id` to its lead. `collisions` has one
 *   entry per (kept lead, incoming lead) pair; `took` is "right" when the
 *   incoming lead was kept and "left" when it was discarded.
 */
function dedupeLeads(leads) {
  // Maps instead of plain objects: keys such as "constructor" or "__proto__"
  // must not be mistaken for existing entries, and has()/get() do not depend
  // on the truthiness of the stored id.
  const byId = new Map();
  const idByEmail = new Map();
  const collisions = [];

  for (const currentLead of leads) {
    const { _id, email } = currentLead;
    const emailMatch = idByEmail.has(email)
      ? byId.get(idByEmail.get(email))
      : undefined;
    const idMatch = byId.get(_id);

    // Check BOTH indexes. When the same kept lead matches on email and _id it
    // is reported once, as an "email" collision.
    const matches = [];
    if (emailMatch !== undefined) {
      matches.push({ lead: emailMatch, collidingField: "email" });
    }
    if (idMatch !== undefined && idMatch !== emailMatch) {
      matches.push({ lead: idMatch, collidingField: "_id" });
    }

    // The incoming lead survives only if no colliding kept lead is newer.
    const currentWins = matches.every(
      ({ lead }) => !isStrictlyNewer(lead, currentLead)
    );

    for (const { lead, collidingField } of matches) {
      collisions.push({
        left: lead,
        right: currentLead,
        collidingField,
        took: currentWins ? "right" : "left",
      });
    }

    if (!currentWins) {
      // A kept lead is newer: discard the incoming lead, indexes unchanged.
      continue;
    }

    // Drop every trace of the replaced leads so neither index is left
    // pointing at a record that is no longer in the output.
    for (const { lead, collidingField } of matches) {
      idByEmail.delete(lead.email);
      if (collidingField === "email") {
        byId.delete(lead._id);
      }
      // For an "_id" collision the entry is overwritten in place below.
    }
    byId.set(_id, currentLead);
    idByEmail.set(email, _id);
  }

  return { leadsById: Object.fromEntries(byId), collisions };
}

/**
 * Reads ./leads.json, deduplicates its `leads` array and prints the
 * collisions, the surviving leads and summary counts.
 */
function main() {
  const { leads } = JSON.parse(fs.readFileSync("./leads.json", "utf8"));
  const { leadsById, collisions } = dedupeLeads(leads);

  console.log("collisions", collisions);
  console.log("leadsById", leadsById);

  console.log("records processed:", leads.length);
  console.log("collisions:", collisions.length);
  console.log("output leads:", Object.keys(leadsById).length);
}

// Run only when executed directly (`node index.js`); the typeof checks keep
// this line harmless where CommonJS globals are not defined.
if (
  typeof require !== "undefined" &&
  typeof module !== "undefined" &&
  require.main === module
) {
  main();
}
|
82
leads.json
Normal file
82
leads.json
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
{"leads":[
|
||||||
|
{
|
||||||
|
"_id": "jkj238238jdsnfsj23",
|
||||||
|
"email": "foo@bar.com",
|
||||||
|
"firstName": "John",
|
||||||
|
"lastName": "Smith",
|
||||||
|
"address": "123 Street St",
|
||||||
|
"entryDate": "2014-05-07T17:30:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "edu45238jdsnfsj23",
|
||||||
|
"email": "mae@bar.com",
|
||||||
|
"firstName": "Ted",
|
||||||
|
"lastName": "Masters",
|
||||||
|
"address": "44 North Hampton St",
|
||||||
|
"entryDate": "2014-05-07T17:31:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "wabaj238238jdsnfsj23",
|
||||||
|
"email": "bog@bar.com",
|
||||||
|
"firstName": "Fran",
|
||||||
|
"lastName": "Jones",
|
||||||
|
"address": "8803 Dark St",
|
||||||
|
"entryDate": "2014-05-07T17:31:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "jkj238238jdsnfsj23",
|
||||||
|
"email": "coo@bar.com",
|
||||||
|
"firstName": "Ted",
|
||||||
|
"lastName": "Jones",
|
||||||
|
"address": "456 Neat St",
|
||||||
|
"entryDate": "2014-05-07T17:32:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "sel045238jdsnfsj23",
|
||||||
|
"email": "foo@bar.com",
|
||||||
|
"firstName": "John",
|
||||||
|
"lastName": "Smith",
|
||||||
|
"address": "123 Street St",
|
||||||
|
"entryDate": "2014-05-07T17:32:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "qest38238jdsnfsj23",
|
||||||
|
"email": "foo@bar.com",
|
||||||
|
"firstName": "John",
|
||||||
|
"lastName": "Smith",
|
||||||
|
"address": "123 Street St",
|
||||||
|
"entryDate": "2014-05-07T17:32:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "vug789238jdsnfsj23",
|
||||||
|
"email": "foo1@bar.com",
|
||||||
|
"firstName": "Blake",
|
||||||
|
"lastName": "Douglas",
|
||||||
|
"address": "123 Reach St",
|
||||||
|
"entryDate": "2014-05-07T17:33:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "wuj08238jdsnfsj23",
|
||||||
|
"email": "foo@bar.com",
|
||||||
|
"firstName": "Micah",
|
||||||
|
"lastName": "Valmer",
|
||||||
|
"address": "123 Street St",
|
||||||
|
"entryDate": "2014-05-07T17:33:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "belr28238jdsnfsj23",
|
||||||
|
"email": "mae@bar.com",
|
||||||
|
"firstName": "Tallulah",
|
||||||
|
"lastName": "Smith",
|
||||||
|
"address": "123 Water St",
|
||||||
|
"entryDate": "2014-05-07T17:33:20+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"_id": "jkj238238jdsnfsj23",
|
||||||
|
"email": "bill@bar.com",
|
||||||
|
"firstName": "John",
|
||||||
|
"lastName": "Smith",
|
||||||
|
"address": "888 Mayberry St",
|
||||||
|
"entryDate": "2014-05-07T17:33:20+00:00"
|
||||||
|
}]
|
||||||
|
}
|
11
package.json
Normal file
11
package.json
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"name": "adobe-coding-challenge",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC"
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user