diff --git a/.gitignore b/.gitignore index 175f857..1025d5f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .DS_Store -deduplicatedLeads.json \ No newline at end of file +deduplicatedLeads.json +node_modules \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..03abce8 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +Requires Node 12+ + +```sh +npm i # install dependencies +node index.js # run script +``` \ No newline at end of file diff --git a/index.js b/index.js index bc0199e..6ef71a7 100755 --- a/index.js +++ b/index.js @@ -1,5 +1,7 @@ #! /usr/bin/env node const fs = require("fs"); +const chalk = require("chalk"); + const { leads } = JSON.parse(fs.readFileSync("./leads.json")); // index records by ID for easy lookup @@ -7,6 +9,8 @@ const leadsById = {}; // index IDs by email for easy lookup const leadIdssByEmail = {}; +const collisions = []; + // deduplicate leads for (const currentLead of leads) { const { _id, email } = currentLead; @@ -22,6 +26,7 @@ for (const currentLead of leads) { right: currentLead, collidingField: collidingLeadIdByEmail ? "email" : "_id", }; + collisions.push(collision); const lDate = new Date(collision.left.entryDate); const rDate = new Date(collision.right.entryDate); if (lDate > rDate) { @@ -31,7 +36,7 @@ for (const currentLead of leads) { const discardedValues = [...(collision.right.discardedValues || [])]; delete collision.right.discardedValues; - discardedValues.push(collision.right); + discardedValues.unshift(collision.right); const lead = { ...collidingLead, discardedValues, @@ -44,7 +49,7 @@ for (const currentLead of leads) { const discardedValues = [...(collision.left.discardedValues || [])]; delete collision.left.discardedValues; - discardedValues.push(collision.left); + discardedValues.unshift(collision.left); const lead = { ...currentLead, discardedValues, @@ -70,6 +75,41 @@ for (const currentLead of leads) { } } +const printPropRedIfDiff = (prop, val) => (object) => + object[prop] !== val && console.log("\t\t", chalk.bgRed(object[prop])); + +const prettyPrintItem = ({ + _id, + email, + firstName, + lastName, + address, + entryDate, + discardedValues, +}) => { + console.log("------"); + console.log("_id:\t\t", _id); + discardedValues && discardedValues.forEach(printPropRedIfDiff("_id", _id)); + console.log("email:\t\t", email); + discardedValues && + discardedValues.forEach(printPropRedIfDiff("email", email)); + console.log("firstName:\t", firstName); + discardedValues && + discardedValues.forEach(printPropRedIfDiff("firstName", firstName)); + console.log("lastName:\t", lastName); + discardedValues && + discardedValues.forEach(printPropRedIfDiff("lastName", lastName)); + console.log("address:\t", address); + discardedValues && + discardedValues.forEach(printPropRedIfDiff("address", address)); + console.log("entryDate:\t", entryDate); + discardedValues && + discardedValues.forEach(printPropRedIfDiff("entryDate", entryDate)); + console.log(); +}; + +Object.values(leadsById).forEach(prettyPrintItem); + fs.writeFileSync( "./deduplicatedLeads.json", JSON.stringify(Object.values(leadsById), null, 2) @@ -79,5 +119,3 @@ console.log("records processed:", leads.length); console.log("collisions:", collisions.length); console.log("output leads:", Object.keys(leadsById).length); console.log("leads written to deduplicatedLeads.json"); - -console.log("collisions", collisions); diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..f27db35 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,125 @@ +{ + "name": "lead-deduplication", + "version": "1.0.0", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "lead-deduplication", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "chalk": "^4.1.2" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "engines": { + "node": ">=8" + } + }, + "node_modules/supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + } + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "requires": { + "color-convert": "^2.0.1" + } + }, + "chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==" + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "requires": { + "has-flag": "^4.0.0" + } + } + } +} diff --git a/package.json b/package.json index 9c1d5ae..2ade9ac 100644 --- a/package.json +++ b/package.json @@ -7,5 +7,8 @@ "run": "node index.js" }, "author": "", - "license": "ISC" + "license": "ISC", + "dependencies": { + "chalk": "^4.1.2" + } }