diff --git a/nodejs/debug.js b/nodejs/debug.js new file mode 100644 index 0000000..0d7cc35 --- /dev/null +++ b/nodejs/debug.js @@ -0,0 +1,50 @@ +const Promise = require('bluebird'); +const request = Promise.promisifyAll(require('request')); + +const addressParser = require('../web/src/lib/address-parser'); + +if (process.argv.length < 2) { + process.exit(1); +} + +const address = process.argv[2]; +const n = 100; +const URL = 'https://www.als.ogcio.gov.hk/lookup'; + + +request.getAsync(URL, { + headers: { + Accept: 'application/json' + }, + qs: { + q: address, + n + }, + json: {} +}).then(res => { + return addressParser.searchResult(address, res.body); +}).then( results => { + results.forEach((result, index) => { + if (index < 5) { + console.log("================================================") + console.log(JSON.stringify(result.chi, null ,2)); + console.log(`score: ${result.score}`); + for (const match of result.matches) { + console.log(` -----------------------`); + console.log(` key: ${match.matchedKey}`); + console.log(` matched: ${match.matchedWords}`); + console.log(` score: ${addressParser.calculateScoreFromMatches([match])}`); + + } + console.log("================================================") + console.log("") + console.log("") + } + + }) + + +}).catch(error => { + console.log(error.stack); +}) + diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index d25b56a..c25b0e1 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -40,7 +40,6 @@ "version": "6.5.5", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.5.5.tgz", "integrity": "sha512-7q7gtRQDJSyuEHjuVgHoUa2VuemFiCMrfQc9Tc08XTAc4Zj/5U1buQJ0HU6i7fKjXU09SVgSmxa4sLvuvS8Iyg==", - "dev": true, "requires": { "fast-deep-equal": "^2.0.1", "fast-json-stable-stringify": "^2.0.0", @@ -84,12 +83,40 @@ "sprintf-js": "~1.0.2" } }, + "asn1": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.4.tgz", + "integrity": "sha512-jxwzQpLQjSmWXgwaCZE9Nz+glAG01yF1QnWgbhGwHI5A6FRIEY6IVqtHhIepHqI7/kyEyQEagBC5mBEFlIYvdg==", + "requires": { + "safer-buffer": "~2.1.0" + } + }, + "assert-plus": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", + "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" + }, "assertion-error": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-1.1.0.tgz", "integrity": "sha512-jgsaNduz+ndvGyFt3uSuWqvy4lCnIJiovtouQN5JZHOKCS2QuhEdbcQHFhVksz2N2U9hXJo8odG7ETyWlEeuDw==", "dev": true }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" + }, + "aws-sign2": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz", + "integrity": "sha1-tG6JCTSpWR8tL2+G1+ap8bP+dqg=" + }, + "aws4": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.8.0.tgz", + "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" + }, "babel-code-frame": { "version": "6.26.0", "resolved": "https://registry.npmjs.org/babel-code-frame/-/babel-code-frame-6.26.0.tgz", @@ -155,6 +182,14 @@ "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=", "dev": true }, + "bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha1-pDAdOJtqQ/m2f/PKEaP2Y342Dp4=", + "requires": { + "tweetnacl": "^0.14.3" + } + }, "bluebird": { "version": "3.5.3", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.3.tgz", @@ -203,6 +238,11 @@ "integrity": "sha1-r6uWJikQp/M8GaV3WCXGnzTjUMo=", "dev": true }, + "caseless": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", + "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=" + }, "chai": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/chai/-/chai-4.2.0.tgz", @@ -282,6 +322,14 @@ "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=", "dev": true }, + "combined-stream": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.7.tgz", + "integrity": "sha512-brWl9y6vOB1xYPZcpZde3N9zDByXTosAeMDo4p1wzo6UMOX4vumB+TP1RZ76sfE6Md68Q0NJSrE/gbezd4Ul+w==", + "requires": { + "delayed-stream": "~1.0.0" + } + }, "commander": { "version": "2.19.0", "resolved": "https://registry.npmjs.org/commander/-/commander-2.19.0.tgz", @@ -314,8 +362,7 @@ "core-util-is": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", - "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", - "dev": true + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" }, "cross-spawn": { "version": "6.0.5", @@ -330,6 +377,14 @@ "which": "^1.2.9" } }, + "dashdash": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", + "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=", + "requires": { + "assert-plus": "^1.0.0" + } + }, "debug": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.0.tgz", @@ -363,6 +418,11 @@ "object-keys": "^1.0.12" } }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" + }, "diff": { "version": "3.5.0", "resolved": "https://registry.npmjs.org/diff/-/diff-3.5.0.tgz", @@ -378,6 +438,15 @@ "esutils": "^2.0.2" } }, + "ecc-jsbn": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz", + "integrity": "sha1-OoOpBOVDUyh4dMVkt1SThoSamMk=", + "requires": { + "jsbn": "~0.1.0", + "safer-buffer": "^2.1.0" + } + }, "error-ex": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", @@ -687,6 +756,11 @@ "integrity": "sha1-Cr9PHKpbyx96nYrMbepPqqBLrJs=", "dev": true }, + "extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, "external-editor": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/external-editor/-/external-editor-3.0.3.tgz", @@ -698,17 +772,20 @@ "tmp": "^0.0.33" } }, + "extsprintf": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", + "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" + }, "fast-deep-equal": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", - "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=", - "dev": true + "integrity": "sha1-ewUhjd+WZ79/Nwv3/bLLFf3Qqkk=" }, "fast-json-stable-stringify": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.0.0.tgz", - "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=", - "dev": true + "integrity": "sha1-1RQsDK7msRifh9OnYREGT4bIu/I=" }, "fast-levenshtein": { "version": "2.0.6", @@ -757,6 +834,21 @@ "write": "^0.2.1" } }, + "forever-agent": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", + "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" + }, + "form-data": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.6", + "mime-types": "^2.1.12" + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -781,6 +873,14 @@ "integrity": "sha1-6td0q+5y4gQJQzoGY2YCPdaIekE=", "dev": true }, + "getpass": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", + "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=", + "requires": { + "assert-plus": "^1.0.0" + } + }, "glob": { "version": "7.1.3", "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz", @@ -813,6 +913,20 @@ "integrity": "sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA==", "dev": true }, + "har-schema": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-2.0.0.tgz", + "integrity": "sha1-qUwiJOvKwEeCoNkDVSHyRzW37JI=" + }, + "har-validator": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-5.1.3.tgz", + "integrity": "sha512-sNvOCzEQNr/qrvJgc3UG/kD4QtlHycrzwS+6mfTrrSq97BvaYcPZZI1ZSqGSPR73Cxn4LKTD4PttRwfU7jWq5g==", + "requires": { + "ajv": "^6.5.5", + "har-schema": "^2.0.0" + } + }, "has": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/has/-/has-1.0.3.tgz", @@ -863,6 +977,16 @@ "integrity": "sha512-7T/BxH19zbcCTa8XkMlbK5lTo1WtgkFi3GvdWEyNuc4Vex7/9Dqbnpsf4JMydcfj9HCg4zUWFTL3Za6lapg5/w==", "dev": true }, + "http-signature": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.2.0.tgz", + "integrity": "sha1-muzZJRFHcvPZW2WmCruPfBj7rOE=", + "requires": { + "assert-plus": "^1.0.0", + "jsprim": "^1.2.2", + "sshpk": "^1.7.0" + } + }, "iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -984,6 +1108,11 @@ "has-symbols": "^1.0.0" } }, + "is-typedarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", + "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=" + }, "isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", @@ -996,6 +1125,11 @@ "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=", "dev": true }, + "isstream": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", + "integrity": "sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" + }, "js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -1012,11 +1146,20 @@ "esprima": "^4.0.0" } }, + "jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=" + }, + "json-schema": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", + "integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" + }, "json-schema-traverse": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" }, "json-stable-stringify-without-jsonify": { "version": "1.0.1", @@ -1024,6 +1167,22 @@ "integrity": "sha1-nbe1lJatPzz+8wp1FC0tkwrXJlE=", "dev": true }, + "json-stringify-safe": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", + "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" + }, + "jsprim": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", + "integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", + "requires": { + "assert-plus": "1.0.0", + "extsprintf": "1.3.0", + "json-schema": "0.2.3", + "verror": "1.10.0" + } + }, "levn": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/levn/-/levn-0.3.0.tgz", @@ -1080,6 +1239,19 @@ "yallist": "^3.0.2" } }, + "mime-db": { + "version": "1.37.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.37.0.tgz", + "integrity": "sha512-R3C4db6bgQhlIhPU48fUtdVmKnflq+hRdad7IyKhtFj06VPNVdk2RhiYL3UjQIlso8L+YxAtFkobT0VK+S/ybg==" + }, + "mime-types": { + "version": "2.1.21", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.21.tgz", + "integrity": "sha512-3iL6DbwpyLzjR3xHSFNFeb9Nz/M8WDkX33t1GFQnFOllWk8pOrh/LSrB5OXlnlW5P9LH73X6loW/eogc+F5lJg==", + "requires": { + "mime-db": "~1.37.0" + } + }, "mimic-fn": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-1.2.0.tgz", @@ -1211,6 +1383,11 @@ "validate-npm-package-license": "^3.0.1" } }, + "oauth-sign": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.9.0.tgz", + "integrity": "sha512-fexhUFFPTGV8ybAtSIGbV6gOkSv8UtRbDBnAyLQw4QPKkgNlsH2ByPGtMUqdWkos6YCRmAqViwgZrJc/mRDzZQ==" + }, "object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -1366,6 +1543,11 @@ "integrity": "sha1-uULm1L3mUwBe9rcTYd74cn0GReA=", "dev": true }, + "performance-now": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha1-Ywn04OX6kT7BxpMHrjZLSzd8nns=" + }, "pify": { "version": "2.3.0", "resolved": "http://registry.npmjs.org/pify/-/pify-2.3.0.tgz", @@ -1426,11 +1608,20 @@ "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=", "dev": true }, + "psl": { + "version": "1.1.29", + "resolved": "https://registry.npmjs.org/psl/-/psl-1.1.29.tgz", + "integrity": "sha512-AeUmQ0oLN02flVHXWh9sSJF7mcdFq0ppid/JkErufc3hGIV/AMa8Fo9VgDo/cT2jFdOWoFvHp90qqBH54W+gjQ==" + }, "punycode": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz", - "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==", - "dev": true + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==" + }, + "qs": { + "version": "6.5.2", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz", + "integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA==" }, "ramda": { "version": "0.25.0", @@ -1491,6 +1682,33 @@ "integrity": "sha512-lv0M6+TkDVniA3aD1Eg0DVpfU/booSu7Eev3TDO/mZKHBfVjgCGTV4t4buppESEYDtkArYFOxTJWv6S5C+iaNw==", "dev": true }, + "request": { + "version": "2.88.0", + "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", + "integrity": "sha512-NAqBSrijGLZdM0WZNsInLJpkJokL72XYjUpnB0iwsRgxh7dB6COrHnTBNwN0E+lHDAJzu7kLAkDeY08z2/A0hg==", + "requires": { + "aws-sign2": "~0.7.0", + "aws4": "^1.8.0", + "caseless": "~0.12.0", + "combined-stream": "~1.0.6", + "extend": "~3.0.2", + "forever-agent": "~0.6.1", + "form-data": "~2.3.2", + "har-validator": "~5.1.0", + "http-signature": "~1.2.0", + "is-typedarray": "~1.0.0", + "isstream": "~0.1.2", + "json-stringify-safe": "~5.0.1", + "mime-types": "~2.1.19", + "oauth-sign": "~0.9.0", + "performance-now": "^2.1.0", + "qs": "~6.5.2", + "safe-buffer": "^5.1.2", + "tough-cookie": "~2.4.3", + "tunnel-agent": "^0.6.0", + "uuid": "^3.3.2" + } + }, "require-uncached": { "version": "1.0.3", "resolved": "http://registry.npmjs.org/require-uncached/-/require-uncached-1.0.3.tgz", @@ -1780,14 +1998,12 @@ "safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", - "dev": true + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" }, "safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "semver": { "version": "5.6.0", @@ -1863,6 +2079,22 @@ "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=", "dev": true }, + "sshpk": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz", + "integrity": "sha512-Ra/OXQtuh0/enyl4ETZAfTaeksa6BXks5ZcjpSUNrjBr0DvrJKX+1fsKDPpT9TBXgHAFsa4510aNVgI8g/+SzA==", + "requires": { + "asn1": "~0.2.3", + "assert-plus": "^1.0.0", + "bcrypt-pbkdf": "^1.0.0", + "dashdash": "^1.12.0", + "ecc-jsbn": "~0.1.1", + "getpass": "^0.1.1", + "jsbn": "~0.1.0", + "safer-buffer": "^2.0.2", + "tweetnacl": "~0.14.0" + } + }, "string-width": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", @@ -1945,12 +2177,41 @@ "os-tmpdir": "~1.0.2" } }, + "tough-cookie": { + "version": "2.4.3", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.4.3.tgz", + "integrity": "sha512-Q5srk/4vDM54WJsJio3XNn6K2sCG+CQ8G5Wz6bZhRZoAe/+TxjWB/GlFAnYEbkYVlON9FMk/fE3h2RLpPXo4lQ==", + "requires": { + "psl": "^1.1.24", + "punycode": "^1.4.1" + }, + "dependencies": { + "punycode": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", + "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" + } + } + }, "tslib": { "version": "1.9.3", "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.9.3.tgz", "integrity": "sha512-4krF8scpejhaOgqzBEcGM7yDIEfi0/8+8zDRZhNZZ2kjmHJ4hv3zCbQWxoJGz1iw5U0Jl0nma13xzHXcncMavQ==", "dev": true }, + "tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=", + "requires": { + "safe-buffer": "^5.0.1" + } + }, + "tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=" + }, "type-check": { "version": "0.3.2", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz", @@ -1976,7 +2237,6 @@ "version": "4.2.2", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.2.2.tgz", "integrity": "sha512-KY9Frmirql91X2Qgjry0Wd4Y+YTdrdZheS8TFwvkbLWf/G5KNJDCh6pKL5OZctEW4+0Baa5idK2ZQuELRwPznQ==", - "dev": true, "requires": { "punycode": "^2.1.0" } @@ -1987,6 +2247,11 @@ "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", "dev": true }, + "uuid": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.3.2.tgz", + "integrity": "sha512-yXJmeNaw3DnnKAOKJE51sL/ZaYfWJRl1pK9dr19YFCu0ObS231AB1/LbqTKRAQ5kw8A90rA6fr4riOUpTZvQZA==" + }, "validate-npm-package-license": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", @@ -1997,6 +2262,16 @@ "spdx-expression-parse": "^3.0.0" } }, + "verror": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/verror/-/verror-1.10.0.tgz", + "integrity": "sha1-OhBcoXBTr1XW4nDB+CiGguGNpAA=", + "requires": { + "assert-plus": "^1.0.0", + "core-util-is": "1.0.2", + "extsprintf": "^1.2.0" + } + }, "which": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", diff --git a/nodejs/package.json b/nodejs/package.json index b318647..288e817 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -7,7 +7,8 @@ }, "dependencies": { "bluebird": "^3.5.2", - "commander": "^2.18.0" + "commander": "^2.18.0", + "request": "^2.88.0" }, "devDependencies": { "chai": "^4.1.2", diff --git a/nodejs/test/test-address-parser.js b/nodejs/test/test-address-parser.js index c877896..e09f849 100755 --- a/nodejs/test/test-address-parser.js +++ b/nodejs/test/test-address-parser.js @@ -100,5 +100,23 @@ describe('cli', () => { expect(parsedResult.score).to.be.a('number'); } }); + + it('should find the longest match without overlapping', async () => { + const findMaximumNonOverlappingMatches = getFunc('findMaximumNonOverlappingMatches'); + const addressToTest = '香港中環皇后大道中80號H QUEEN\'S 23樓'; + const matches = [{ + matchedKey: 'Street', + matchedWords: ['皇后大道中'], + confident: 1 + },{ + matchedKey: 'Building', + matchedWords: ['皇后大道中'], + confident: 1 + }]; + + const maximumMatches = findMaximumNonOverlappingMatches(addressToTest, matches); + expect(maximumMatches.length).to.be.eq(1); + }); + }); }); diff --git a/web/src/lib/address-parser.js b/web/src/lib/address-parser.js index 43c338b..b1992c8 100644 --- a/web/src/lib/address-parser.js +++ b/web/src/lib/address-parser.js @@ -6,7 +6,11 @@ const { dcDistrict } = require('../utils/constants'); const CONFIDENT_ALL_MATCH = 1.0; -const CONFIDENT_MATCH_NAME = 0.5; +const CONFIDENT_MULTIPLIER_NAME_ONLY = 0.5; +const CONFIDENT_MULTIPLIER_OPPOSITE_STREET = 0.75; +const CONFIDENT_MULTIPLIER_FULL_STREET_MATCH = 1.5; + + const CONFIDENT_REVERSE_MATCH = 0.9; const OGCIO_KEY_BLOCK = 'Block'; @@ -27,6 +31,8 @@ const SCORE_SCHEME = { [OGCIO_KEY_BLOCK]: 20, }; +const SCORE_PER_MATCHED_CHAR = 0.1; + // priority in asscending order const elementPriority = [OGCIO_KEY_BUILDING_NAME, OGCIO_KEY_BLOCK, OGCIO_KEY_PHASE, OGCIO_KEY_ESTATE, OGCIO_KEY_VILLAGE , OGCIO_KEY_STREET, OGCIO_KEY_REGION]; @@ -34,9 +40,11 @@ const elementPriority = [OGCIO_KEY_BUILDING_NAME, OGCIO_KEY_BLOCK, OGCIO_KEY_PHA const log = console.log; // eslint-disable-line class Match { - constructor(confident, matchedKey) { + constructor(confident, matchedKey, matchedWords) { this.confident = confident; this.matchedKey = matchedKey; + // array of words that matched + this.matchedWords = matchedWords; } } @@ -64,21 +72,30 @@ function dcDistrictMapping(val, isChinese) { * @param {*} string * @param {*} stringToSearch */ -function partialMatch(string, stringToSearch) { +function findPartialMatch(string, stringToSearch) { + const match = { + matchPercentage: 0, + matchedWord: null + }; // some exceptional case if the word from OGCIO contains directly the search address, we consider it as a full match if (stringToSearch.indexOf(string) >= 0) { - return CONFIDENT_REVERSE_MATCH; - } - - for (let i = 0; i < stringToSearch.length; i ++) { - for (let end = stringToSearch.length; end > i; end --) { - const substring = stringToSearch.substring(i, end); - if (string.includes(substring)) { - return (substring.length * 1.0 / stringToSearch.length) + match.matchPercentage = 0.9; + match.matchedWord = string; + } else { + masterLoop: + for (let i = 0; i < stringToSearch.length; i ++) { + for (let end = stringToSearch.length; end > i; end --) { + const substring = stringToSearch.substring(i, end); + if (string.includes(substring)) { + match.matchPercentage = (substring.length * 1.0 / stringToSearch.length); + match.matchedWord = substring; + break masterLoop; + } } } } - return 0; + + return match; } /** @@ -147,6 +164,41 @@ function splitValueForSpaceIfChinese(value) { return value; } + +function matchAllMatchedWords(address, matchedWords) { + return matchedWords.map(word => address.includes(word)).reduce((p,c) => p && c, true); +} +/** + * Find the longest set of matches that has highest score and not overlapping + * @param {*} address + * @param {*} matches + */ +function findMaximumNonOverlappingMatches(address, matches) { + if (matches.length === 1) { + if (matches[0].matchedWord !== null && matchAllMatchedWords(address, matches[0].matchedWords)) { + return matches; + } + return []; + } + + let longestMatchScore = 0; + let longestMatch = []; + for (const match of matches) { + if (matchAllMatchedWords(address, match.matchedWords)) { + let subAddress = address; + match.matchedWords.forEach(word => subAddress = subAddress.replace(word, '')); + const localLongestMatch = findMaximumNonOverlappingMatches(subAddress, matches.filter(m => m.matchedKey !== match.matchedKey)); + localLongestMatch.push(match); + const score = calculateScoreFromMatches(localLongestMatch); + if (score > longestMatchScore) { + longestMatchScore = score; + longestMatch = localLongestMatch; + } + } + } + return longestMatch; +} + /** * To calcutate the final confident with partical match * @param {*} confident @@ -156,8 +208,18 @@ function modifyConfidentByPartialMatchPercentage(confident, matchPercentage) { return confident * matchPercentage * matchPercentage; } -function searchSimilarityForStreetOrVillage(type, address, BuildingNoFrom, BuildingNoTo) { - const sim = new Match(CONFIDENT_ALL_MATCH, type) +function searchSimilarityForStreetOrVillage(type, address, addressToSearch, BuildingNoFrom, BuildingNoTo) { + const sim = new Match(0, type, []); + if (address.includes(addressToSearch)) { + sim.confident = CONFIDENT_ALL_MATCH; + sim.matchedWords.push(addressToSearch); + } else { + const { matchPercentage, matchedWord } = findPartialMatch(address, addressToSearch); + if (matchPercentage > 0) { + sim.confident = modifyConfidentByPartialMatchPercentage(CONFIDENT_ALL_MATCH, matchPercentage); + sim.matchedWords.push(matchedWord); + } + } // total match of the streetname if (BuildingNoFrom) { const from = parseInt(BuildingNoFrom, 10); @@ -166,29 +228,42 @@ function searchSimilarityForStreetOrVillage(type, address, BuildingNoFrom, Build // If the street name and also the street no. is matched. we should give it a very high score if (from === to) { if (!tryToMatchAnyNumber(address, from)) { - sim.confident = CONFIDENT_MATCH_NAME; + if (tryToMatchRangeOfNumber(address, from, to, !isOdd)) { + // ratio 1 + sim.confident *= CONFIDENT_MULTIPLIER_OPPOSITE_STREET; + } else { + sim.confident *= CONFIDENT_MULTIPLIER_NAME_ONLY; + } } else { - sim.confident *= 1.5; + sim.matchedWords.push(from + ''); + sim.confident *= CONFIDENT_MULTIPLIER_FULL_STREET_MATCH; } } else { if (!tryToMatchRangeOfNumber(address, from, to, isOdd)) { - sim.confident = CONFIDENT_MATCH_NAME; + // Try to look up at opposite street + if (tryToMatchRangeOfNumber(address, from, to, !isOdd)) { + // ratio 1 + sim.confident *= CONFIDENT_MULTIPLIER_OPPOSITE_STREET; + } else { + sim.confident *= CONFIDENT_MULTIPLIER_NAME_ONLY; + } } else { - sim.confident *= 1.5; + // TODO: cannot mark the street/village number that we have came across + sim.confident *= CONFIDENT_MULTIPLIER_FULL_STREET_MATCH; } } } else { - sim.confident = CONFIDENT_MATCH_NAME; + sim.confident *= CONFIDENT_MULTIPLIER_NAME_ONLY; } return sim; } -function searchOccuranceForBlock(address, { BlockDescriptor, BlockNo, BuildingName}) { - if (address.includes(BuildingName)) { - const match = new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_BLOCK); +function searchOccuranceForBlock(address, { BlockDescriptor, BlockNo}) { + if (address.includes(BlockNo + BlockDescriptor)) { + const match = new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_BLOCK, [BlockNo, BlockDescriptor]); if (BlockNo) { if (!tryToMatchAnyNumber(address, parseInt(BlockNo, 10))) { - match.confident = CONFIDENT_MATCH_NAME; + match.confident = CONFIDENT_MULTIPLIER_NAME_ONLY; } } return match; @@ -197,11 +272,11 @@ function searchOccuranceForBlock(address, { BlockDescriptor, BlockNo, BuildingNa } function searchOccuranceForPhase(address, { PhaseNo, PhaseName}) { - if (address.includes(PhaseName)) { - const match = new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_PHASE); + if (address.includes(PhaseName + PhaseNo)) { + const match = new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_PHASE, [PhaseNo, PhaseName]); if (PhaseNo) { if (!tryToMatchAnyNumber(address, parseInt(PhaseNo, 10))) { - match.confident = CONFIDENT_MATCH_NAME; + match.confident = CONFIDENT_MULTIPLIER_NAME_ONLY; } } return match; @@ -211,25 +286,25 @@ function searchOccuranceForPhase(address, { PhaseNo, PhaseName}) { function searchOccuranceForEstate(address, { EstateName }) { if (address.includes(EstateName)) { - return new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_ESTATE); + return new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_ESTATE, [EstateName]); } return null; } function searchOccuranceForRegion(address, region) { if (address.includes(region)) { - return new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_REGION); + return new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_REGION, [region]); } return null; } function searchOccuranceForBuildingName(address, buildingName) { if (address.includes(buildingName)) { - return new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_BUILDING_NAME); + return new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_BUILDING_NAME, [buildingName]); } else { - const matchPercentage = partialMatch(address, buildingName); + const { matchPercentage, matchedWord } = findPartialMatch(address, buildingName); if (matchPercentage > 0) { - const match = new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_BUILDING_NAME); + const match = new Match(CONFIDENT_ALL_MATCH, OGCIO_KEY_BUILDING_NAME, [matchedWord]); match.confident = modifyConfidentByPartialMatchPercentage(match.confident, matchPercentage); return match; } @@ -241,34 +316,13 @@ function searchOccuranceForBuildingName(address, buildingName) { function searchOccuranceForStreet(address, {StreetName, BuildingNoFrom, BuildingNoTo}) { const streetsToTest = splitValueForSpaceIfChinese(StreetName); - if (address.includes(streetsToTest)) { - return searchSimilarityForStreetOrVillage(OGCIO_KEY_STREET, address, BuildingNoFrom, BuildingNoTo); - } else { - const matchPercentage = partialMatch(address, StreetName); - if (matchPercentage > 0) { - const match = searchSimilarityForStreetOrVillage(OGCIO_KEY_STREET, address, BuildingNoFrom, BuildingNoTo); - match.confident = modifyConfidentByPartialMatchPercentage(match.confident, matchPercentage); - return match; - } - } - - return null; + return searchSimilarityForStreetOrVillage(OGCIO_KEY_STREET, address, streetsToTest, BuildingNoFrom, BuildingNoTo); } function searchOccuranceForVillage(address, {VillageName, BuildingNoFrom, BuildingNoTo}) { const streetsToTest = splitValueForSpaceIfChinese(VillageName); - if (address.includes(streetsToTest)) { - return searchSimilarityForStreetOrVillage(OGCIO_KEY_VILLAGE, address, BuildingNoFrom, BuildingNoTo); - } else { - const matchPercentage = partialMatch(address, VillageName); - if (matchPercentage > 0) { - const match = searchSimilarityForStreetOrVillage(OGCIO_KEY_VILLAGE, address, BuildingNoFrom, BuildingNoTo); - match.confident = modifyConfidentByPartialMatchPercentage(match.confident, matchPercentage); - return match; - } - } - return null; + return searchSimilarityForStreetOrVillage(OGCIO_KEY_VILLAGE, address, streetsToTest, BuildingNoFrom, BuildingNoTo); } /** @@ -290,13 +344,14 @@ function searchOccurance(address, ogcioRecordElementKey, ogcioRecordElement) { function calculateScoreFromMatches(matches) { let score = 0; for (const match of matches) { - score += SCORE_SCHEME[match.matchedKey] * match.confident; + score += SCORE_SCHEME[match.matchedKey] * match.confident + + (match.matchedWords.map(word => word.length).reduce((p,c) => p + c, 0) * SCORE_PER_MATCHED_CHAR * SCORE_SCHEME[match.matchedKey]); } return score; } function findMatchFromOGCIORecord(address, ogcioRecord) { - const matchedPhrase = []; + const matches = []; // First we look up everything that exists in that address for (const key of elementPriority) { @@ -307,7 +362,7 @@ function findMatchFromOGCIORecord(address, ogcioRecord) { if (occurance === null) { continue; } - matchedPhrase.push(occurance); + matches.push(occurance); } if (ogcioRecord.eng[key] !== undefined && !isChinese(address)) { @@ -315,10 +370,10 @@ function findMatchFromOGCIORecord(address, ogcioRecord) { if (occurance === null) { continue; } - matchedPhrase.push(occurance); + matches.push(occurance); } } - return matchedPhrase; + return findMaximumNonOverlappingMatches(address, matches); } function transformDistrict(ogcioRecord) { @@ -359,4 +414,4 @@ async function searchResult(address, responseFromOGCIO) { } // node.js exports -module.exports = { searchResult }; +module.exports = { searchResult, calculateScoreFromMatches };