diff --git a/.jshintrc b/.jshintrc
index ae29cd95..ecc8f3f8 100644
--- a/.jshintrc
+++ b/.jshintrc
@@ -2,7 +2,7 @@
   "node": true,
   "curly": true,
   "eqeqeq": true,
-  "esversion": 6,
+  "esversion": 9,
   "freeze": true,
   "immed": true,
   "indent": 2,
diff --git a/lib/Queries.js b/lib/Queries.js
index a25d3069..9ff8b331 100644
--- a/lib/Queries.js
+++ b/lib/Queries.js
@@ -80,7 +80,8 @@ module.exports.hasSubject = function( subject, cb ){
   }
 };
 
-module.exports.matchSubjectDistinctSubjectIds = function( subject, cb ){
+module.exports.matchSubjectDistinctSubjectIds = function( phrase, cb ){
+  let subject = phrase.phrase;
   var isPartialToken = subject.slice(-1) === PARTIAL_TOKEN_SUFFIX;
 
   // no-op for empty string
diff --git a/lib/Result.js b/lib/Result.js
index ce81b0f0..bde21ee0 100644
--- a/lib/Result.js
+++ b/lib/Result.js
@@ -35,15 +35,15 @@ function Result( group, done ){
 }
 
 Result.prototype.getSubject = function(){
-  return this.group[ this.pos.subject ];
+  return this.group[ this.pos.subject ] && this.group[ this.pos.subject ].phrase;
 };
 
 Result.prototype.getObject = function(){
-  return this.group[ this.pos.object ];
+  return this.group[ this.pos.object ] && this.group[ this.pos.object ].phrase;
 };
 
 Result.prototype.getPreviousObject = function(){
-  return this.group[ this.pos.prev_object ];
+  return this.group[ this.pos.prev_object ] && this.group[ this.pos.prev_object ].phrase;
 };
 
 Result.prototype.getIdsAsArray = function(){
diff --git a/package.json b/package.json
index dc24dd13..a5d6d48f 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,7 @@
   },
   "author": "mapzen",
   "license": "MIT",
-  "main": "server.js",
+  "main": "server/http.js",
   "scripts": {
     "test": "npm run units",
     "units": "./cmd/units",
diff --git a/prototype/query.js b/prototype/query.js
index 8839d48d..4ad0473e 100644
--- a/prototype/query.js
+++ b/prototype/query.js
@@ -133,7 +133,7 @@ function _queryManyGroups( index, groups, done ){
 
 function query( text, done ){
   this.tokenize( text, function( err, groups ){
-
+
     switch( groups.length ){
 
       // in a failure case we didnt find any groups; abort now
diff --git a/prototype/tokenize.js b/prototype/tokenize.js
index 44603420..df97894f 100644
--- a/prototype/tokenize.js
+++ b/prototype/tokenize.js
@@ -112,8 +112,17 @@ function _groups(tokens, phrases) {
       // select the longest matching phrase
       if( !_isArrayRangeIsEqual( tokens, phrase, t ) ){ continue; }
 
+      const before = tokens.slice(0, t).join(' ');
+      const after = tokens.slice(t + phrase.length).join(' ');
+
       // add the match to the groups array
-      groups.push( phrase.join(' ') );
+      groups.push( {
+        phrase: phrase.join(' '),
+        remainder: {
+          before: before ? before : undefined,
+          after: after ? after : undefined,
+        }
+      });
 
       // advance the iterator to skip any other words in the phrase
       t += phrase.length -1;
diff --git a/server/demo/index.html b/server/demo/index.html
index 3c2f2b65..ee31948a 100644
--- a/server/demo/index.html
+++ b/server/demo/index.html
@@ -1,7 +1,6 @@
-…
@@ -70,11 +69,10 @@
       results = results || [];
 
       // load token groups
-      tokenize( args, function( groups ){
-        groups = groups || [];
-
+      tokenize( args, function( response ){
+        console.log(response);
         // render results
-        render( results, groups );
+        render( results, response.groups || [] );
       });
     });
   }
@@ -124,11 +122,25 @@
         return v;
       })
 
+      const queryBox = [
+        `…`].join('');
+
+      view = [
+        queryBox,
+        ...view
+      ]
+
       // console.log( lins[i] );
       $("#results").append('…' + view.join('…') + '…');
     }
 
     function render( results, groups ){
+      console.log(groups);
       $('#results').empty();
       $('#tokens').empty();
 
@@ -136,8 +148,9 @@
       // display token groups
       groups.forEach( function( win ){
-        var buttons = win.map( function( token ){
-          return '…' + token + '…';
+        let buttons = [];
+        win.forEach( function( token ){
+          buttons.push('…' + token.phrase + '…');
         });
         $("#tokens").html('…' );
       });
 
@@ -189,7 +202,7 @@
     function tokenize( args, cb ){
       console.info( 'tokenize', args );
-      request( '/parser/tokenize', args, cb );
+      request( '/parser/tokenize2', args, cb );
     }
 
     function clearMap(){
@@ -315,7 +328,9 @@
+    …
     Possible tokens
     …
+    …
     Search Results
     …
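Note: the demo above now requests /parser/tokenize2 and renders response.groups, where every token is an object exposing token.phrase. For reference, a sketch of the response body it consumes (shape taken from the tokenize2 route and the _groups test fixtures further down; values are illustrative, and keys whose value is undefined are dropped by res.json, so remainder.before / remainder.after only appear when non-empty):

    GET /parser/tokenize2?text=north sydney new south wales au

    {
      "groups": [[
        { "phrase": "north sydney",    "remainder": { "after": "new south wales au" } },
        { "phrase": "new south wales", "remainder": { "before": "north sydney", "after": "au" } },
        { "phrase": "au",              "remainder": { "before": "north sydney new south wales" } }
      ]]
    }
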
diff --git a/server/http.js b/server/http.js
index 3735dfd7..89af493a 100644
--- a/server/http.js
+++ b/server/http.js
@@ -95,6 +95,7 @@ app.get( '/parser/search', require( './routes/search' ) );
 app.get( '/parser/findbyid', require( './routes/findbyid' ) );
 app.get( '/parser/query', require( './routes/query' ) );
 app.get( '/parser/tokenize', require( './routes/tokenize' ) );
+app.get( '/parser/tokenize2', require( './routes/tokenize2' ) );
 
 // demo page
 app.use('/demo', express.static( __dirname + '/demo' ));
diff --git a/server/routes/base_tokenize.js b/server/routes/base_tokenize.js
new file mode 100644
index 00000000..1ffcc995
--- /dev/null
+++ b/server/routes/base_tokenize.js
@@ -0,0 +1,26 @@
+
+const PARTIAL_TOKEN_SUFFIX = require('../../lib/analysis').PARTIAL_TOKEN_SUFFIX;
+
+module.exports = function( req, cb ){
+
+  // placeholder
+  var ph = req.app.locals.ph;
+
+  // input text
+  var text = req.query.text || '';
+
+  // live mode (autocomplete-style search)
+  // we append a byte indicating the last word is potentially incomplete.
+  // except where the last token is a space, then we simply trim the space.
+  if( req.query.mode === 'live' ){
+    if( ' ' === text.slice(-1) ){
+      text = text.trim();
+    } else {
+      text += PARTIAL_TOKEN_SUFFIX;
+    }
+  }
+
+  ph.tokenize( text, ( err, groups ) => {
+    cb(err, groups);
+  });
+};
diff --git a/server/routes/search.js b/server/routes/search.js
index ab7a46e8..9f293e8f 100644
--- a/server/routes/search.js
+++ b/server/routes/search.js
@@ -61,10 +61,15 @@ module.exports = function( req, res ){
       // create a map of parents
       const parents = rowsToIdMap( parentResults );
 
+      const firstUsedGroupIndex = result.mask.indexOf(true);
+
       // map documents to dict using id as key
-      const docs = documents.map( function( result ){
-        return mapResult( ph, result, parents, lang );
-      });
+      const docs = documents.map( (doc) => ({
+        phrase: result.group.slice(firstUsedGroupIndex).map(g => g.phrase).join(' '),
+        query: result.group[firstUsedGroupIndex].remainder.before,
+        ...mapResult( ph, doc, parents, lang ),
+      })
+      );
 
       // sort documents according to sorting rules
       docs.sort( sortingAlgorithm );
diff --git a/server/routes/tokenize.js b/server/routes/tokenize.js
index ace32097..96aa7272 100644
--- a/server/routes/tokenize.js
+++ b/server/routes/tokenize.js
@@ -1,26 +1,10 @@
-
-const PARTIAL_TOKEN_SUFFIX = require('../../lib/analysis').PARTIAL_TOKEN_SUFFIX;
+const base_tokenize = require('./base_tokenize');
 
 module.exports = function( req, res ){
-
-  // placeholder
-  var ph = req.app.locals.ph;
-
-  // input text
-  var text = req.query.text || '';
-
-  // live mode (autocomplete-style search)
-  // we append a byte indicating the last word is potentially incomplete.
-  // except where the last token is a space, then we simply trim the space.
-  if( req.query.mode === 'live' ){
-    if( ' ' === text.slice(-1) ){
-      text = text.trim();
-    } else {
-      text += PARTIAL_TOKEN_SUFFIX;
-    }
-  }
-
-  ph.tokenize( text, ( err, groups ) => {
-    res.status(200).json( groups );
+  base_tokenize(req, (err, groups) => {
+    // for the legacy endpoint, send back a bare string[][]
+    // an array of groups of phrases:
+    // - ex: "pizza new york ny" -> [["new york", "ny"]]
+    res.status(200).json( groups.map((group) => group.map(g => g.phrase)) );
   });
 };
diff --git a/server/routes/tokenize2.js b/server/routes/tokenize2.js
new file mode 100644
index 00000000..3eb4f8ed
--- /dev/null
+++ b/server/routes/tokenize2.js
@@ -0,0 +1,13 @@
+const base_tokenize = require('./base_tokenize');
+
+module.exports = function( req, res ){
+  base_tokenize(req, (err, groups) => {
+    // for the tokenize2 endpoint, send back a json dict
+    // {phrase: string, remainder: {before: string, after: string}}[][]
+    // with an array of groups of phrase objects
+
+    res.status(200).json({
+      groups
+    });
+  });
+};
diff --git a/test/lib/Result.js b/test/lib/Result.js
index 2d5dbfdd..70dea13b 100644
--- a/test/lib/Result.js
+++ b/test/lib/Result.js
@@ -53,10 +53,18 @@ module.exports.getSubject = function(test, common) {
     const res = new Result();
     t.equal(res.getSubject(), undefined);
 
-    const res2 = new Result(['a','b','c']);
+    const res2 = new Result([
+      { phrase: 'a' },
+      { phrase: 'b' },
+      { phrase: 'c' }
+    ]);
     t.equal(res2.getSubject(), 'b');
 
-    const res3 = new Result(['a','b','c']);
+    const res3 = new Result([
+      { phrase: 'a' },
+      { phrase: 'b' },
+      { phrase: 'c' }
+    ]);
     res3.pos.subject = 0;
     t.equal(res3.getSubject(), 'a');
 
@@ -69,10 +77,18 @@ module.exports.getObject = function(test, common) {
     const res = new Result();
     t.equal(res.getObject(), undefined);
 
-    const res2 = new Result(['a','b','c']);
+    const res2 = new Result([
+      { phrase: 'a' },
+      { phrase: 'b' },
+      { phrase: 'c' }
+    ]);
     t.equal(res2.getObject(), 'c');
 
-    const res3 = new Result(['a','b','c']);
+    const res3 = new Result([
+      { phrase: 'a' },
+      { phrase: 'b' },
+      { phrase: 'c' }
+    ]);
     res3.pos.object = 1;
     t.equal(res3.getObject(), 'b');
 
@@ -85,10 +101,18 @@ module.exports.getPreviousObject = function(test, common) {
     const res = new Result();
     t.equal(res.getPreviousObject(), undefined);
 
-    const res2 = new Result(['a','b','c']);
+    const res2 = new Result([
+      { phrase: 'a' },
+      { phrase: 'b' },
+      { phrase: 'c' }
+    ]);
     t.equal(res2.getPreviousObject(), undefined);
 
-    const res3 = new Result(['a','b','c']);
+    const res3 = new Result([
+      { phrase: 'a' },
+      { phrase: 'b' },
+      { phrase: 'c' }
+    ]);
     res3.pos.prev_object = 1;
     t.equal(res3.getPreviousObject(), 'b');
 
diff --git a/test/prototype/query.js b/test/prototype/query.js
index 8fd942d0..627c3bfd 100644
--- a/test/prototype/query.js
+++ b/test/prototype/query.js
@@ -76,7 +76,11 @@ module.exports._queryGroup = function(test, common) {
   });
 
   test('_queryGroup - multiple tokens - no matches', function(t) {
-    const group = ['hello world', 'test', 'foo bar'];
+    const group = [
+      {phrase: 'hello world'},
+      {phrase: 'test'},
+      {phrase: 'foo bar'}
+    ];
     t.plan(10);
 
     const index = {
@@ -110,7 +114,11 @@ module.exports._queryGroup = function(test, common) {
   });
 
   test('_queryGroup - multiple tokens - matches', function(t) {
-    const group = ['hello world', 'test', 'foo bar'];
+    const group = [
+      {phrase: 'hello world'},
+      {phrase: 'test'},
+      {phrase: 'foo bar'}
+    ];
     t.plan(7);
 
     const index = {
diff --git a/test/prototype/tokenize.js b/test/prototype/tokenize.js
index cb5d4de2..88b21ac7 100644
--- a/test/prototype/tokenize.js
+++ b/test/prototype/tokenize.js
@@ -80,7 +80,11 @@ module.exports._eachSynonym = function(test, common) {
   test('_eachSynonym', function(t) {
     const synonym = ['hello', 'big', 'bright', 'new', 'world'];
-    const expected = [ 'hello big', 'bright', 'new world' ];
+    const expected = [
+      { phrase: 'hello big', remainder: { before: undefined, after: 'bright new world' } },
+      { phrase: 'bright', remainder: { before: 'hello big', after: 'new world' } },
+      { phrase: 'new world', remainder: { before: 'hello big bright', after: undefined, } }
+    ];
 
     var mock = tokenize._eachSynonym.bind({
       index: {
        hasSubject: ( phrase, cb ) => {
@@ -172,7 +176,11 @@ module.exports._groups = function(test, common) {
      'south wales','new south wales', 'wales', 'north',
      'sydney', 'north sydney', 'south', 'au'
    ];
-    const expected = ['north sydney', 'new south wales', 'au'];
+    const expected = [
+      { phrase: 'north sydney', remainder: { before: undefined, after: 'new south wales au' } },
+      { phrase: 'new south wales', remainder: { before: 'north sydney', after: 'au' } },
+      { phrase: 'au', remainder: { before: 'north sydney new south wales', after: undefined } }
+    ];
 
     t.deepEqual(tokenize._groups(tokens, phrases), expected);
     t.end();
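
For the search.js change above, a rough illustration of how the two new per-document fields fall out, reusing the "pizza new york ny" example from the tokenize route comment. The result.group contents and mask below are assumed for illustration only; firstUsedGroupIndex, mapResult, parents, ph and lang behave as in the diff:

    // hypothetical result for the text "pizza new york ny", where only
    // "new york" and "ny" matched placeholder data and "pizza" is left over
    const result = {
      mask: [ true, true ],
      group: [
        { phrase: 'new york', remainder: { before: 'pizza', after: 'ny' } },
        { phrase: 'ny',       remainder: { before: 'pizza new york' } }
      ]
    };

    const firstUsedGroupIndex = result.mask.indexOf(true); // 0

    const docs = documents.map( (doc) => ({
      // 'new york ny': the portion of the input that placeholder matched
      phrase: result.group.slice(firstUsedGroupIndex).map(g => g.phrase).join(' '),
      // 'pizza': the unmatched prefix a caller can pass to a downstream query
      query: result.group[firstUsedGroupIndex].remainder.before,
      ...mapResult( ph, doc, parents, lang ),
    }));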