diff --git a/.jshintrc b/.jshintrc
index ae29cd95..ecc8f3f8 100644
--- a/.jshintrc
+++ b/.jshintrc
@@ -2,7 +2,7 @@
"node": true,
"curly": true,
"eqeqeq": true,
- "esversion": 6,
+ "esversion": 9,
"freeze": true,
"immed": true,
"indent": 2,
diff --git a/lib/Queries.js b/lib/Queries.js
index a25d3069..9ff8b331 100644
--- a/lib/Queries.js
+++ b/lib/Queries.js
@@ -80,7 +80,8 @@ module.exports.hasSubject = function( subject, cb ){
}
};
-module.exports.matchSubjectDistinctSubjectIds = function( subject, cb ){
+module.exports.matchSubjectDistinctSubjectIds = function( phrase, cb ){
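+ // callers now pass a phrase object ({ phrase, remainder }); unwrap the raw subject string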
+ let subject = phrase.phrase;
var isPartialToken = subject.slice(-1) === PARTIAL_TOKEN_SUFFIX;
// no-op for empty string
diff --git a/lib/Result.js b/lib/Result.js
index ce81b0f0..bde21ee0 100644
--- a/lib/Result.js
+++ b/lib/Result.js
@@ -35,15 +35,15 @@ function Result( group, done ){
}
Result.prototype.getSubject = function(){
- return this.group[ this.pos.subject ];
+ return this.group[ this.pos.subject ] && this.group[ this.pos.subject ].phrase;
};
Result.prototype.getObject = function(){
- return this.group[ this.pos.object ];
+ return this.group[ this.pos.object ] && this.group[ this.pos.object ].phrase;
};
Result.prototype.getPreviousObject = function(){
- return this.group[ this.pos.prev_object ];
+ return this.group[ this.pos.prev_object ] && this.group[ this.pos.prev_object ].phrase;
};
Result.prototype.getIdsAsArray = function(){
diff --git a/package.json b/package.json
index dc24dd13..a5d6d48f 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,7 @@
},
"author": "mapzen",
"license": "MIT",
- "main": "server.js",
+ "main": "server/http.js",
"scripts": {
"test": "npm run units",
"units": "./cmd/units",
diff --git a/prototype/query.js b/prototype/query.js
index 8839d48d..4ad0473e 100644
--- a/prototype/query.js
+++ b/prototype/query.js
@@ -133,7 +133,7 @@ function _queryManyGroups( index, groups, done ){
function query( text, done ){
this.tokenize( text, function( err, groups ){
-
+
switch( groups.length ){
// in a failure case we didn't find any groups; abort now
diff --git a/prototype/tokenize.js b/prototype/tokenize.js
index 44603420..df97894f 100644
--- a/prototype/tokenize.js
+++ b/prototype/tokenize.js
@@ -112,8 +112,17 @@ function _groups(tokens, phrases) {
// select the longest matching phrase
if( !_isArrayRangeIsEqual( tokens, phrase, t ) ){ continue; }
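+ // capture the tokens preceding and following this phrase as its remainder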
+ const before = tokens.slice(0, t).join(' ');
+ const after = tokens.slice(t + phrase.length).join(' ');
+
// add the match to the groups array
- groups.push( phrase.join(' ') );
+ groups.push( {
+ phrase: phrase.join(' '),
+ remainder: {
+ before: before ? before : undefined,
+ after: after ? after : undefined,
+ }
+ });
// advance the iterator to skip any other words in the phrase
t += phrase.length -1;
diff --git a/server/demo/index.html b/server/demo/index.html
index 3c2f2b65..ee31948a 100644
--- a/server/demo/index.html
+++ b/server/demo/index.html
@@ -1,7 +1,6 @@
-
@@ -70,11 +69,10 @@
results = results || [];
// load token groups
- tokenize( args, function( groups ){
- groups = groups || [];
-
+ tokenize( args, function( response ){
+ console.log(response);
// render results
- render( results, groups );
+ render( results, response.groups || [] );
});
});
}
@@ -124,11 +122,25 @@
return v;
})
+ const queryBox = [
+ ``,
+ result.query ? `- ${result.query}
` : '',
+ `-
+ ${result.phrase}
+
+
`].join('');
+
+ view = [
+ queryBox,
+ ...view
+ ]
+
// console.log( lins[i] );
$("#results").append('' + view.join('
') + '');
}
function render( results, groups ){
+ console.log(groups);
$('#results').empty();
$('#tokens').empty();
@@ -136,8 +148,9 @@
// display token groups
groups.forEach( function( win ){
- var buttons = win.map( function( token ){
- return '' + token + '';
+ let buttons = [];
+ win.forEach( function( token ){
+ buttons.push('' + token.phrase + '');
});
$("#tokens").html('' + buttons.join('\n') + '
' );
});
@@ -189,7 +202,7 @@
function tokenize( args, cb ){
console.info( 'tokenize', args );
- request( '/parser/tokenize', args, cb );
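+ // call the new tokenize2 endpoint, which returns phrase objects wrapped in a { groups } response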
+ request( '/parser/tokenize2', args, cb );
}
function clearMap(){
@@ -315,7 +328,9 @@
+ Possible tokens
+ Search Results
diff --git a/server/http.js b/server/http.js
index 3735dfd7..89af493a 100644
--- a/server/http.js
+++ b/server/http.js
@@ -95,6 +95,7 @@ app.get( '/parser/search', require( './routes/search' ) );
app.get( '/parser/findbyid', require( './routes/findbyid' ) );
app.get( '/parser/query', require( './routes/query' ) );
app.get( '/parser/tokenize', require( './routes/tokenize' ) );
+app.get( '/parser/tokenize2', require( './routes/tokenize2' ) );
// demo page
app.use('/demo', express.static( __dirname + '/demo' ));
diff --git a/server/routes/base_tokenize.js b/server/routes/base_tokenize.js
new file mode 100644
index 00000000..1ffcc995
--- /dev/null
+++ b/server/routes/base_tokenize.js
@@ -0,0 +1,26 @@
+
+const PARTIAL_TOKEN_SUFFIX = require('../../lib/analysis').PARTIAL_TOKEN_SUFFIX;
+
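+// shared handler used by both tokenize routes: reads the query text, applies
+// live-mode handling and calls back with the raw token groups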
+module.exports = function( req, cb ){
+
+ // placeholder
+ var ph = req.app.locals.ph;
+
+ // input text
+ var text = req.query.text || '';
+
+ // live mode (autocomplete-style search)
+ // we append a byte indicating the last word is potentially incomplete.
+ // except where the last token is a space, then we simply trim the space.
+ if( req.query.mode === 'live' ){
+ if( ' ' === text.slice(-1) ){
+ text = text.trim();
+ } else {
+ text += PARTIAL_TOKEN_SUFFIX;
+ }
+ }
+
+ ph.tokenize( text, ( err, groups ) => {
+ cb(err, groups);
+ });
+};
diff --git a/server/routes/search.js b/server/routes/search.js
index ab7a46e8..9f293e8f 100644
--- a/server/routes/search.js
+++ b/server/routes/search.js
@@ -61,10 +61,15 @@ module.exports = function( req, res ){
// create a map of parents
const parents = rowsToIdMap( parentResults );
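+ // index of the first token group used by the match; its remainder.before holds the free-text part of the query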
+ const firstUsedGroupIndex = result.mask.indexOf(true);
+
// map documents to dict using id as key
- const docs = documents.map( function( result ){
- return mapResult( ph, result, parents, lang );
- });
+ const docs = documents.map( (doc) => ({
+ phrase: result.group.slice(firstUsedGroupIndex).map(g => g.phrase).join(' '),
+ query: result.group[firstUsedGroupIndex].remainder.before,
+ ...mapResult( ph, doc, parents, lang ),
+ })
+ );
// sort documents according to sorting rules
docs.sort( sortingAlgorithm );
diff --git a/server/routes/tokenize.js b/server/routes/tokenize.js
index ace32097..96aa7272 100644
--- a/server/routes/tokenize.js
+++ b/server/routes/tokenize.js
@@ -1,26 +1,10 @@
-
-const PARTIAL_TOKEN_SUFFIX = require('../../lib/analysis').PARTIAL_TOKEN_SUFFIX;
+const base_tokenize = require('./base_tokenize');
module.exports = function( req, res ){
-
- // placeholder
- var ph = req.app.locals.ph;
-
- // input text
- var text = req.query.text || '';
-
- // live mode (autocomplete-style search)
- // we append a byte indicating the last word is potentially incomplete.
- // except where the last token is a space, then we simply trim the space.
- if( req.query.mode === 'live' ){
- if( ' ' === text.slice(-1) ){
- text = text.trim();
- } else {
- text += PARTIAL_TOKEN_SUFFIX;
- }
- }
-
- ph.tokenize( text, ( err, groups ) => {
- res.status(200).json( groups );
+ base_tokenize(req, (err, groups) => {
+ // for the legacy endpoint, send back a bare string[][]
+ // an array of groups of phrases:
+ // - ex: "pizza new york ny" -> [["new york", "ny"]]
+ res.status(200).json( groups.map((group) => group.map(g => g.phrase)) );
});
};
diff --git a/server/routes/tokenize2.js b/server/routes/tokenize2.js
new file mode 100644
index 00000000..3eb4f8ed
--- /dev/null
+++ b/server/routes/tokenize2.js
@@ -0,0 +1,13 @@
+const base_tokenize = require('./base_tokenize');
+
+module.exports = function( req, res ){
+ base_tokenize(req, (err, groups) => {
+ // for the tokenize2 endpoint, send back a json dict
+ // {phrase: string, remainder: {before: string, after: string}}[][]
+ // with an array of groups of phrase objects
+
+ res.status(200).json({
+ groups
+ });
+ });
+};
diff --git a/test/lib/Result.js b/test/lib/Result.js
index 2d5dbfdd..70dea13b 100644
--- a/test/lib/Result.js
+++ b/test/lib/Result.js
@@ -53,10 +53,18 @@ module.exports.getSubject = function(test, common) {
const res = new Result();
t.equal(res.getSubject(), undefined);
- const res2 = new Result(['a','b','c']);
+ const res2 = new Result([
+ { phrase: 'a' },
+ { phrase: 'b' },
+ { phrase: 'c' }
+ ]);
t.equal(res2.getSubject(), 'b');
- const res3 = new Result(['a','b','c']);
+ const res3 = new Result([
+ { phrase: 'a' },
+ { phrase: 'b' },
+ { phrase: 'c' }
+ ]);
res3.pos.subject = 0;
t.equal(res3.getSubject(), 'a');
@@ -69,10 +77,18 @@ module.exports.getObject = function(test, common) {
const res = new Result();
t.equal(res.getObject(), undefined);
- const res2 = new Result(['a','b','c']);
+ const res2 = new Result([
+ { phrase: 'a' },
+ { phrase: 'b' },
+ { phrase: 'c' }
+ ]);
t.equal(res2.getObject(), 'c');
- const res3 = new Result(['a','b','c']);
+ const res3 = new Result([
+ { phrase: 'a' },
+ { phrase: 'b' },
+ { phrase: 'c' }
+ ]);
res3.pos.object = 1;
t.equal(res3.getObject(), 'b');
@@ -85,10 +101,18 @@ module.exports.getPreviousObject = function(test, common) {
const res = new Result();
t.equal(res.getPreviousObject(), undefined);
- const res2 = new Result(['a','b','c']);
+ const res2 = new Result([
+ { phrase: 'a' },
+ { phrase: 'b' },
+ { phrase: 'c' }
+ ]);
t.equal(res2.getPreviousObject(), undefined);
- const res3 = new Result(['a','b','c']);
+ const res3 = new Result([
+ { phrase: 'a' },
+ { phrase: 'b' },
+ { phrase: 'c' }
+ ]);
res3.pos.prev_object = 1;
t.equal(res3.getPreviousObject(), 'b');
diff --git a/test/prototype/query.js b/test/prototype/query.js
index 8fd942d0..627c3bfd 100644
--- a/test/prototype/query.js
+++ b/test/prototype/query.js
@@ -76,7 +76,11 @@ module.exports._queryGroup = function(test, common) {
});
test('_queryGroup - multiple tokens - no matches', function(t) {
- const group = ['hello world', 'test', 'foo bar'];
+ const group = [
+ {phrase: 'hello world'},
+ {phrase: 'test'},
+ {phrase: 'foo bar'}
+ ];
t.plan(10);
const index = {
@@ -110,7 +114,11 @@ module.exports._queryGroup = function(test, common) {
});
test('_queryGroup - multiple tokens - matches', function(t) {
- const group = ['hello world', 'test', 'foo bar'];
+ const group = [
+ {phrase: 'hello world'},
+ {phrase: 'test'},
+ {phrase: 'foo bar'}
+ ];
t.plan(7);
const index = {
diff --git a/test/prototype/tokenize.js b/test/prototype/tokenize.js
index cb5d4de2..88b21ac7 100644
--- a/test/prototype/tokenize.js
+++ b/test/prototype/tokenize.js
@@ -80,7 +80,11 @@ module.exports._eachSynonym = function(test, common) {
test('_eachSynonym', function(t) {
const synonym = ['hello', 'big', 'bright', 'new', 'world'];
- const expected = [ 'hello big', 'bright', 'new world' ];
+ const expected = [
+ { phrase: 'hello big', remainder: { before: undefined, after: 'bright new world' } },
+ { phrase: 'bright', remainder: { before: 'hello big', after: 'new world' } },
+ { phrase: 'new world', remainder: { before: 'hello big bright', after: undefined, } }
+ ];
var mock = tokenize._eachSynonym.bind({
index: { hasSubject: ( phrase, cb ) => {
@@ -172,7 +176,11 @@ module.exports._groups = function(test, common) {
'south wales','new south wales', 'wales', 'north', 'sydney',
'north sydney', 'south', 'au'
];
- const expected = ['north sydney', 'new south wales', 'au'];
+ const expected = [
+ { phrase: 'north sydney', remainder: { before: undefined, after: 'new south wales au' } },
+ { phrase: 'new south wales', remainder: { before: 'north sydney', after: 'au' } },
+ { phrase: 'au', remainder: { before: 'north sydney new south wales', after: undefined } }
+ ];
t.deepEqual(tokenize._groups(tokens, phrases), expected);
t.end();