Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(query-splitting): first try at returning "query" in geocode split in placeholder #192

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .jshintrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"node": true,
"curly": true,
"eqeqeq": true,
"esversion": 6,
"esversion": 9,
"freeze": true,
"immed": true,
"indent": 2,
Expand Down
3 changes: 2 additions & 1 deletion lib/Queries.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ module.exports.hasSubject = function( subject, cb ){
}
};

module.exports.matchSubjectDistinctSubjectIds = function( subject, cb ){
module.exports.matchSubjectDistinctSubjectIds = function( phrase, cb ){
let subject = phrase.phrase;
var isPartialToken = subject.slice(-1) === PARTIAL_TOKEN_SUFFIX;

// no-op for empty string
Expand Down
6 changes: 3 additions & 3 deletions lib/Result.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ function Result( group, done ){
}

Result.prototype.getSubject = function(){
return this.group[ this.pos.subject ];
return this.group[ this.pos.subject ] && this.group[ this.pos.subject ].phrase;
};

Result.prototype.getObject = function(){
return this.group[ this.pos.object ];
return this.group[ this.pos.object ] && this.group[ this.pos.object ].phrase;
};

Result.prototype.getPreviousObject = function(){
return this.group[ this.pos.prev_object ];
return this.group[ this.pos.prev_object ] && this.group[ this.pos.prev_object ].phrase;
};

Result.prototype.getIdsAsArray = function(){
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
},
"author": "mapzen",
"license": "MIT",
"main": "server.js",
"main": "server/http.js",
"scripts": {
"test": "npm run units",
"units": "./cmd/units",
Expand Down
2 changes: 1 addition & 1 deletion prototype/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ function _queryManyGroups( index, groups, done ){

function query( text, done ){
this.tokenize( text, function( err, groups ){

switch( groups.length ){

// in a failure case we didnt find any groups; abort now
Expand Down
11 changes: 10 additions & 1 deletion prototype/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,17 @@ function _groups(tokens, phrases) {
// select the longest matching phrase
if( !_isArrayRangeIsEqual( tokens, phrase, t ) ){ continue; }

const before = tokens.slice(0, t).join(' ');
const after = tokens.slice(t + phrase.length).join(' ');

// add the match to the groups array
groups.push( phrase.join(' ') );
groups.push( {
phrase: phrase.join(' '),
remainder: {
before: before ? before : undefined,
after: after ? after : undefined,
}
});

// advance the iterator to skip any other words in the phrase
t += phrase.length -1;
Expand Down
31 changes: 23 additions & 8 deletions server/demo/index.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
<!doctype html>
<html lang="en" ng-app="demo">
<head>

<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no" />
<script src="https://code.jquery.com/jquery-2.2.4.min.js" integrity="sha256-BbhdlvQf/xTY9gja0Dq3HiwQF8LaCRTXxZKRutelT44=" crossorigin="anonymous"></script>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" />
Expand Down Expand Up @@ -70,11 +69,10 @@
results = results || [];

// load token groups
tokenize( args, function( groups ){
groups = groups || [];

tokenize( args, function( response ){
console.log(response);
// render results
render( results, groups );
render( results, response.groups || [] );
});
});
}
Expand Down Expand Up @@ -124,20 +122,35 @@
return v;
})

const queryBox = [
`<ul style="margin: 0; padding: 5px; padding-top: 0; list-style: none; background-color: #efefef; margin-bottom: 5px;">`,
result.query ? `<li type="button" class="btn btn-disabled" style="margin-top: 5px; padding: 3px 5px;"><span style="font-size: xx-small"> ${result.query} </span></li>` : '',
`<li type="button" class="btn btn-default" style="margin-top: 5px; padding: 3px 5px;">
<span style="font-size: xx-small"> ${result.phrase} </span>
</li>
</ul>`].join('');

view = [
queryBox,
...view
]

// console.log( lins[i] );
$("#results").append('<li class="list-group-item"><span>' + view.join('<br />') + '</span></li>');
}

function render( results, groups ){
console.log(groups);

$('#results').empty();
$('#tokens').empty();
clearMap();

// display token groups
groups.forEach( function( win ){
var buttons = win.map( function( token ){
return '<li type="button" class="btn btn-default" style="margin-top: 5px;"><span>' + token + '</span></li>';
let buttons = [];
win.forEach( function( token ){
buttons.push('<li type="button" class="btn btn-default" style="margin-top: 5px;"><span>' + token.phrase + '</span></li>');
});
$("#tokens").html('<li><ul style="margin: 0; padding: 5px; padding-top: 0; list-style: none; background-color: #efefef; margin-bottom: 5px;">' + buttons.join('\n') + '</ul></li>' );
});
Expand Down Expand Up @@ -189,7 +202,7 @@

function tokenize( args, cb ){
console.info( 'tokenize', args );
request( '/parser/tokenize', args, cb );
request( '/parser/tokenize2', args, cb );
}

function clearMap(){
Expand Down Expand Up @@ -315,7 +328,9 @@
</span>
</div><!-- /input-group -->

<h3>Possible tokens</h3>
<ul id="tokens" class="btn-group" role="group" style="margin-top:10px; list-style: none;"></ul>
<h3>Search Results</h3>
<ul id="results" class="list-group" style="margin-top:10px;"></ul>
</div><!-- /.col-md-6 -->

Expand Down
1 change: 1 addition & 0 deletions server/http.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ app.get( '/parser/search', require( './routes/search' ) );
app.get( '/parser/findbyid', require( './routes/findbyid' ) );
app.get( '/parser/query', require( './routes/query' ) );
app.get( '/parser/tokenize', require( './routes/tokenize' ) );
app.get( '/parser/tokenize2', require( './routes/tokenize2' ) );

// demo page
app.use('/demo', express.static( __dirname + '/demo' ));
Expand Down
26 changes: 26 additions & 0 deletions server/routes/base_tokenize.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

const PARTIAL_TOKEN_SUFFIX = require('../../lib/analysis').PARTIAL_TOKEN_SUFFIX;

module.exports = function( req, cb ){

// placeholder
var ph = req.app.locals.ph;

// input text
var text = req.query.text || '';

// live mode (autocomplete-style search)
// we append a byte indicating the last word is potentially incomplete.
// except where the last token is a space, then we simply trim the space.
if( req.query.mode === 'live' ){
if( ' ' === text.slice(-1) ){
text = text.trim();
} else {
text += PARTIAL_TOKEN_SUFFIX;
}
}

ph.tokenize( text, ( err, groups ) => {
cb(err, groups);
});
};
11 changes: 8 additions & 3 deletions server/routes/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@ module.exports = function( req, res ){
// create a map of parents
const parents = rowsToIdMap( parentResults );

const firstUsedGroupIndex = result.mask.indexOf(true);

// map documents to dict using id as key
const docs = documents.map( function( result ){
return mapResult( ph, result, parents, lang );
});
const docs = documents.map( (doc) => ({
phrase: result.group.slice(firstUsedGroupIndex).map(g => g.phrase).join(' '),
query: result.group[firstUsedGroupIndex].remainder.before,
...mapResult( ph, doc, parents, lang ),
})
);

// sort documents according to sorting rules
docs.sort( sortingAlgorithm );
Expand Down
28 changes: 6 additions & 22 deletions server/routes/tokenize.js
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@

const PARTIAL_TOKEN_SUFFIX = require('../../lib/analysis').PARTIAL_TOKEN_SUFFIX;
const base_tokenize = require('./base_tokenize');

module.exports = function( req, res ){

// placeholder
var ph = req.app.locals.ph;

// input text
var text = req.query.text || '';

// live mode (autocomplete-style search)
// we append a byte indicating the last word is potentially incomplete.
// except where the last token is a space, then we simply trim the space.
if( req.query.mode === 'live' ){
if( ' ' === text.slice(-1) ){
text = text.trim();
} else {
text += PARTIAL_TOKEN_SUFFIX;
}
}

ph.tokenize( text, ( err, groups ) => {
res.status(200).json( groups );
base_tokenize(req, (err, groups) => {
// for the legacy endpoint, send back a bare string[][]
// an array of groups of phrases:
// - ex: "pizza new york ny" -> [["new york", "ny"]]
res.status(200).json( groups.map((group) => group.map(g => g.phrase)) );
});
};
13 changes: 13 additions & 0 deletions server/routes/tokenize2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const base_tokenize = require('./base_tokenize');

module.exports = function( req, res ){
base_tokenize(req, (err, groups) => {
// for the tokenize2 endpoint, send back a json dict
// {phrase: string, remainder: {before: string, after: string}}[][]
// with an array of groups of phrase objects

res.status(200).json({
groups
});
});
};
36 changes: 30 additions & 6 deletions test/lib/Result.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,18 @@ module.exports.getSubject = function(test, common) {
const res = new Result();
t.equal(res.getSubject(), undefined);

const res2 = new Result(['a','b','c']);
const res2 = new Result([
{ phrase: 'a' },
{ phrase: 'b' },
{ phrase: 'c' }
]);
t.equal(res2.getSubject(), 'b');

const res3 = new Result(['a','b','c']);
const res3 = new Result([
{ phrase: 'a' },
{ phrase: 'b' },
{ phrase: 'c' }
]);
res3.pos.subject = 0;
t.equal(res3.getSubject(), 'a');

Expand All @@ -69,10 +77,18 @@ module.exports.getObject = function(test, common) {
const res = new Result();
t.equal(res.getObject(), undefined);

const res2 = new Result(['a','b','c']);
const res2 = new Result([
{ phrase: 'a' },
{ phrase: 'b' },
{ phrase: 'c' }
]);
t.equal(res2.getObject(), 'c');

const res3 = new Result(['a','b','c']);
const res3 = new Result([
{ phrase: 'a' },
{ phrase: 'b' },
{ phrase: 'c' }
]);
res3.pos.object = 1;
t.equal(res3.getObject(), 'b');

Expand All @@ -85,10 +101,18 @@ module.exports.getPreviousObject = function(test, common) {
const res = new Result();
t.equal(res.getPreviousObject(), undefined);

const res2 = new Result(['a','b','c']);
const res2 = new Result([
{ phrase: 'a' },
{ phrase: 'b' },
{ phrase: 'c' }
]);
t.equal(res2.getPreviousObject(), undefined);

const res3 = new Result(['a','b','c']);
const res3 = new Result([
{ phrase: 'a' },
{ phrase: 'b' },
{ phrase: 'c' }
]);
res3.pos.prev_object = 1;
t.equal(res3.getPreviousObject(), 'b');

Expand Down
12 changes: 10 additions & 2 deletions test/prototype/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ module.exports._queryGroup = function(test, common) {
});
test('_queryGroup - multiple tokens - no matches', function(t) {

const group = ['hello world', 'test', 'foo bar'];
const group = [
{phrase: 'hello world'},
{phrase: 'test'},
{phrase: 'foo bar'}
];
t.plan(10);

const index = {
Expand Down Expand Up @@ -110,7 +114,11 @@ module.exports._queryGroup = function(test, common) {
});
test('_queryGroup - multiple tokens - matches', function(t) {

const group = ['hello world', 'test', 'foo bar'];
const group = [
{phrase: 'hello world'},
{phrase: 'test'},
{phrase: 'foo bar'}
];
t.plan(7);

const index = {
Expand Down
12 changes: 10 additions & 2 deletions test/prototype/tokenize.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ module.exports._eachSynonym = function(test, common) {
test('_eachSynonym', function(t) {

const synonym = ['hello', 'big', 'bright', 'new', 'world'];
const expected = [ 'hello big', 'bright', 'new world' ];
const expected = [
{ phrase: 'hello big', remainder: { before: undefined, after: 'bright new world' } },
{ phrase: 'bright', remainder: { before: 'hello big', after: 'new world' } },
{ phrase: 'new world', remainder: { before: 'hello big bright', after: undefined, } }
];

var mock = tokenize._eachSynonym.bind({
index: { hasSubject: ( phrase, cb ) => {
Expand Down Expand Up @@ -172,7 +176,11 @@ module.exports._groups = function(test, common) {
'south wales','new south wales', 'wales', 'north', 'sydney',
'north sydney', 'south', 'au'
];
const expected = ['north sydney', 'new south wales', 'au'];
const expected = [
{ phrase: 'north sydney', remainder: { before: undefined, after: 'new south wales au' } },
{ phrase: 'new south wales', remainder: { before: 'north sydney', after: 'au' } },
{ phrase: 'au', remainder: { before: 'north sydney new south wales', after: undefined } }
];

t.deepEqual(tokenize._groups(tokens, phrases), expected);
t.end();
Expand Down