Skip to content

Commit

Permalink
Improvements to author name parsing.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Biserkov committed May 19, 2014
1 parent d6652de commit 461c842
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 46 deletions.
62 changes: 33 additions & 29 deletions dbs/tools.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,14 @@ module.exports = {
}
else {
shame.call();
if (err === null) {
p(('Warning: Request ' + res.req._headers.host + res.req.path), 1);
p(' failed with error: ' + ((err === null) ? res.statusCode + ' ' + require('http').STATUS_CODES[res.statusCode] : err), 1);
p((' failed with error: ' + res.statusCode + ' ' + require('http').STATUS_CODES[res.statusCode]), 1);
}
else{
p((err), 1);
}

}
},

Expand Down Expand Up @@ -49,39 +55,28 @@ module.exports = {
//returns an array
text = text.trim();
var firstlast = [
/([A-Z]\.[A-Z]\.[A-Z]\.)\s([A-Za-z]+)/, //A.M.H. Brunsting
/([A-Za-z]+\s[A-Z]\.\s[A-Z]\.)\s([A-Za-z]+)/, //Artur R. M. Serrano
/([A-Z]\.\s[A-Z]\.)\s([A-Za-z]+)/, //J. P. Zaballos
/([A-Z]\.\-[A-Z]\.)\s([A-Za-z]+)/, //S.-A. Bengtson
/([A-Za-z]+ [A-Z]\.) ([A-Za-z]+)/, //Carl H. Lindroth
/([A-Za-z]+\s[A-Za-z]+)\s([A-Za-z]+)/, //Carmen Chavez Ortiz
/([A-Za-z]{2,})\s([A-Za-z]{2,})/, //Lyubomir Penev

/([A-Z])\s([A-Za-z]+)/, //M Baehr
/([A-Z])\. ([A-Za-z]+)/, //L. Penev
/([A-Za-z])\.\,\s([A-Za-z]+)/, //L., Penev

/([A-Z]+)\s([A-Z]+)/, //BERNHARD KROMP
/^([A-Z]\.[A-Z]\.[A-Z]\.)\s+([\S]+)$/, //A.M.H. Brunsting
/^(\S+\s+[A-Z]\.\s+[A-Z]\.)\s+([\S]+)$/, //Artur R. M. Serrano
/^([A-Z]\.\s+[A-Z]\.)\s+([\S]+)$/, //J. P. Zaballos
/^([A-Z]\.\-[A-Z]\.)\s+([\S]+)$/, //S.-A. Bengtson
/^([\S]+\s+[A-Z]\.)\s+([\S]+)$/, //Carl H. Lindroth
/^(\S+\s+\S+)\s+(\S+)$/, //Carmen Chavez Ortiz
/^([\S[^.,]]{2,})\s+([\S]{2,})$/, //Lyubomir Penev
/^([\S^.]\.)\,\s+([\S]+)$/, //L., Penev
/^([\S^.]\.)\s+([\S]+)$/, //L. Penev
/^(\S+)\s+([\S^\.]+)$/, //M Baehr
];

var lastfirst = [/([A-Za-z]+)\s([A-Z])\./, //Penev L.
/([A-Za-z]+)\,\s([A-Z])\./, //Penev, L.
/([A-Za-z]+)\,\s([A-Za-z]+)/, //Penev, Lyubomir
/([A-Za-z]+)\,\s([A-Z])/, //Penev, L
/([A-Za-z]+)\s\,,\s([A-Z])/, //Penev ,, L

var lastfirst = [

/^([\S]+)\,\s+([A-Z]\.)$/, //Penev, L.
/^([\S]+)\,\s+([A-Z]\.\s*[A-Z]\.)$/, //Vázquez, D. P.
/^([^\s.,]+)\,\s*(.+)$/, //PenevB, Lyubomir
/^([^\s,.]+)\,\s+([A-Z])$/, //Peneve, L
/^([^\s.,]+)\s+\,,\s+([A-Z])$/, //Penevf ,, L
/^([^\s]+)\s+([A-Z]\.)$/, //Penevc L.
];
var i, n, lst, l;
for (i = 0, n = firstlast.length; i < n; i+=1) {
if (firstlast[i].test(text)){
//p(firstlast[i])
//p(text)
lst = firstlast[i].exec(text);
l = lst.length;
return lst.slice(1, l);
}
}
for (i = 0, n = lastfirst.length; i < n; i+=1) {
if (lastfirst[i].test(text)){
//p(lastfirst[i])
Expand All @@ -93,6 +88,15 @@ module.exports = {
return first.concat(last);
}
}
for (i = 0, n = firstlast.length; i < n; i+=1) {
if (firstlast[i].test(text)){
//p(firstlast[i])
//p(text)
lst = firstlast[i].exec(text);
l = lst.length;
return lst.slice(1, l);
}
}
return [text];
},

Expand Down
36 changes: 20 additions & 16 deletions test/test_tools.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,26 @@ var assert = require("assert");
describe('Tools', function() {
describe('parseAuthorString', function() {
var examples = [
['M Baehr', ['M', 'Baehr']],
['L. Peneva', ['L', 'Peneva']],
['L., Penevb', ['L', 'Penevb']],
['Penevc L.', ['L', 'Penevc']],
['Penevd, L.', ['L', 'Penevd']],
['Peneve, L', ['L', 'Peneve']],
['Penevf ,, L', ['L', 'Penevf']],
['Lyubomir PenevA', ['Lyubomir', 'PenevA']],
['PenevB, Lyubomir', ['Lyubomir', 'PenevB']],
['BERNHARD KROMP', ['BERNHARD', 'KROMP']],
['J. P. Zaballos', ['J. P.', 'Zaballos']],
['S.-A. Bengtson', ['S.-A.', 'Bengtson']],
['Carl H. Lindroth', ['Carl H.', 'Lindroth']],
['Carmen Chavez Ortiz', ['Carmen Chavez', 'Ortiz']],
['A.M.H. Brunsting', ['A.M.H.', 'Brunsting']],
['Artur R. M. Serrano', ['Artur R. M.', 'Serrano']],
['L., Penevb', ['L.', 'Penevb']],
['M Baehr', ['M', 'Baehr']],
['L. Peneva', ['L.', 'Peneva']],
['Lyubomir PenevA', ['Lyubomir', 'PenevA']],
['BERNHARD KROMP', ['BERNHARD', 'KROMP']],
['J. P. Zaballos', ['J. P.', 'Zaballos']],
['S.-A. Bengtson', ['S.-A.', 'Bengtson']],
['Carl H. Lindroth', ['Carl H.', 'Lindroth']],
['Carmen Chavez Ortiz', ['Carmen Chavez', 'Ortiz']],
['A.M.H. Brunsting', ['A.M.H.', 'Brunsting']],
['Artur R. M. Serrano', ['Artur R. M.', 'Serrano']],

['Vázquez L.', ['L.', 'Vázquez']],
['Smith-Ramírez, L.', ['L.', 'Smith-Ramírez']],
['Peneve, L', ['L', 'Peneve']],
['Penevf ,, L', ['L', 'Penevf']],
['PenevB, Lyubomir', ['Lyubomir', 'PenevB']],
['Öckinger,Erik', ['Erik', 'Öckinger']],
['Vázquez, D. P.', ['D. P.', 'Vázquez']],
['Andersson, Georg K. S.', ['Georg K. S.', 'Andersson']],
];

examples.forEach(function(e){
Expand Down
2 changes: 1 addition & 1 deletion tester.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module.exports = function (app) {
'gnub' : [],
'websrv': [test_websrv]
};
config.modules2test.filter(function (module) { return config.modules.indexOf(module) > -1; }).forEach(function (module) {
config.modules2test.filter(function (module) { return config.modules.indexOf(module) > -1}).forEach(function (module) {
tests[module].forEach(function (test) {
test.call();
});
Expand Down

0 comments on commit 461c842

Please sign in to comment.