-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSocketNER.js
109 lines (100 loc) · 3.57 KB
/
SocketNER.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
var spawn = require("child_process").spawn
var deasync = require("deasync")
/**
 * Holds the settings and the process handles for one Stanford NER
 * server/client pair. Nothing is spawned here.
 *
 * Every falsy argument is replaced by its default value.
 *
 * @param {number} [port] - Port given to the NER server and client (default 1234).
 * @param {string} [classifierFileName] - Classifier file name
 *   (default "english.all.3class.distsim.crf.ser.gz").
 * @param {string} [pathToNER] - Prefix that is concatenated directly in front
 *   of the jar and classifier file names, so it has to end with a path
 *   separator (default "/").
 */
function socketNER(port, classifierFileName, pathToNER) {
  var DEFAULT_PORT = 1234
  var DEFAULT_CLASSIFIER = "english.all.3class.distsim.crf.ser.gz"
  var DEFAULT_PATH = "/"

  this.port = port ? port : DEFAULT_PORT
  this.classifier = classifierFileName ? classifierFileName : DEFAULT_CLASSIFIER
  this.pathToNER = pathToNER ? pathToNER : DEFAULT_PATH

  // Child-process handles, filled in by startServer / startClient
  this.server = undefined
  this.client = undefined
}
/**
 * Spawns the Stanford NER server as a java child process, stores it on
 * `this.server` and (through the deasync wrapper) waits until the server
 * has reported that loading is finished.
 *
 * The callback is invoked exactly once:
 *   - with (null, true) when the text "done" shows up on the server's stderr,
 *   - with an Error when the process emits "error" (e.g. java cannot be
 *     spawned) or exits before "done" was seen.
 * With deasync an error passed to the callback is surfaced to the caller as
 * a thrown exception instead of blocking forever.
 *
 * @returns {boolean} true once the server has finished loading.
 */
socketNER.prototype.startServer = deasync(function (cb) {
  var self = this
  var settled = false
  // Last few characters of the previous chunk, so that "done" is still
  // recognised when it is split across two stderr chunks.
  var carry = ""

  var server = spawn(
    "java", [
      "-mx750m", "-cp",
      self.pathToNER + "stanford-ner.jar",
      "edu.stanford.nlp.ie.NERServer",
      "-loadClassifier", self.pathToNER + self.classifier,
      // spawn() arguments are documented as strings
      "-port", String(self.port), "-outputFormat", "inlineXML"
    ]
  )
  self.server = server

  // Detaches the start-up listeners and reports the outcome once.
  function finish(err) {
    if (settled) {
      return
    }
    settled = true
    server.stderr.removeListener("data", onStderr)
    server.removeListener("error", onError)
    server.removeListener("exit", onExit)
    if (err) {
      cb(err)
    } else {
      cb(null, true)
    }
  }

  // The server has finished loading when it flushes out 'done [x secs]'.
  function onStderr(chunk) {
    var text = carry + chunk.toString()
    if (text.indexOf("done") > -1) {
      finish(null)
      return
    }
    carry = text.slice(-3)
  }

  function onError(err) {
    finish(err)
  }

  function onExit(code, signal) {
    finish(new Error(
      "NER server exited before it finished loading (code: " + code +
      ", signal: " + signal + ")"
    ))
  }

  // The server's progress output is observed on stderr, not on stdout.
  server.stderr.on("data", onStderr)
  server.on("error", onError)
  server.on("exit", onExit)
})
/**
 * Spawns the Stanford NER client as a java child process, stores it on
 * `this.client` and (through the deasync wrapper) waits until the client
 * has printed its "Input some text" prompt on stdout.
 *
 * The callback is invoked exactly once:
 *   - with (null, true) when a line of the accumulated stdout output starts
 *     with "Input some text",
 *   - with an Error when the process emits "error" (e.g. java cannot be
 *     spawned) or exits before the prompt was seen.
 * With deasync an error passed to the callback is surfaced to the caller as
 * a thrown exception instead of blocking forever.
 *
 * @returns {boolean} true once the client is ready for input.
 */
socketNER.prototype.startClient = deasync(function (cb) {
  var self = this
  var settled = false
  // Everything read from stdout so far; the prompt may arrive in pieces or
  // after other output, so a single chunk is not enough to decide.
  var seen = ""

  var client = spawn(
    "java", [
      "-cp",
      self.pathToNER + "stanford-ner.jar",
      "edu.stanford.nlp.ie.NERServer",
      // spawn() arguments are documented as strings
      "-port", String(self.port), "-client"
    ]
  )
  self.client = client

  // Detaches the start-up listeners and reports the outcome once.
  function finish(err) {
    if (settled) {
      return
    }
    settled = true
    client.stdout.removeListener("data", onStdout)
    client.removeListener("error", onError)
    client.removeListener("exit", onExit)
    if (err) {
      cb(err)
    } else {
      cb(null, true)
    }
  }

  function onStdout(chunk) {
    seen += chunk.toString()
    if (/^Input some text/m.test(seen.trim())) {
      finish(null)
    }
  }

  function onError(err) {
    finish(err)
  }

  function onExit(code, signal) {
    finish(new Error(
      "NER client exited before it was ready (code: " + code +
      ", signal: " + signal + ")"
    ))
  }

  client.stdout.on("data", onStdout)
  client.on("error", onError)
  client.on("exit", onExit)
})
/**
 * Starts the NER server and afterwards the NER client.
 *
 * Both steps are deasync-wrapped methods, so the client is only started
 * after the startServer() call has returned.
 */
socketNER.prototype.init = function () {
  // Order matters: the client is started with the same port as the server.
  this.startServer()
  this.startClient()
}
/**
 * Kills the NER server and client child processes.
 *
 * Handles that were never created (init() not called, or start-up stopped
 * half-way) are skipped, so close() is safe to call for cleanup at any
 * time. The client is killed even when killing the server throws; in that
 * case the error still propagates to the caller.
 */
socketNER.prototype.close = function () {
  var server = this.server
  var client = this.client
  try {
    if (server) {
      server.kill()
    }
  } finally {
    if (client) {
      client.kill()
    }
  }
}
/**
 * Tags `rawText` with the NER client and returns the entities found.
 *
 * The text is flattened to a single line (CR, LF, form feed, tab and
 * vertical tab each become a space) and terminated with one newline before
 * it is handed to tagIt().
 *
 * @param {string} rawText - Text to analyse. Non-string values are
 *   converted with String(); null/undefined count as empty text.
 * @param {string} [reqEntity] - Passed through to tagIt() unchanged.
 * @returns {Object} The result of tagIt(), or an empty object when there is
 *   no text to tag.
 */
socketNER.prototype.getEntities = function (rawText, reqEntity) {
  var self = this
  if (rawText === undefined || rawText === null) {
    return {}
  }
  var line = String(rawText).replace(/[\r\n\f\t\v]/g, " ")
  // Nothing to tag: skip the round trip instead of sending a bare newline.
  // NOTE(review): the client's prompt suggests an empty line ends its
  // session - confirm against the Stanford NER client.
  if (line.trim() === "") {
    return {}
  }
  return self.tagIt(line + "\n", reqEntity)
}
/**
 * Sends one line of text to the NER client and hands the parsed reply to
 * the callback (deasync turns this into a blocking call for the caller).
 *
 * @param {string} rawText - Text written to the client's stdin as-is.
 * @param {string} [reqEntity] - Forwarded to parser() to select an entity type.
 * @param {Function} cb - Node-style callback (err, entities) supplied by deasync.
 */
socketNER.prototype.tagIt = deasync(function (rawText, reqEntity, cb) {
  var ner = this
  var client = ner.client
  var replies = client.stdout

  // Invoked with the first chunk the client prints after the write below.
  function onReply(chunk) {
    // trim() strips the leading and following line breaks of the reply.
    var tagged = chunk.toString().trim()
    var entities = ner.parser(tagged, reqEntity)
    // (err, data) is the callback format the wrapper expects.
    cb(null, entities)
  }

  // Drain whatever is still buffered on stdout before asking for new output.
  replies.read()
  // Push the text to the NER server through the client's stdin.
  client.stdin.write(rawText)
  // One-shot listener: only the next "data" chunk is treated as the answer.
  replies.once("data", onReply)
})
/**
 * Extracts entities from inline-XML tagged text such as
 * "<PERSON>John</PERSON> lives in <LOCATION>Paris</LOCATION>".
 *
 * @param {string} taggedText - Text with <TAG>value</TAG> markup.
 * @param {string} [reqEntity] - Entity type to extract (case-insensitive).
 *   When omitted or empty, every tag made of upper-case letters is collected.
 * @returns {Object} Map from tag name to the array of tagged values, in order
 *   of appearance, e.g. { PERSON: ["John"], LOCATION: ["Paris"] }.
 */
socketNER.prototype.parser = function (taggedText, reqEntity) {
  var entities = {} // return value of parser function
  var wanted = reqEntity ? String(reqEntity).toUpperCase() : ""

  // The requested name is matched literally: regex metacharacters in it are
  // escaped so they can neither break the pattern nor widen the match.
  var tagPattern = wanted
    ? wanted.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
    : "[A-Z]+"

  // \1 forces the closing tag to carry the same name as the opening tag.
  var re = new RegExp("<(" + tagPattern + ")>(.*?)</\\1>", "g")

  var match
  while ((match = re.exec(taggedText)) !== null) {
    var tagName = match[1]
    var tagValue = match[2]
    if (Object.prototype.hasOwnProperty.call(entities, tagName)) {
      // tag already seen: append to its value array
      entities[tagName].push(tagValue)
    } else {
      // first occurrence: start a new value array
      entities[tagName] = [tagValue]
    }
  }
  return entities
}
module.exports = function (port, classifierFileName, pathToNER) {
return new socketNER(port, classifierFileName, pathToNER)
}