Skip to content

Commit

Permalink
Merge branch 'master' of github.com:proycon/folia
Browse files Browse the repository at this point in the history
  • Loading branch information
proycon committed Nov 28, 2017
2 parents cbe917d + 4e11435 commit 1c74bb2
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ A number of command-line tools are readily available for working with FoLiA, to

- ``foliavalidator`` -- Tests if documents are valid FoLiA XML. **Always use this to test your documents if you produce your own FoLiA documents!**
- ``foliaquery`` -- Advanced query tool that searches FoLiA documents for a specified pattern, or modifies a document according to the query. Supports FQL (FoLiA Query Language) and CQL (Corpus Query Language).
- ``foliaeval`` -- Evaluation tool, can compute various evaluation metrics for selected annotation types, either against
a gold standard reference or as a measure of inter-annotator agreement.
- ``folia2txt`` -- Convert FoLiA XML to plain text (pure text, without any annotations)
- ``folia2annotatedtxt`` -- Like above, but produces output with simple
token annotations inline, by appending them directly to the word using a specific delimiter.
Expand Down
28 changes: 25 additions & 3 deletions foliatools/foliaeval.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_corrections(doc, Class, foliaset):



def evaluate(docs, Class, foliaset, reference, do_corrections=False, verbose=False):
def evaluate(docs, Class, foliaset, reference, do_corrections=False, do_confusionmatrix=False, verbose=False):
assert all((isinstance(doc, folia.Document) for doc in docs))
nr = len(docs)
index = []
Expand Down Expand Up @@ -124,6 +124,8 @@ def evaluate(docs, Class, foliaset, reference, do_corrections=False, verbose=Fal
'targets': {'truepos':0, 'falsepos': 0, 'falseneg':0, 'description': "A measure of detection, expresses whether the right targets (often words or spans of words) have been annotated, regardless of whether the annotation class/text/value is correct"},
valuelabel: {'truepos': 0, 'falsepos': 0, 'falseneg':0, 'description': "A measure of classification with regard to the text, expresses whether the text matches, i.e. the annotation is correct" if valuelabel == 'text' else "A measure of classification with regard to the annotation class, expresses whether the class matches, i.e. the annotation is correct"},
}
if do_confusionmatrix:
evaluation['confusionmatrix'] = {}

if do_corrections:
evaluation.update({
Expand All @@ -141,6 +143,7 @@ def evaluate(docs, Class, foliaset, reference, do_corrections=False, verbose=Fal

evaluator.evaluate(docs, linkchain, Class, reference, do_corrections)


targets_label = " & ".join([ target.id for target in targets])

if evaluator.target_misses:
Expand Down Expand Up @@ -182,6 +185,12 @@ def evaluate(docs, Class, foliaset, reference, do_corrections=False, verbose=Fal
evaluation['correction']['truepos'] += len(evaluator.correction_matches)
evaluation['correction']['falsepos'] += len(evaluator.correction_misses)

if do_confusionmatrix:
for refkey, counter in evaluator.confusionmatrix.items():
if refkey not in evaluation['confusionmatrix']:
evaluation['confusionmatrix'][refkey] = {}
evaluation['confusionmatrix'][refkey].update(counter)

try:
evaluation[valuelabel]['precision'] = evaluation[valuelabel]['truepos'] / (evaluation[valuelabel]['truepos'] + evaluation[valuelabel]['falsepos'])
except ZeroDivisionError:
Expand Down Expand Up @@ -235,6 +244,9 @@ def evaluate(docs, Class, foliaset, reference, do_corrections=False, verbose=Fal
except ZeroDivisionError:
evaluation['correction']['f1score'] = 0




return evaluation


Expand All @@ -261,6 +273,8 @@ def __init__(self):
self.correction_matches = []
self.correction_misses = []

self.confusionmatrix = {}

def evaluate(self, docs, linkchain, Class, reference, do_corrections):
assert all((isinstance(doc, folia.Document) for doc in docs))

Expand All @@ -272,12 +286,19 @@ def evaluate(self, docs, linkchain, Class, reference, do_corrections):

correctionclasses = defaultdict(set) #with corrections
corrections = defaultdict(set) #full corrections; values and correctionclasses
refvalue = None
for docnr, annotation, correction in iter_linkchain(linkchain, do_corrections):
value = get_value(annotation, Class) #gets class or text depending on annotation type
values[value].add(docnr)
if do_corrections and correction:
correctionclasses[correction.cls].add(docnr)
corrections[(correction.cls, value)].add(docnr)
if docnr == 0 and reference and refvalue is None:
refvalue = value
self.confusionmatrix[refvalue] = defaultdict(int)
elif docnr > 0 and reference and refvalue is not None:
self.confusionmatrix[refvalue][value] += 1



alldocset = set(range(0,len(docs)))
Expand Down Expand Up @@ -323,6 +344,7 @@ def main():
parser.add_argument('-s','--set', type=str,help="Set definition (required if there is ambiguity in the document)", action='store',required=False)
parser.add_argument('-c','--corrections', help="Use corrections", action='store_true',default="",required=False)
parser.add_argument('-q','--quiet',dest='verbose', help="Be quiet, do not output verbose information matches/mismatches", action='store_false',default=True,required=False)
parser.add_argument('-M','--confusionmatrix', help="Output and output a confusion matrix", action='store_true',default="",required=False)
parser.add_argument('--ref', help="Take first document to be the reference document, i.e. gold standard. If *not* specified all docuemnts are consider equal and metrics yield inter-annotator agreement", action='store_true')
#parser.add_argument('-i','--number',dest="num", type=int,help="", action='store',default="",required=False)
parser.add_argument('documents', nargs='+', help='FoLiA Documents')
Expand Down Expand Up @@ -355,11 +377,11 @@ def main():
for i, doc in enumerate(docs[1:]):
if i > 0: print(",")
evaldocs = [docs[0], doc]
evaluation = evaluate(evaldocs, Type, foliaset, True, args.corrections, args.verbose)
evaluation = evaluate(evaldocs, Type, foliaset, True, args.corrections, args.confusionmatrix, args.verbose)
print("\"" + doc.filename + "\": " + json.dumps(evaluation, indent=4))
print("}")
else:
evaluation = evaluate(docs, Type, foliaset, False, args.corrections, args.verbose)
evaluation = evaluate(docs, Type, foliaset, False, args.corrections, args.confusionmatrix, args.verbose)
print(json.dumps(evaluation, indent=4))

if __name__ == "__main__":
Expand Down

0 comments on commit 1c74bb2

Please sign in to comment.