-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathshort_questions.py
99 lines (67 loc) · 3.64 KB
/
short_questions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import sys
import nltk
from textblob import TextBlob
from textblob import Word
def create_short_questions(line):
    """Build a short "What ...?" question from one English sentence.

    The sentence is part-of-speech tagged, the position of the first token
    carrying each tag is recorded, and the first matching tag pattern (in
    the priority order below) selects the question template.

    Penn Treebank tags consulted:
        NNP  proper noun, singular
        NN   noun, singular or mass
        PRP  personal pronoun
        JJ   adjective
        VBG  verb, gerund or present participle
        VBZ  verb, 3rd person singular present

    Args:
        line: The sentence, either as a ``str`` (wrapped in ``TextBlob``)
            or as an object exposing ``tags`` (a sequence of
            ``(word, tag)`` pairs) and ``words`` (a sequence indexed by
            the positions found in ``tags``).

    Returns:
        The generated question, or ``''`` when no pattern applies.
    """
    if isinstance(line, str):
        line = TextBlob(line)

    # Remember only the FIRST position at which each POS tag occurs.
    first_pos = {}
    for position, tagged in enumerate(line.tags):
        first_pos.setdefault(tagged[1], position)

    # Every supported pattern needs a 3rd-person present verb.
    if 'VBZ' not in first_pos:
        return ''

    def word(tag):
        # NOTE(review): positions come from ``tags`` but index ``words``;
        # this assumes both sequences tokenize identically -- confirm.
        return line.words[first_pos[tag]]

    verb = word('VBZ')

    # Subject used by the progressive ("is doing") templates, by priority:
    # proper noun, then pronoun, then common noun.
    progressive_subject = next(
        (tag for tag in ('NNP', 'PRP', 'NN') if tag in first_pos), None)

    question = ''
    if 'VBG' in first_pos and progressive_subject is not None:
        # "<subject> is <doing> ..." -> "What is <subject> <doing>?"
        # The pronoun case uses this same template; it previously emitted
        # the gerund twice and dropped the auxiliary verb.
        question = 'What {0} {1} {2}?'.format(
            verb, word(progressive_subject), word('VBG'))
    elif 'NNP' in first_pos and ('JJ' in first_pos or 'NN' in first_pos):
        # "<Name> is <adjective/noun>" -> "What is <Name>?"
        question = 'What {0} {1}?'.format(verb, word('NNP'))
    elif 'PRP' in first_pos:
        # Only "she"/"he" are supported; any other pronoun yields no
        # question. Compare case-insensitively so a sentence-initial
        # "She"/"He" is recognised.
        pronoun = word('PRP').lower()
        if pronoun in ('she', 'he'):
            question = 'What does {0} {1}?'.format(
                pronoun, verb.singularize())
    elif 'NNP' in first_pos:
        # "<Name> <verbs>" -> "What does <Name> <verb>?"
        question = 'What does {0} {1}?'.format(
            word('NNP'), verb.singularize())
    elif 'NN' in first_pos:
        question = 'What {0} {1}?'.format(verb, word('NN'))

    # When the tags are generated, "’s" is split into "’" and "s", leaving
    # a bare apostrophe as the verb; glue it back on as a contraction.
    if verb == "’":
        question = question.replace(" ’ ", "'s ")
    return question