-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
109 lines (82 loc) · 2.56 KB
/
app.py
File metadata and controls
109 lines (82 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals
import logging
import ast
from flask import Flask, request
from flask_cors import CORS
# Flask application object; the route handlers below are registered on it.
app = Flask(__name__, instance_relative_config=True)
# from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
# from sumy.summarizers.luhn import LuhnSummarizer as Summarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from bs4 import BeautifulSoup
import nltk
# Fetch the NLTK 'punkt' resource every time this module is imported.
# NOTE(review): presumably needed by the sumy Tokenizer used in process() - confirm.
nltk.download('punkt')
# Language name handed to the Tokenizer, Stemmer and stop-word lookup in process().
LANGUAGE = "english"
def get_paragraphs(html_doc):
    """Collect the non-blank text lines found inside ``<p>`` elements.

    The text of every ``<p>`` tag is split on newlines. Blank pieces are
    dropped and whitespace runs inside each kept piece are collapsed to a
    single space.

    Returns:
        A ``(lines, count)`` tuple, where ``count`` is the number of lines.
    """
    document = BeautifulSoup(html_doc, 'html.parser')
    lines = [
        # split() without arguments already ignores leading/trailing blanks
        " ".join(piece.split())
        for paragraph in document.find_all("p")
        for piece in paragraph.text.split("\n")
        if piece.strip() != ""
    ]
    return lines, len(lines)
def extract_request_params(request):
    """Return the JSON object sent with *request* as a dict.

    Args:
        request: Request-like object exposing ``is_json`` and ``get_json()``.

    Returns:
        The decoded JSON body when it is a JSON object. An empty dict is
        returned (and an error logged) when the request is not JSON or when
        the body decodes to anything other than an object.
    """
    if not request.is_json:
        # request is invalid
        logging.error("Cannot parse request parameters")
        return {}

    payload = request.get_json()
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (list, string, number, null).
        # Callers use dict methods on the result, so normalise it here
        # instead of letting them fail with AttributeError.
        logging.error("Cannot parse request parameters")
        return {}
    return payload
@app.route("/process", methods=['POST'])
def process():
    """
    Summarize the HTML document posted as JSON.

    Expects a JSON object with an ``"html"`` key holding the markup. The text
    of its ``<p>`` elements is summarized with the LSA summarizer.

    Returns:
        ``({"success": True, "result": [sentence, ...]}, 200)`` on success, or
        ``({"success": False, "message": "Invalid parameters"}, 400)`` when
        ``"html"`` is missing, empty, or not text.
    """
    # get parameters (parse the request body once)
    params = extract_request_params(request)
    html = params.get("html") if isinstance(params, dict) else None

    # JSON may carry any type under "html"; only non-empty text can be parsed.
    # type("") is the text type on Python 2 and 3 because this module enables
    # unicode_literals.
    if not html or not isinstance(html, type("")):
        # Build error response
        return {
            "success": False,
            "message": "Invalid parameters"
        }, 400

    # process logic
    text_in, nlines = get_paragraphs(html)
    parser = PlaintextParser.from_string("\n".join(text_in), Tokenizer(LANGUAGE))
    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)

    # Aim for 20% of the input lines, capped at 100 sentences. When that
    # rounds down to zero (fewer than 5 lines), ask for one sentence per line.
    sentence_count = min(int(nlines * 0.2), 100)
    if sentence_count < 1:
        sentence_count = nlines

    # NOTE(review): _text is a private attribute of the sumy sentence object;
    # kept as-is so the response payload stays unchanged.
    sentences = [
        sentence._text
        for sentence in summarizer(parser.document, sentence_count)
    ]

    # Build response
    return {
        "success": True,
        "result": sentences
    }, 200
# Server entry point
def flaskserver():
    """
    Run the Flask app on 0.0.0.0, port 5008, with debug mode off.
    """
    app.run(debug=False, port=5008, host='0.0.0.0')
# Apply flask_cors to the application
CORS(app)

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    flaskserver()