Skip to content

Commit

Permalink
PR to remove API breaking TODO by implementing OCR plus small None ch…
Browse files Browse the repository at this point in the history
…eck (#21)

* changes to get API working

* changes to OCR; works now

* typo

* additional changes, None error removed

* req changes
  • Loading branch information
LordBurtz authored Sep 18, 2021
1 parent 40c55bb commit b8063e5
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 4 deletions.
37 changes: 33 additions & 4 deletions dsbapi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
import uuid
import base64

try:
from PIL import Image
except:
import Image
import pytesseract
import requests

class DSBApi:
def __init__(self, username, password, tablemapper=['type','class','lesson','subject','room','new_subject','new_teacher','teacher']):
"""
Expand Down Expand Up @@ -41,7 +48,7 @@ def __init__(self, username, password, tablemapper=['type','class','lesson','sub
i += 1


def fetch_entries(self):
def fetch_entries(self, images=True):
"""
Fetch all the DSBMobile entries
@return: list, containing lists of DSBMobile entries from the tables or only the entries if just one table was received (default: empty list)
Expand Down Expand Up @@ -96,12 +103,22 @@ def fetch_entries(self):
for entry in final:
if entry.endswith(".htm") and not entry.endswith(".html") and not entry.endswith("news.htm"):
output.append(self.fetch_timetable(entry))
elif entry.endswith(".jpg"):
elif entry.endswith(".jpg") and images == True:
output.append(self.fetch_img(entry))

final = []
for entry in output:
if entry is not None:
final.append(entry)

output = final

if len(output) == 1:
return output[0]
else:
return output


def fetch_img(self, imgurl):
"""
Extract data from the image
Expand All @@ -110,8 +127,20 @@ def fetch_img(self, imgurl):
@todo: Future use - implement OCR
@raise Exception: If the function will be crawled, because the function is not implemented yet
"""
raise Exception('Extraction of data from images is not implemented yet!')
return(list(dict()))

try:
img = Image.open(io.BytesIO(requests.get(imgurl)))
except:
return #haha this is quality coding surplus

string = ""

try:
return pytesseract.image_to_string(img)
except TesseractError:
raise Exception("You have to make the tesseract command accessible and work!")
return None

def fetch_timetable(self, timetableurl):
"""
parse the timetableurl HTML page and return the parsed entries
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
requests
beautifulsoup4
datetime
pytesseract
requests
PIL

0 comments on commit b8063e5

Please sign in to comment.