Skip to content

Commit

Permalink
Release 0.2 Fixes Train Parsing Problems
Browse files Browse the repository at this point in the history
  • Loading branch information
ash2shukla committed Jul 29, 2017
1 parent 1508688 commit 5ce1967
Show file tree
Hide file tree
Showing 15 changed files with 405 additions and 68 deletions.
12 changes: 12 additions & 0 deletions RailIN.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Metadata-Version: 1.1
Name: RailIN
Version: 0.2
Summary: Unofficial API for Indian Railways.
Home-page: https://github.com/ash2shukla/RailIN
Author: Ashish Shukla
Author-email: ash2shukla@gmail.com
License: MIT
Download-URL: https://github.com/ash2shukla/RailIN/archive/0.1.tar.gz
Description: UNKNOWN
Keywords: Railways,API,IR,unofficial
Platform: UNKNOWN
11 changes: 11 additions & 0 deletions RailIN.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
setup.cfg
setup.py
RailIN/CaptchaBreak.py
RailIN/RailIN.py
RailIN/__init__.py
RailIN/prettify.py
RailIN.egg-info/PKG-INFO
RailIN.egg-info/SOURCES.txt
RailIN.egg-info/dependency_links.txt
RailIN.egg-info/requires.txt
RailIN.egg-info/top_level.txt
1 change: 1 addition & 0 deletions RailIN.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

5 changes: 5 additions & 0 deletions RailIN.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bs4
requests==2.18.1
user_agent==0.1.8
pillow==4.2.1
pytesseract==0.1.7
1 change: 1 addition & 0 deletions RailIN.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
RailIN
6 changes: 3 additions & 3 deletions RailIN/CaptchaBreak.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ def _add_background(self,image):
return image

def decode(self,raw_image):
file = open('x.png','wb')
file = open('/tmp/x.png','wb')
file.write(raw_image)
file.close()
data = Image.open('x.png')
data = Image.open('/tmp/x.png')
# adding background after scaling will create blurry image
data = self._add_background(data)
# resize the image to 300 x 128 for better recognition by tesseract
data = data.resize((300,128),Image.ANTIALIAS)
remove('x.png')
remove('/tmp/x.png')
# convert all pixel's alpha to 255
string = its(data)
try:
Expand Down
4 changes: 2 additions & 2 deletions RailIN/RailIN.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def getFare(self,TN,F,T):
URL_Fare = "https://erail.in/data.aspx?Action=GetTrainFare&train="+str(TN)+"&from="+F+"&to="+T
return Prettify().FareToJson(get(URL_Fare).text)

def getStatus(self,TN,DD,MMM,YYYY,CD):
def getStatus(self,TN,DD,MMM,YYYY,STN):
D = '-'.join([str(DD), MMM, str(YYYY)])
URL_Live = "https://data.erail.in/getIR.aspx?&jsonp=true&Data=RUNSTATUS~0_"+str(TN)+"_"+D+"_"+CD
URL_Live = "https://data.erail.in/getIR.aspx?&jsonp=true&Data=RUNSTATUS~0_"+str(TN)+"_"+D+"_"+STN
return loads(get(URL_Live).text.strip('()'))
110 changes: 54 additions & 56 deletions RailIN/prettify.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def TrainsToJson(self,string):

def MakeSenseTrain(self,string):
data = string.split('~~~~~~~~')

data1 = data[0].split('~')
retval = {}

Expand All @@ -32,10 +31,16 @@ def MakeSenseTrain(self,string):
dic1['travel_time'] = data1[12]
dic1['running_days'] = data1[13]

retval['train_base']=dic1
retval['train_base'] = dic1

# dealing with the second part , third part isn't interpreted yet
data2 = data[1].split('~~')
# produces atleast 6 parts at max 8


data2 = data[1].split('~~~')
data2_1 = data2[0].split('~')
# 0th - Extract coach types etc

retval['coach_types']={}
retval['coach_types']['1A'] = data2_1[0][0]
retval['coach_types']['2A'] = data2_1[0][1]
Expand All @@ -47,67 +52,60 @@ def MakeSenseTrain(self,string):

retval['train_base']['source_depart'] = data2_1[5]
retval['train_base']['dstn_reach'] = data2_1[6]
retval['train_base']['type'] = data2_1[11]
retval['train_base']['train_id']= data2_1[12]
retval['train_base']['distance_from_to'] = data2_1[18]
retval['train_base']['average_speed'] = data2_1[19]

retval['train_base'] = dic1

data2_arr = data2[1].split('~~')

info_count = len(data2_arr)

base_arr = data2_arr[0].split('~')
# 1st - Extract notification

retval['train_base']['type'] = base_arr[0]
retval['train_base']['train_id'] = base_arr[1]
retval['train_base']['distance_from_to'] = base_arr[7]
retval['train_base']['average_speed'] = base_arr[8]
if len(base_arr)==16:
retval['train_base']['notif_coach'] = base_arr[12]
else:
pass
try:
retval['train_base']['notif_special'] = data2_arr[1].split('~')[3]
except:
pass
try:
# if this field has more words than a train type i.e. 3-4 then it is a notif
if len(data_arr[2].split())>5:
retval['train_base']['notif_special2'] = data2_arr[2]
else:
pass
except:
pass
try:
retval['train_base']['owned_by'] = data2_arr[3].split('~')[1]
except:
pass
try:
retval['train_base']['owned_by'] = data2_arr[3].split('~')[1]
except:
pass
try:
# if this part is having a alpha only string then it is owned by or region where train comes from
val = data2_arr[4].split('~')[1]
if all(i.isalpha() for i in val):
retval['train_base']['owned_by'] = val
else:
retval['train_base']['rake_share'] = val
except:
pass
try:
val = data2_arr[5].split('~')[1]
if ':' in val:
retval['coach_arrangement'] = []
for i in val.split(':'):
index = str(val.index(i))
i = i.split(',')
retval['coach_arrangement'].append({index : {'tag':i[0],'coach_id':i[1],'type':i[2]}})
else:
pass
notif = data2[1].split('~')[3]
retval['train_base']['notif_coach'] = notif
except:
pass

# 2nd - Extract train type or notification
for i in range(2,7):
try:
if data2[i] is not None:
key,value = self._extract(data2[i])
retval[key]=value
except:
pass
return retval

# Classify one '~'-delimited field group and return a (key, value) pair that
# the caller stores as retval[key] = value.
# NOTE(review): indentation is not visible in this view, so which `if` each
# `elif` below pairs with must be confirmed against the repository file.
def _extract(self,string):
# Drop leading/trailing '~' and break what remains into its fields.
val = string.strip('~')
val = val.split('~')
length = len(val)
# Exactly one field: decide from its text alone.
if length == 1:
if val[0] == "" :
# NOTE(review): this is a bare string, not a (key, value) pair; the visible
# caller unpacks two values inside a try/except, so this result would be
# discarded there - confirm that is intended.
return 'No Data'
else:
words = len(val[0].split())
# More than three whitespace-separated words is reported as a notification.
if words >3:
return "notif",val[0]
# Only letters, '&' and spaces, fewer than four words, and not the literal
# 'BG': reported as the train type.
if all((i.isalpha()) or (i=='&') or (i==' ') for i in val[0]) and (words <4) and (val[0]!='BG'):
return "train_type",val[0]
# Two or more fields: only the second field (val[1]) is inspected.
if length >= 2:
# Same character/word-count test as above, plus a minimum length of five.
if all((i.isalpha()) or (i=='&') or (i==' ') for i in val[1]) and (len(val[1].split()) <4) and (val[1]!='BG') and len(val[1])>4:
# Check camel case
# (words of one character are skipped by the `if len(i)>1` filter)
if all(i[0].isupper() and all(j.islower() for j in i[1:]) for i in val[1].split() if len(i)>1):
return "train_type",val[1]
# Every character is an upper-case letter.
elif all(i.isalpha() and i.isupper() for i in val[1]) :
# it is a region
return "region",val[1]
# Only digits and commas: returned as the list of comma-separated pieces.
elif all((i.isdigit())or (i==",") for i in val[1]):
return "rake_share",val[1].split(',')
# Colon-separated entries, each of the form "tag,coach_id,type".
elif ':' in val[1]:
coach = []
data = val[1].strip(':').split(':')
for i in range(len(data)):
f = data[i].split(',')
# Each entry is keyed by its 1-based position, as a string.
coach.append({str(i+1) : {'tag':f[0],'coach_id':f[1],'type':f[2]}})
return "coach_arrangement",coach
# No return statement follows: any input matching none of the branches above
# yields None.

def StationToJson(self,string):
retval = []
data = string.split('^')
Expand Down
45 changes: 45 additions & 0 deletions build/lib/RailIN/CaptchaBreak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from PIL import Image

from os import remove

from pytesseract import image_to_string as its

class Captcha:
    """Solve an arithmetic image captcha by running OCR on it."""

    def _add_background(self, image):
        """Give fully transparent pixels of ``image`` an opaque white colour.

        The pixels are read through an RGBA conversion, the adjusted pixel
        list is written back into ``image`` itself, and ``image`` is returned.
        """
        pixels = [
            (255, 255, 255, 255) if pixel[3] == 0 else pixel
            for pixel in image.convert('RGBA').getdata()
        ]
        image.putdata(pixels)
        return image

    def decode(self, raw_image):
        """Return the answer to the captcha in ``raw_image`` as a string.

        ``raw_image`` is the encoded image as bytes. The text recognised in
        the image is expected to be an integer expression followed by '='
        (which OCR sometimes reads as ':'). '0' is returned whenever no such
        expression can be recognised, so callers never have to handle a
        parsing error.
        """
        # Imported here so the block does not depend on a file-level change.
        from io import BytesIO

        # Decode straight from memory. Writing to a fixed path such as
        # /tmp/x.png lets concurrent calls overwrite each other's image,
        # leaves the file behind when decoding fails, and is not portable.
        data = Image.open(BytesIO(raw_image))
        # adding background after scaling will create blurry image
        data = self._add_background(data)
        # resize the image to 300 x 128 for better recognition by tesseract.
        # LANCZOS is the same filter as the older ANTIALIAS alias, which
        # newer Pillow releases no longer provide.
        data = data.resize((300, 128), Image.LANCZOS)
        return self._solve(its(data))

    def _solve(self, text):
        """Return the value of the expression that precedes '=' in ``text``."""
        end = text.find('=')
        if end == -1:
            # sometimes = is recognized as colon
            end = text.find(':')
        if end == -1:
            # neither delimiter was recognised: report 0 instead of raising
            return '0'
        return self._evaluate(text[:end])

    def _evaluate(self, expression):
        """Evaluate ``<int> (<op> <int>)*`` with ops + - * / and return str.

        The text comes from OCR of a remotely supplied image, so it is
        deliberately never passed to eval(): anything that is not plain
        integer arithmetic (names, calls, stray symbols, a dangling operator,
        division by zero) yields '0'.
        """
        import re

        # Tokens are either runs of ASCII digits or single non-space chars.
        tokens = re.findall(r'[0-9]+|\S', expression)
        # A well-formed expression alternates operand, operator, operand...
        # and therefore has an odd number of tokens (this also rejects '').
        if len(tokens) % 2 == 0:
            return '0'
        operands, operators = tokens[0::2], tokens[1::2]
        # A token that starts with a digit is necessarily an all-digit run.
        if not all(token[0] in '0123456789' for token in operands):
            return '0'
        if not all(token in ('+', '-', '*', '/') for token in operators):
            return '0'

        # ``term`` accumulates the current run of * and /; it is folded into
        # ``total`` with its sign at every + or -, which gives the usual
        # operator precedence.
        total, sign, term = 0, 1, int(operands[0])
        try:
            for op, operand in zip(operators, operands[1:]):
                value = int(operand)
                if op == '*':
                    term = term * value
                elif op == '/':
                    term = term / value
                else:
                    total += sign * term
                    sign = 1 if op == '+' else -1
                    term = value
        except ZeroDivisionError:
            return '0'
        return str(total + sign * term)
78 changes: 78 additions & 0 deletions build/lib/RailIN/RailIN.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from requests import get,post,Session
from CaptchaBreak import Captcha
from user_agent import generate_user_agent as gua
from json import dumps,loads
from prettify import Prettify
from datetime import date
'''
This API scrapes data of Indian Railways from erail.in and parses into JSON for use in personal applications.
Not intended for commercial use.
1. Get Route ( X )
2. Get Availability ( X )
3. Get Fare ( X )
4. Get Train Status ( X )
5. Get StationCode ( X )
6. Get PNR ( X )
'''

class RailIN:
    """Client for the erail.in / indianrail.gov.in endpoints used below.

    Every method issues HTTP requests; apart from the cases documented on
    getPNR, getRoute and getTrain, network and parsing errors propagate to
    the caller.
    """

    # Website asks for captcha which can be pre-generated due to the flaw
    def getPNR(self, PNR):
        """Look up a PNR and return the decoded JSON response.

        ``PNR`` may be a string or an integer. Failures are reported as a
        JSON *string* holding an ``error`` key (kept for compatibility),
        whereas a successful lookup returns the decoded object.
        """
        # Normalise once: the request URL is built by string concatenation,
        # so an integer PNR used to raise TypeError inside the try block and
        # was misreported as a captcha error.
        pnr = str(PNR)
        if len(pnr) != 10 or not pnr.isdigit():
            return dumps({'error': 'PNR must be 10 digit.'})
        URL_captcha = 'http://www.indianrail.gov.in/enquiry/captchaDraw.png'
        # create a Session; the with-block releases its connections on exit
        with Session() as s:
            s.headers['User-Agent'] = gua()
            probable_captcha = Captcha().decode(s.get(URL_captcha).content)
            try:
                URL = ('http://www.indianrail.gov.in/enquiry/CommonCaptcha?inputCaptcha='
                       + probable_captcha + '&inputPnrNo=' + pnr + '&inputPage=PNR')
                return loads(s.get(URL).text)
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                return dumps({'error': 'some captcha error occured'})

    def getRoute(self, TN):
        """Return the parsed station list for train number ``TN``.

        When the train lookup fails, the error dict produced by getTrain is
        returned so the caller can see why no route is available.
        """
        train = self.getTrain(TN)
        if 'error' in train:
            return train
        ID = train['train_base']['train_id']
        URL_Route = "https://erail.in/data.aspx?Action=TRAINROUTE&Password=2012&Data1="+ID+"&Data2=0&Cache=true"
        return Prettify().StationToJson(get(URL_Route).text)

    def getAllTrains(self, F, T):
        """Return the parsed list of trains from station ``F`` to ``T``."""
        URL_Trains = "https://erail.in/rail/getTrains.aspx?Station_From="+F+"&Station_To="+T+"&DataSource=0&Language=0&Cache=true"
        return Prettify().TrainsToJson(get(URL_Trains, headers={'User-Agent': gua()}).text)

    # Pass in date month and year
    def getTrainsOn(self, F, T, DD, MM, YYYY):
        """Return, as a JSON string, the trains from ``F`` to ``T`` that run
        on the given date.

        ``DD``, ``MM`` and ``YYYY`` are integers. A train is kept when the
        character of its ``running_days`` at the date's weekday index
        (Monday is 0) equals '1'.
        """
        D = date(YYYY, MM, DD).weekday()
        retval = [
            train for train in self.getAllTrains(F, T)
            if train['train_base']['running_days'][D] == '1'
        ]
        return dumps(retval)

    def getTrain(self, TN):
        """Return the first parsed entry for train number ``TN``.

        Any failure while fetching or parsing is reported as
        ``{'error': 'Unexpected Server Response'}``.
        """
        URL_Train = "https://erail.in/rail/getTrains.aspx?TrainNo="+str(TN)+"&DataSource=0&Language=0&Cache=true"
        try:
            return Prettify().TrainsToJson(get(URL_Train).text)[0]
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            return {'error': 'Unexpected Server Response'}

    def getAvailability(self, TN, SSTN, DSTN, CLS, QT, DD, MM):
        """Return the parsed availability response.

        The request key is the arguments joined with '_', ending in
        ``DD-MM``. NOTE(review): SSTN/DSTN look like source/destination
        station codes and CLS/QT like class and quota codes - confirm
        against the service.
        """
        URL_Avail = "https://d.erail.in/AVL_Request?Key="
        val = '_'.join([str(TN), SSTN, DSTN, CLS, QT, str(DD)+'-'+str(MM)])
        return Prettify().AvailToJson(get(URL_Avail+val).text)

    def getFare(self, TN, F, T):
        """Return the parsed fare response for train ``TN`` from ``F`` to ``T``."""
        URL_Fare = "https://erail.in/data.aspx?Action=GetTrainFare&train="+str(TN)+"&from="+F+"&to="+T
        return Prettify().FareToJson(get(URL_Fare).text)

    def getStatus(self, TN, DD, MMM, YYYY, STN):
        """Return the decoded running-status response for train ``TN``.

        The date is sent as ``DD-MMM-YYYY`` followed by ``STN``. The response
        text has its surrounding parentheses stripped before JSON decoding.
        NOTE(review): MMM is presumably a month abbreviation and STN a
        station code - confirm against the service.
        """
        D = '-'.join([str(DD), MMM, str(YYYY)])
        URL_Live = "https://data.erail.in/getIR.aspx?&jsonp=true&Data=RUNSTATUS~0_"+str(TN)+"_"+D+"_"+STN
        return loads(get(URL_Live).text.strip('()'))
3 changes: 3 additions & 0 deletions build/lib/RailIN/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from RailIN import RailIN

__all__ = ['RailIN']
Loading

0 comments on commit 5ce1967

Please sign in to comment.