-
Notifications
You must be signed in to change notification settings - Fork 9
final_data.js获取说明
gojuukaze edited this page Dec 29, 2020
·
3 revisions
final_data.js
是我从百度地图上获取的(没有固定的网址,需要手动搜“北京地铁线路图”)。
大致就是手动复制的每一条路线的svg代码,然后解析。
对于解析,有几点需要注意:
-
它不同线的绘制规则可能会不一样,同一条线中前半部分和后半部分的绘制规则也可能不一样,所以解析这个会很麻烦,不一样的地方要特殊处理。
final_data.js
中的draw_type
,draw_args
就是处理不一样的规则的。还有reverse
这个是因为这几条线是从后往前绘制的。 -
你在html中复制到的svg只有看到的部分;比如4号线比较长,一屏显示不下,svg只有显示的部分,这种情况也要特殊处理。
-
svg会随着你的缩放比例而变化,所以获取时要使用固定的缩放比例。
关于这块的代码,其实没有多少可借鉴性,另外由于年代久远,大都忘记了。(我今天试了一下不能正常运行了)。
鉴于好多人都找我要,贴出来以供参考:
import requests
import re
from urllib import request
from bs4 import BeautifulSoup as bs
from datetime_helper import get_yesterday_date,datetime_to_str,str_to_date
def _restyled_station(station, draw_type, draw_args):
    """Copy a station's identity fields and attach new drawing instructions."""
    return {
        'name': station['name'],
        'draw_name': station['draw_name'],
        'draw_img': station['draw_img'],
        'xy': station['xy'],
        'draw_type': draw_type,
        'draw_args': draw_args,
    }


def reverse_stations(stations):
    """Return the stations of a line in reverse drawing order.

    Straight entries (``draw_type == 'L'``) are reused unchanged. Any other
    entry carries curve arguments describing how to draw *into* it; once the
    order is flipped those arguments belong to the station that follows it in
    the reversed sequence. Such an entry is therefore emitted as a plain 'L'
    station, and the next station is emitted as a 'Q' station whose
    ``draw_args`` are the original curve arguments plus ``'M<next xy>'``.

    The input list is not modified (the previous implementation reversed it
    in place).

    :param stations: list of station dicts with the keys ``name``,
        ``draw_name``, ``draw_img``, ``xy``, ``draw_type`` and ``draw_args``.
    :return: a new list of station dicts in reversed order.
    :raises IndexError: if the last entry of the reversed sequence is a curve
        entry, because there is no following station to move the curve to.
    """
    ordered = list(reversed(stations))
    new_stations = []
    i = 0
    while i < len(ordered):
        current = ordered[i]
        if current['draw_type'] == 'L':
            new_stations.append(current)
            i += 1
            continue

        if i + 1 >= len(ordered):
            raise IndexError(
                'curve station %r has no following station to carry its curve'
                % (current['name'],)
            )

        # The curve station itself becomes a straight point ...
        new_stations.append(_restyled_station(current, 'L', ''))
        # ... and the curve is re-attached to the station after it, which
        # consumes two input entries in this iteration.
        follower = ordered[i + 1]
        args = current['draw_args'] + 'M%s' % (follower['xy'])
        new_stations.append(_restyled_station(follower, 'Q', args))
        i += 2
    return new_stations
def xy_equals(xy1,xy2,base=0.001):
    """Tell whether two ``"x,y"`` coordinate strings denote the same point.

    :param xy1: first coordinate as an ``"x,y"`` string.
    :param xy2: second coordinate as an ``"x,y"`` string.
    :param base: largest absolute difference allowed on each axis.
    :return: True when both axes differ by at most ``base``, else False.
    """
    ax, ay = xy1.split(',')
    bx, by = xy2.split(',')
    x_close = abs(float(ax) - float(bx)) <= base
    # The y values are only parsed when the x values already match.
    return x_close and abs(float(ay) - float(by)) <= base
def get_img_xy(img_tag):
    """Return the position of a station marker tag as an ``"x,y"`` string.

    ``ellipse`` tags are read from their ``cx``/``cy`` attributes and ``svg``
    tags from their ``x``/``y`` attributes. Any other tag name yields None.

    :param img_tag: object exposing ``name`` and an ``attrs`` mapping.
    :return: ``"x,y"`` string, or None for an unsupported tag name.
    """
    position_attrs = {'ellipse': ('cx', 'cy'), 'svg': ('x', 'y')}.get(img_tag.name)
    if position_attrs is None:
        return None
    x_attr, y_attr = position_attrs
    return '%s,%s'%(img_tag.attrs[x_attr],img_tag.attrs[y_attr])
def tttt(line_name,xy,last_xy):
    """Look up a hand-maintained list of special path points.

    Each entry names a line, a coordinate on that line and the extra path
    arguments to emit for it. The parsing loop in this file appends the
    returned arguments to the pending ``draw_args`` and keeps consuming path
    commands whenever the first element of the result is True.

    :param line_name: name of the line being parsed.
    :param xy: ``"x,y"`` coordinate of the current path point.
    :param last_xy: accepted for the caller's convenience; not used here.
    :return: ``(True, args)`` when ``xy`` matches an entry for ``line_name``
        (``args`` may be an empty string), otherwise ``(False, None)``.
    """
    special_points = (
        ('4号线', '1723.53,476.45', 'L1723.53,476.45'),
        ('6号线', '2194.00,876.98', 'L2194.00,876.98'),
        ('7号线', '2659.53,1194.57', 'L2659.53,1194.57'),
        ('8号线', '2137.32,319.47', 'L2137.32,319.47'),
        ('10号线', '2460.63,638.30', ''),
        ('13号线', '2315.16,398.32', 'L2315.16,398.32'),
        ('13号线', '2365.86,441.21999999999997', ''),
        ('14号线东段', '2529.53,1154.79', 'L2529.53,1154.79'),
        ('14号线东段', '2422.15,570.44', 'L2422.15,570.44'),
        ('14号线东段', '2529.40,667.94', 'L2529.40,667.94'),
        ('15号线', '2509.51,535.21', 'L2509.51,535.21'),
        ('昌平线', '1647.09,66.17', 'L1647.09,66.17'),
        ('昌平线', '1701.69,92.17', 'L1701.69,92.17'),
        ('昌平线', '1929.19,92.17', 'L1929.19,92.17'),
        ('燕房线', '316.15,1610.05', 'L316.15,1610.05'),
    )
    for known_line, known_xy, extra_args in special_points:
        # Compare coordinates only for entries of the requested line.
        if line_name == known_line and xy_equals(xy, known_xy):
            return True, extra_args
    return False, None
# NOTE(review): the indentation of this script was lost when the page was
# extracted; the nesting below is reconstructed from the control-flow
# keywords and should be confirmed against the author's copy.

# Lines whose station list is passed through reverse_stations() before saving.
reverse=['9号线','14号线西段','14号线东段','昌平线','房山线','s1线','燕房线','西郊线']
# What is this line.html?
# - Sorry, I really don't remember. It may be the SVG of a single line.
with open('/Users/xx/Desktop/cc/bd_line/line.html','r')as f:
    html=f.read()
soup = bs(html, 'html.parser')
svg=soup.find('svg')
group=svg.find_all('g',class_='LINE')
# Accumulates one dict per parsed line; written to disk at the end.
bj_sub=[]
# Never read or updated anywhere in this script.
has_q=set()
# The last matching <g> element is skipped (the reason is not recorded here).
for g in group[:-1]:
    line_name=g.attrs['id']
    path=g.find('path')
    line_color=path.attrs['stroke']
    print(line_name,line_color)
    # Direct children of the group only.
    tags=g.find_all(recursive=False)
    # Split the path's "d" attribute into alternating tokens: runs of
    # digits/dots/commas/spaces, and runs of capital letters (the commands).
    path_data=re.findall(r'[0-9 \. \,]+|[A-Z]+',path.attrs['d'])
    # i walks the child tags two at a time starting from index 1 (index 0 is
    # presumably the <path> itself - TODO confirm); count walks path_data.
    i=1
    count=0
    line_stations=[]
    last_xy=''
    while i<len(tags):
        # Children are read in pairs: marker tag first, then the label tag.
        station_img=tags[i]
        station_name=tags[i+1]
        draw_args=''
        # Consume path commands until one is accepted as this station's point.
        while True:
            if path_data[count] in['M','L']:
                draw_type='L'
                xy=path_data[count+1].strip()
                count+=2
                # A point equal to the previous one is skipped.
                if last_xy and xy_equals(xy,last_xy):
                    continue
                flag,args=tttt(line_name,xy,last_xy)
                last_xy=xy
                # Hard-coded special point: keep its args and read on.
                if flag:
                    draw_args+=args
                    continue
                break
            elif path_data[count] in['Q']:
                draw_type='Q'
                # The station position is the part of the argument after the
                # first two comma-separated numbers.
                xy=','.join(temp.strip() for temp in path_data[count+1].split(',')[2:])
                # The curve is recorded starting from the previous point.
                draw_args+='M'+last_xy+'Q'+path_data[count+1].strip()
                count+=2
                flag,args=tttt(line_name,xy,last_xy)
                last_xy=xy
                if flag:
                    draw_args+=args
                    continue
                break
            else:
                # Unsupported path command: report it and stop consuming.
                print("EEEE "+path_data[count],path_data[count+1])
                break
        last_xy=xy
        i+=2
        line_stations.append({'draw_name':str(station_name),'draw_img':str(station_img),'xy':xy,
        'draw_type':draw_type,'draw_args':draw_args,'name':station_name.get_text()})
    # Path tokens left over once every station has been matched.
    if count<len(path_data):
        if path_data[count]=='Z':
            # 'Z' is handled by repeating the first station at the end.
            line_stations.append(line_stations[0])
        elif len(path_data)-count==2:
            # One trailing command is tolerated only if it repeats the last point.
            xy=path_data[count+1].strip()
            if xy_equals(xy,last_xy):
                pass
            else:
                print(line_name+' count<len(path_data) ')
                break
        else:
            # Anything else aborts the whole run; this line is not appended.
            print(line_name+' count<len(path_data) ')
            break
    if line_name in reverse:
        line_stations=reverse_stations(line_stations)
    bj_sub.append({'name':line_name,'color':line_color,'stations':line_stations})
# The result is stored as the str() of the list, not as JSON.
with open('/Users/xx/Desktop/cc/bd_line/final_data','w')as f:
    f.write(str(bj_sub))
datetime_helper是我自己的时间处理包,下面是它的代码
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
时间处理函数
"""
import time
from datetime import timedelta, datetime, date
# strftime/strptime format strings used by the helpers in this module.
FORMAT_DATE = '%Y-%m-%d'
# NOTE(review): "%-m" / "%-d" (no zero padding) are not among the standard
# directives listed in the Python docs; they appear to rely on the platform's
# C library - confirm on every target platform.
FORMAT_DATE_CHINA = "%-m月%-d日"
FORMAT_DATE_CHINA_FULL = "%Y年%m月%d日"
COMPACT_DATE = "%Y%m%d"
DOT_DATE = "%Y.%m.%d"
COMPACT_TIME = "%H%M%S"
FORMAT_DATETIME = '%Y-%m-%d %H:%M:%S'
FORMAT_ONLY_TIME = "%H:%M"
COMPACT_DATETIME = "%Y%m%d%H%M%S"
def now():
    """Return the current local date and time as a naive datetime."""
    current = datetime.now()
    return current
def datetime_to_str(_datetime, date_format=FORMAT_DATETIME):
    """
    Render a datetime object as a string.

    :param _datetime: object to format (anything offering ``strftime``).
    :param date_format: strftime pattern; defaults to FORMAT_DATETIME.
    :return: the formatted string.
    """
    formatted = _datetime.strftime(date_format)
    return formatted
def str_to_datetime(date_str, date_format=FORMAT_DATETIME):
    """
    Parse a date/time string into a datetime object.

    :param date_str: text to parse.
    :param date_format: strptime pattern; defaults to FORMAT_DATETIME.
    :return: the parsed datetime.
    """
    parsed = datetime.strptime(date_str, date_format)
    return parsed
def str_to_date(date_str, date_format=FORMAT_DATE):
    """
    Parse a date string into a date object.

    :param date_str: text to parse.
    :param date_format: strptime pattern; defaults to FORMAT_DATE.
    :return: the date part of the parsed value.
    """
    parsed = datetime.strptime(date_str, date_format)
    return parsed.date()
def get_today():
    """
    Return today's local date as a datetime at midnight,
    e.g. 2014-07-14 00:00:00.
    """
    year, month, day = datetime.now().timetuple()[:3]
    return datetime(year, month, day)
def get_today_date():
    """
    Return today's local date as a date object, e.g. 2014-07-14.
    """
    return datetime.now().date()
def get_yesterday():
    """
    Return yesterday's local date as a datetime at midnight.
    """
    year, month, day = (datetime.now() - timedelta(days=1)).timetuple()[:3]
    return datetime(year, month, day)
def get_yesterday_date():
    """Return yesterday's local date as a date object."""
    one_day_ago = datetime.now() - timedelta(days=1)
    return one_day_ago.date()
def get_tomorrow():
    """
    Return tomorrow's local date as a datetime at midnight.

    :return: datetime for 00:00:00 of the day after today.
    """
    year, month, day = (datetime.now() + timedelta(days=1)).timetuple()[:3]
    return datetime(year, month, day)
def is_today(_datetime):
    """
    Tell whether the given value falls on today's local date.

    :param _datetime: object exposing ``year``, ``month`` and ``day``.
    :return: True if its calendar day equals today's, else False.
    """
    current = datetime.now()
    today_parts = (current.year, current.month, current.day)
    return today_parts == (_datetime.year, _datetime.month, _datetime.day)
def is_yesterday(_datetime):
    """
    Tell whether the given value falls on yesterday's local date.

    :param _datetime: object exposing ``year``, ``month`` and ``day``.
    :return: True if its calendar day equals yesterday's, else False.
    """
    day_before = datetime.now() - timedelta(days=1)
    expected = (day_before.year, day_before.month, day_before.day)
    return expected == (_datetime.year, _datetime.month, _datetime.day)
def timedelta_to_hour(delta):
    """
    Split a timedelta into whole hours, minutes and seconds.

    The duration is first rounded to the nearest second. Floor division is
    used throughout so every component is an int; plain ``/`` would produce
    fractional hours and minutes on Python 3.

    :param delta: timedelta to convert.
    :return: ``(hour, minute, second)`` tuple of ints. ``hour`` is not capped
        at 24, so durations longer than a day keep accumulating hours.
    """
    seconds = int(round(delta.total_seconds()))
    hour, remainder = divmod(seconds, 3600)
    minute, second = divmod(remainder, 60)
    return hour, minute, second
def datetime_to_timestamp(time_):
    """
    Convert a datetime to a Unix timestamp multiplied by 1000.

    The value goes through ``timetuple()`` and ``time.mktime``, so it is
    interpreted as local time and carries whole-second precision only.

    :param time_: datetime to convert.
    :return: int timestamp scaled by 1000.
    """
    seconds_since_epoch = time.mktime(time_.timetuple())
    millis = seconds_since_epoch * 1000
    return int(millis)
def minus_one_month(_datetime):
    """
    Return the given datetime moved back by one calendar month.

    January rolls over to December of the previous year. When the original
    day does not exist in the target month (e.g. 31 March -> February), the
    day is clamped to the last day of that month instead of raising
    ValueError.

    :param _datetime: datetime to shift.
    :return: new naive datetime with the same hour, minute and second;
        microseconds are not carried over.
    """
    # Local import keeps this helper self-contained.
    from calendar import monthrange

    if _datetime.month == 1:
        year, month = _datetime.year - 1, 12
    else:
        year, month = _datetime.year, _datetime.month - 1
    # monthrange()[1] is the number of days in the target month.
    day = min(_datetime.day, monthrange(year, month)[1])
    return datetime(year=year, month=month, day=day, hour=_datetime.hour, minute=_datetime.minute,
                    second=_datetime.second)
def add_one_month(_datetime):
    """
    Return the given datetime moved forward by one calendar month.

    :param _datetime: datetime to shift.
    :return: whatever ``add_month`` returns for a one-month shift.
    """
    shifted = add_month(_datetime, 1)
    return shifted
def is_leap_year(year):
    """
    Tell whether a year is a leap year.

    :param year: year number.
    :return: True for years divisible by 400, or divisible by 4 but not
        by 100; False otherwise.
    """
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0
def add_month(_datetime, months):
    """
    Return the given datetime shifted by a number of calendar months.

    Year roll-over uses integer arithmetic; plain ``/`` would produce a float
    year on Python 3, which ``datetime`` rejects. When the original day does
    not exist in the target month, it is clamped to that month's last day
    (30 for short months, 28 or 29 for February).

    :param _datetime: datetime to shift.
    :param months: whole number of months to add; negative values move
        backwards.
    :return: new naive datetime with the same hour, minute and second;
        microseconds are not carried over.
    """
    # Local import keeps this helper self-contained.
    from calendar import monthrange

    # Work with a zero-based month index so divmod yields the year carry and
    # the month in one step, including for month 12 and negative shifts.
    add_year, month_index = divmod(_datetime.month - 1 + months, 12)
    year = _datetime.year + add_year
    month = month_index + 1
    # monthrange()[1] is the number of days in the target month.
    day = min(_datetime.day, monthrange(year, month)[1])
    return datetime(
        year=year, month=month, day=day, hour=_datetime.hour, minute=_datetime.minute, second=_datetime.second)
def show_time_for_people(_datetime):
    """
    Format a datetime for human readers.

    A value from today is shown as its time only (FORMAT_ONLY_TIME), a value
    from yesterday as the fixed label for "yesterday", and anything else as
    a month/day string (FORMAT_DATE_CHINA).

    :param _datetime: datetime to describe.
    :return: display string.
    """
    current = now()
    day_before = get_yesterday()
    target_day = (_datetime.year, _datetime.month, _datetime.day)
    if target_day == (current.year, current.month, current.day):
        return datetime_to_str(_datetime, date_format=FORMAT_ONLY_TIME)
    if target_day == (day_before.year, day_before.month, day_before.day):
        return u"昨天"
    return datetime_to_str(_datetime, date_format=FORMAT_DATE_CHINA)
def get_week_start_end(_datetime):
    """
    Return the start and end of the week that contains the given value.

    ``weekday()`` counts Monday as 0 and Sunday as 6, so the start is the
    Monday of that week. Note that, for convenience, the end is the Monday
    of the *following* week rather than Sunday. The time of day of the input
    is carried through unchanged; pass a midnight value to get 00:00
    boundaries.

    :param _datetime: date or datetime inside the week of interest.
    :return: ``(start, end)`` tuple.
    """
    days_into_week = _datetime.weekday()
    week_start = _datetime - timedelta(days=days_into_week)
    week_end = week_start + timedelta(days=7)
    return week_start, week_end
def is_birthday_today(birthday):
    """
    Tell whether a birthday falls on today's month and day.

    :param birthday: datetime (or any object with ``month`` and ``day``).
    :return: True when both month and day match today's, else False.
    """
    return get_today().month == birthday.month and get_today().day == birthday.day