Skip to content

final_data.js获取说明

gojuukaze edited this page Dec 29, 2020 · 3 revisions

final_data.js 是我从百度地图上获取的(没有固定的网址,需要手动搜“北京地铁线路图”)。

大致就是手动复制的每一条路线的svg代码,然后解析。

对于解析,有几点需要注意:

  • 不同线路的绘制规则可能不一样,同一条线的前半部分和后半部分的绘制规则也可能不一样,所以解析起来会很麻烦,不一样的地方需要特殊处理。 final_data.js 中的 draw_type 、 draw_args 就是用来处理这些不同规则的;而 reverse 列表的存在,是因为其中这几条线是从后往前绘制的。

  • 你在html中复制到的svg只有看到的部分;比如4号线比较长,一屏显示不下,svg只有显示的部分,这种情况也要特殊处理。

  • svg会随着你的缩放比例而变化,所以获取时要使用固定的缩放比例。


关于这块的代码,其实没有多少可借鉴性;另外由于年代久远,细节大都忘记了(我今天试了一下,已经不能正常运行了)。
鉴于好多人都找我要,贴出来以供参考:

import requests
import re
from urllib import request
from bs4 import BeautifulSoup as bs
from datetime_helper import get_yesterday_date,datetime_to_str,str_to_date


def reverse_stations(stations):
    """Reverse a station list and shift curve data onto the following station.

    ``stations`` is reversed IN PLACE first. Walking the reversed list:

    * a station whose ``draw_type`` is ``'L'`` is carried over unchanged
      (the very same dict object is reused);
    * any other station is re-emitted as a plain ``'L'`` station with empty
      ``draw_args``, and the station right after it is re-emitted with
      ``draw_type`` ``'Q'`` and the first station's ``draw_args`` followed by
      ``'M<xy of that next station>'``. Both stations are consumed.

    :param stations: list of station dicts with the keys ``name``,
        ``draw_name``, ``draw_img``, ``xy``, ``draw_type``, ``draw_args``
    :return: a new list of station dicts in reversed drawing order
    """
    copied_keys = ('name', 'draw_name', 'draw_img', 'xy')

    stations.reverse()

    reordered = []
    total = len(stations)
    pos = 0
    while pos < total:
        current = stations[pos]
        if current['draw_type'] == 'L':
            reordered.append(current)
            pos += 1
            continue

        # The curve station itself becomes a straight-line station ...
        straight = {key: current[key] for key in copied_keys}
        straight['draw_type'] = 'L'
        straight['draw_args'] = ''
        reordered.append(straight)

        # ... and its curve arguments move to the station that follows it.
        curve_args = current['draw_args']
        following = stations[pos + 1]
        curve_args = curve_args + 'M%s' % (following['xy'])
        curved = {key: following[key] for key in copied_keys}
        curved['draw_type'] = 'Q'
        curved['draw_args'] = curve_args
        reordered.append(curved)
        pos += 2
    return reordered

def xy_equals(xy1,xy2,base=0.001):
    """Tell whether two ``"x,y"`` coordinate strings denote the same point.

    :param xy1: first coordinate as an ``"x,y"`` string
    :param xy2: second coordinate as an ``"x,y"`` string
    :param base: maximum absolute difference allowed on each axis
    :return: True when both axes differ by at most ``base``, else False
    """
    first_x, first_y = xy1.split(',')
    second_x, second_y = xy2.split(',')

    # Check x first; y is only parsed when x is already within tolerance.
    if not abs(float(first_x) - float(second_x)) <= base:
        return False
    if not abs(float(first_y) - float(second_y)) <= base:
        return False
    return True

def get_img_xy(img_tag):
    """Return the ``"x,y"`` position string of a station image tag.

    ``ellipse`` tags are read from their ``cx``/``cy`` attributes and ``svg``
    tags from their ``x``/``y`` attributes. Any other tag name yields None.

    :param img_tag: object exposing ``name`` and an ``attrs`` mapping
    :return: ``"x,y"`` string, or None for an unsupported tag name
    """
    coordinate_keys = (
        ('ellipse', ('cx', 'cy')),
        ('svg', ('x', 'y')),
    )
    for tag_name, (x_key, y_key) in coordinate_keys:
        if img_tag.name == tag_name:
            return '%s,%s'%(img_tag.attrs[x_key], img_tag.attrs[y_key])
    return None

def tttt(line_name,xy,last_xy):
    """Look up ``xy`` in the hard-coded table of special points for a line.

    The table is scanned in order; the first entry whose line name equals
    ``line_name`` and whose coordinate matches ``xy`` (per ``xy_equals``) wins.

    :param line_name: line identifier, e.g. ``'4号线'``
    :param xy: coordinate string ``"x,y"`` to test
    :param last_xy: accepted for call compatibility; not used
    :return: ``(True, extra_draw_args)`` for a special point (the extra
        arguments may be an empty string), otherwise ``(False, None)``
    """
    # (line name, coordinate of the special point, draw args to emit)
    special_points = (
        ('4号线', '1723.53,476.45', 'L1723.53,476.45'),
        ('6号线', '2194.00,876.98', 'L2194.00,876.98'),
        ('7号线', '2659.53,1194.57', 'L2659.53,1194.57'),
        ('8号线', '2137.32,319.47', 'L2137.32,319.47'),
        ('10号线', '2460.63,638.30', ''),
        ('13号线', '2315.16,398.32', 'L2315.16,398.32'),
        ('13号线', '2365.86,441.21999999999997', ''),
        ('14号线东段', '2529.53,1154.79', 'L2529.53,1154.79'),
        ('14号线东段', '2422.15,570.44', 'L2422.15,570.44'),
        ('14号线东段', '2529.40,667.94', 'L2529.40,667.94'),
        ('15号线', '2509.51,535.21', 'L2509.51,535.21'),
        ('昌平线', '1647.09,66.17', 'L1647.09,66.17'),
        ('昌平线', '1701.69,92.17', 'L1701.69,92.17'),
        ('昌平线', '1929.19,92.17', 'L1929.19,92.17'),
        ('燕房线', '316.15,1610.05', 'L316.15,1610.05'),
    )
    for known_line, anchor, extra_args in special_points:
        if line_name == known_line and xy_equals(xy, anchor):
            return True, extra_args
    return False, None

# Lines whose parsed station list is passed through reverse_stations() below.
reverse=['9号线','14号线西段','14号线东段','昌平线','房山线','s1线','燕房线','西郊线']

# What is this line.html? 
# - Sorry, I really do not remember. It may be the svg of one line.
# NOTE(review): the input path is a hard-coded absolute path on the author's machine.
with open('/Users/xx/Desktop/cc/bd_line/line.html','r')as f:
    html=f.read()

soup = bs(html, 'html.parser')
svg=soup.find('svg')

# Every <g class="LINE"> element; each one is handled as one subway line.
group=svg.find_all('g',class_='LINE')

bj_sub=[]
# NOTE(review): has_q is never read or updated anywhere in the visible code.
has_q=set()
# The last group is skipped; the reason is not visible here — TODO confirm.
for g in group[:-1]:
    line_name=g.attrs['id']
    path=g.find('path')

    line_color=path.attrs['stroke']
    print(line_name,line_color)

    # Direct children of the group only.
    tags=g.find_all(recursive=False)
    # Tokenize the path's "d" attribute into runs of digits/spaces/dots/commas
    # and runs of uppercase letters. The loop below reads them as
    # (command, coordinates) pairs.
    path_data=re.findall(r'[0-9 \. \,]+|[A-Z]+',path.attrs['d'])
    # Children are consumed in pairs starting at index 1 (image, then name);
    # tags[0] is skipped — presumably the <path> itself, TODO confirm.
    i=1
    # Index of the next unread token in path_data.
    count=0
    line_stations=[]
    last_xy=''
    while i<len(tags):
        station_img=tags[i]
        station_name=tags[i+1]
        draw_args=''
        # Consume path tokens until the point belonging to this station is found.
        while True:
            if path_data[count] in['M','L']: 
                draw_type='L'
                xy=path_data[count+1].strip()
                count+=2
                # A point repeating the previous one is ignored.
                if last_xy and xy_equals(xy,last_xy):
                    continue
                

                # Special points listed in tttt() are not stations: their extra
                # draw args are accumulated and the next token pair is read.
                flag,args=tttt(line_name,xy,last_xy)
                last_xy=xy
                if flag:
                    draw_args+=args
                    continue
                break
            elif path_data[count] in['Q']:
                draw_type='Q'
                # The station point is whatever follows the first two
                # comma-separated values of the Q argument.
                xy=','.join(temp.strip() for temp in path_data[count+1].split(',')[2:])
                # The curve is stored as a move to the previous point followed
                # by the original Q command arguments.
                draw_args+='M'+last_xy+'Q'+path_data[count+1].strip()
                count+=2
                flag,args=tttt(line_name,xy,last_xy)
                last_xy=xy
                if flag:
                    draw_args+=args
                    continue
                break
            else:
                # Unexpected command: report it and stop scanning for this station.
                # NOTE(review): count is not advanced here and xy/draw_type keep
                # whatever value they had before (or are unset on the first pass).
                print("EEEE  "+path_data[count],path_data[count+1])
                break
        last_xy=xy
        i+=2
        line_stations.append({'draw_name':str(station_name),'draw_img':str(station_img),'xy':xy,
                              'draw_type':draw_type,'draw_args':draw_args,'name':station_name.get_text()})
    # Path tokens left over once every station has been matched.
    if count<len(path_data):
        if path_data[count]=='Z':
            # 'Z' token: the first station is appended again at the end.
            line_stations.append(line_stations[0])
        elif len(path_data)-count==2:
            # A single trailing point is tolerated only if it repeats the last one.
            xy=path_data[count+1].strip()
            if xy_equals(xy,last_xy):
                pass
            else:
                print(line_name+' count<len(path_data) ')
                # Leaves the outer for-loop: this line and all later ones are
                # not added to bj_sub.
                break

        else:
            print(line_name+' count<len(path_data) ')
            # Same as above: stops processing every remaining line.
            break
    if line_name in reverse:
        line_stations=reverse_stations(line_stations)
    bj_sub.append({'name':line_name,'color':line_color,'stations':line_stations})

# The result is written as the str() of the Python list, not serialized as JSON.
with open('/Users/xx/Desktop/cc/bd_line/final_data','w')as f:
    f.write(str(bj_sub))
    

datetime_helper是我自己的时间处理包,下面是它的代码

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
    时间处理函数
"""
import time
from datetime import timedelta, datetime, date

# strftime/strptime format strings used by the helpers in this module.
FORMAT_DATE = '%Y-%m-%d'
# NOTE(review): the "-" flag in "%-m"/"%-d" is a platform-specific strftime
# extension — confirm it is supported on the platforms this runs on.
FORMAT_DATE_CHINA = "%-m月%-d日"
FORMAT_DATE_CHINA_FULL = "%Y年%m月%d日"
COMPACT_DATE = "%Y%m%d"
DOT_DATE = "%Y.%m.%d"
COMPACT_TIME = "%H%M%S"
FORMAT_DATETIME = '%Y-%m-%d %H:%M:%S'
FORMAT_ONLY_TIME = "%H:%M"
COMPACT_DATETIME = "%Y%m%d%H%M%S"


def now():
    """Return the current local date and time as a naive ``datetime``."""
    current = datetime.now()
    return current


def datetime_to_str(_datetime, date_format=FORMAT_DATETIME):
    """
    Format a datetime object as a string.

    :param _datetime: object providing ``strftime`` (datetime or date)
    :param date_format: strftime pattern, ``FORMAT_DATETIME`` by default
    :return: the formatted string
    """
    formatted = _datetime.strftime(date_format)
    return formatted


def str_to_datetime(date_str, date_format=FORMAT_DATETIME):
    """
    Parse a time string into a datetime object.

    :param date_str: text to parse
    :param date_format: strptime pattern, ``FORMAT_DATETIME`` by default
    :return: the parsed ``datetime``
    """
    parsed = datetime.strptime(date_str, date_format)
    return parsed


def str_to_date(date_str, date_format=FORMAT_DATE):
    """
    Parse a date string into a ``date`` object.

    :param date_str: text to parse
    :param date_format: strptime pattern, ``FORMAT_DATE`` by default
    :return: the date part of the parsed value
    """
    parsed = datetime.strptime(date_str, date_format)
    return parsed.date()


def get_today():
    """
    Return the current day as a datetime at midnight, e.g. 2014-07-14 00:00:00.
    """
    year, month, day = datetime.now().timetuple()[:3]
    return datetime(year, month, day)


def get_today_date():
    """
    Return the current day as a ``date``, e.g. 2014-07-14.
    """
    return datetime.now().date()


def get_yesterday():
    """
    Return yesterday as a datetime at midnight.
    """
    one_day_ago = datetime.now() - timedelta(days=1)
    year, month, day = one_day_ago.timetuple()[:3]
    return datetime(year, month, day)


def get_yesterday_date():
    """Return yesterday as a ``date``."""
    one_day_ago = datetime.now() - timedelta(days=1)
    return one_day_ago.date()


def get_tomorrow():
    # type: () -> object
    """
    Return tomorrow as a datetime at midnight.

    :return: naive ``datetime`` for the start of the next day
    """
    one_day_ahead = datetime.now() + timedelta(days=1)
    year, month, day = one_day_ahead.timetuple()[:3]
    return datetime(year, month, day)


def is_today(_datetime):
    """
    Tell whether the given time falls on the current day.

    :param _datetime: object with ``year``, ``month`` and ``day`` attributes
    :return: True when its calendar day equals today's, else False
    """
    current = datetime.now()
    if current.year != _datetime.year:
        return False
    if current.month != _datetime.month:
        return False
    return current.day == _datetime.day


def is_yesterday(_datetime):
    """
    Tell whether the given time falls on the day before the current day.

    :param _datetime: object with ``year``, ``month`` and ``day`` attributes
    :return: True when its calendar day equals yesterday's, else False
    """
    reference = datetime.now() - timedelta(days=1)
    if reference.year != _datetime.year:
        return False
    if reference.month != _datetime.month:
        return False
    return reference.day == _datetime.day


def timedelta_to_hour(delta):
    """
    Split a timedelta into whole hours, minutes and seconds.

    The total duration is rounded to the nearest second first. Integer
    division is used explicitly so the parts are integers on Python 3 as well
    (plain ``/`` is true division there and would return fractional
    hours/minutes).

    :param delta: ``timedelta`` to convert
    :return: tuple ``(hour, minute, second)`` of ints; ``hour`` is not capped
        at 24, so it absorbs whole days
    """
    seconds = int(round(delta.total_seconds()))
    hour, remainder = divmod(seconds, 3600)
    minute, second = divmod(remainder, 60)
    return hour, minute, second


def datetime_to_timestamp(time_):
    """
    Convert a datetime to a unix timestamp multiplied by 1000.

    The value is derived from ``time_.timetuple()`` via ``time.mktime``.

    :param time_: datetime to convert
    :return: int, the ``time.mktime`` result times 1000, truncated
    """
    epoch_seconds = time.mktime(time_.timetuple())
    milliseconds = epoch_seconds * 1000
    return int(milliseconds)


def minus_one_month(_datetime):
    """
    Return the given datetime moved back by one calendar month.

    January rolls over to December of the previous year. When the target
    month is shorter than the source day (e.g. March 31 -> February), the day
    is clamped to the last day of the target month instead of raising
    ``ValueError``.

    :param _datetime: datetime to shift
    :return: new naive ``datetime`` with the same hour/minute/second;
        microseconds and tzinfo are not carried over
    """
    # Imported locally so this helper does not depend on a module-level import.
    import calendar

    if _datetime.month == 1:
        year, month = _datetime.year - 1, 12
    else:
        year, month = _datetime.year, _datetime.month - 1

    # monthrange()[1] is the number of days in the target month.
    day = min(_datetime.day, calendar.monthrange(year, month)[1])
    return datetime(year=year, month=month, day=day, hour=_datetime.hour, minute=_datetime.minute,
                    second=_datetime.second)


def add_one_month(_datetime):
    """
    Return the given datetime moved forward by one month.

    Thin wrapper that delegates to ``add_month`` with a step of one.

    :param _datetime: datetime to shift
    :return: whatever ``add_month`` returns for one month
    """
    shifted = add_month(_datetime, 1)
    return shifted


def is_leap_year(year):
    """
    Tell whether a year is a leap year.

    :param year: year number
    :return: True for years divisible by 400, or divisible by 4 but not by
        100; False otherwise
    """
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0


def add_month(_datetime, months):
    """
    Return the given datetime shifted by a number of calendar months.

    Year and month are computed with integer arithmetic: plain ``/`` is true
    division on Python 3 and produced a float year that ``datetime()``
    rejects. When the target month is shorter than the source day, the day is
    clamped to the last day of that month (30 for short months, 28/29 for
    February).

    :param _datetime: datetime to shift
    :param months: integer number of months to add; may be zero or negative
    :return: new naive ``datetime`` with the same hour/minute/second;
        microseconds and tzinfo are not carried over
    """
    # Imported locally so this helper does not depend on a module-level import.
    import calendar

    # Work with a zero-based month index so floor division / modulo give the
    # year carry and the target month directly, also for negative offsets.
    month_index = _datetime.month - 1 + months
    year = _datetime.year + month_index // 12
    month = month_index % 12 + 1

    # monthrange()[1] is the number of days in the target month.
    day = min(_datetime.day, calendar.monthrange(year, month)[1])
    return datetime(
        year=year, month=month, day=day, hour=_datetime.hour, minute=_datetime.minute, second=_datetime.second)


def show_time_for_people(_datetime):
    """
    Render a time in a human-friendly way relative to the current day.

    :param _datetime: the time to display
    :return: the time formatted with ``FORMAT_ONLY_TIME`` when it falls on the
        current day, the literal "昨天" when it falls on the previous day, and
        otherwise the date formatted with ``FORMAT_DATE_CHINA``
    """
    current = now()
    previous_day = get_yesterday()
    target_day = (_datetime.year, _datetime.month, _datetime.day)

    if target_day == (current.year, current.month, current.day):
        return datetime_to_str(_datetime, date_format=FORMAT_ONLY_TIME)
    if target_day == (previous_day.year, previous_day.month, previous_day.day):
        return u"昨天"
    return datetime_to_str(_datetime, date_format=FORMAT_DATE_CHINA)


def get_week_start_end(_datetime):
    """
    Return the start and end of the week that contains the given datetime.

    ``weekday()`` numbers Monday as 0 and Sunday as 6, so the start is the
    Monday of that week.

    Note: for convenience the end is the following Monday (exclusive bound),
    not Sunday. Both values keep the time-of-day of the input; nothing is
    truncated to midnight here.

    :param _datetime: datetime inside the week of interest
    :return: start, end
    """
    days_since_monday = _datetime.weekday()
    week_start = _datetime - timedelta(days=days_since_monday)
    week_end = _datetime + timedelta(days=7 - days_since_monday)
    return week_start, week_end


def is_birthday_today(birthday):
    """
    Tell whether a birthday falls on the current day.

    Only month and day are compared; the year is ignored.

    :param birthday: datetime
    :return: True when month and day match today's, else False
    """
    today = get_today()
    same_month = today.month == birthday.month
    return same_month and today.day == birthday.day
Clone this wiki locally