-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy pathbaseinfo.py
331 lines (293 loc) · 14 KB
/
baseinfo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# encoding:utf8
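# Basic reference data for stocks: code list, quarterly financials, profile ratios and industry boards (scraped from 10jqka.com.cn).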
#
#
#
#
__author__ = 'nooper'
import re
import json
import cStringIO
import os
from bs4 import BeautifulSoup
from httpGet import httpGetContent
from common import decimal, validate_decimal, str_to_date
# NOTE: verified working.
def stock_base_code():
    """
    Yield the stock list stored in ``code.txt`` next to this module.

    ``code.txt`` is read as GBK-encoded text with one stock per line: the
    first two characters are the market prefix, the line contains a
    six-digit stock code, and the stock name is the second tab-separated
    column.

    Yields:
        (code, name, market) tuples, e.g. ``('600383', '金地集团', 'sh')``.
        ``market`` is ``sh`` (Shanghai) or ``sz`` (Shenzhen).
        Values are native ``str`` objects: UTF-8 encoded byte strings on
        Python 2 and text strings on Python 3.

    Raises:
        IOError/OSError: if ``code.txt`` cannot be opened.
        IndexError: if a non-blank line has no six-digit code or no tab.
    """
    # Resolve against the absolute module path: dirname(__file__) is '' when
    # the module is run from its own directory, which used to produce the
    # bogus path '/code.txt'.
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'code.txt')
    # ASCII digits only, matching what \d meant for Python 2 byte strings.
    code_pattern = re.compile(r'([0-9]{6})')

    # Read everything up front so the handle is closed even if the caller
    # abandons the generator early.
    with open(path, 'rb') as handle:
        raw_lines = handle.readlines()

    native_str_is_bytes = str is bytes  # True on Python 2 only
    for raw_line in raw_lines:
        # GBK trail bytes never collide with CR/LF, so stripping on the raw
        # bytes is safe and also copes with CRLF files.
        text = raw_line.strip(b'\r\n').decode('GBK')
        if not text.strip():
            continue  # blank or whitespace-only line
        if native_str_is_bytes:
            text = text.encode('utf8')
        market = text[:2]
        code = code_pattern.findall(text)[0]
        name = text.split('\t')[1]
        yield (code, name, market)
# NOTE: verified working.
def stock_finical_quarter(code):
    """
    Fetch the latest quarterly report figures for one stock from 10jqka.

    Downloads
    http://basic.10jqka.com.cn/<code>/xls/Important_declaredate.xls
    (tab-separated text, decoded as gb2312) and parses only its second
    line, i.e. the first row after the header.

    Column layout as described by the original author:
        0 report date, 1 basic EPS (ignored), 2 diluted EPS,
        3 net asset value per share, 4 cash flow per share,
        5 undistributed profit per share, 6 capital reserve per share,
        7 main business income, 8 total profit, 9 net profit,
        10 return on net assets (%), 11 gross margin (ignored),
        12 main income year-on-year growth (%),
        13 net profit year-on-year growth (%).

    Args:
        code: stock code substituted into the URL, e.g. ``600383``.

    Returns:
        A dict with the keys ``code``, ``date``, ``earnings``,
        ``net_asset_value``, ``cash_flow``, ``profit_per_share``,
        ``capital_fund``, ``main_income``, ``total_profit``,
        ``total_net_profit``, ``return_on_asserts``, ``income_rise`` and
        ``net_profit_rise``; an empty dict when nothing was downloaded or
        the download has no second line.
    """
    content = httpGetContent(
        "http://basic.10jqka.com.cn/%s/xls/Important_declaredate.xls" % code)
    if not content:
        return {}

    buf = cStringIO.StringIO(content.decode('gb2312').encode('utf8'))
    buf.readline()  # skip the header line
    row = buf.readline()
    if not row:
        return {}

    fields = row.strip('\n').split('\t')
    # (result key, column index); diluted EPS (column 2) is used for
    # "earnings" because the author considered it more accurate than the
    # basic EPS in column 1.
    decimal_columns = (
        ("earnings", 2),
        ("net_asset_value", 3),
        ("cash_flow", 4),
        ("profit_per_share", 5),
        ("capital_fund", 6),
        ("main_income", 7),
        ("total_profit", 8),
        ("total_net_profit", 9),
        ("return_on_asserts", 10),
        ("income_rise", 12),
        ("net_profit_rise", 13),
    )
    report = {"code": code, "date": str_to_date(fields[0], "%Y-%m-%d")}
    for key, column in decimal_columns:
        report[key] = decimal(fields[column])
    return report
def stock_base_info(code):
    """
    Scrape the profile table of http://basic.10jqka.com.cn/<code>/.

    The values come from the second ``<table>`` inside ``div#profile``;
    for each cell of interest the text of its second ``<span>`` is used.

    Args:
        code: stock code substituted into the URL.

    Returns:
        None when the page could not be fetched, otherwise a dict with
        ``code`` plus whichever of these cells exist:
            pe_ratio_dynamic   cell 0,  dynamic P/E ratio
            type               cell 3,  ``big`` / ``medium`` / ``small``
                               for large/mid/small cap, or the raw cell
                               text when it is none of those
            pe_ratio_static    cell 4,  static P/E ratio
            total_capital      cell 7,  total share capital
            pb                 cell 8,  price-to-book ratio
            circulate_capital  cell 11, circulating share capital
        Numeric cells are passed through ``validate_decimal``.
    """
    content = httpGetContent("http://basic.10jqka.com.cn/%s/" % code)
    if not content:
        return None

    def second_span_text(cell):
        # The first <span> is presumably the label; the value is the second.
        return cell.select('span')[1].text

    soup = BeautifulSoup(content)
    profile = soup.select('div#profile')
    cells = profile[0].select('table')[1].select('td')

    numeric_keys = {
        0: "pe_ratio_dynamic",
        4: 'pe_ratio_static',
        7: 'total_capital',
        8: 'pb',
        11: 'circulate_capital',
    }
    cap_types = {u"大盘股": 'big', u'中盘股': 'medium', u"小盘股": 'small'}

    info = {"code": code}
    for position, cell in enumerate(cells):
        if position == 3:
            label = second_span_text(cell)
            info['type'] = cap_types.get(label, label)
        elif position in numeric_keys:
            info[numeric_keys[position]] = validate_decimal(second_span_text(cell))
    return info
# TODO: the two scrapers below are disabled (commented out) because their data parsing has problems.
# def getStockFinicalAdvPost():
# """抓取同花顺业绩预告分析板块
# http://data.10jqka.com.cn/interface/financial/yjyg/enddate/desc/4/null/0/2013-06-30
# """
# headers = {
# "Host": "data.10jqka.com.cn",
# "Referer": "http://data.10jqka.com.cn/financial/yjyg/",
# "X-Requested-With": "XMLHttpRequest"
# }
# number = range(1, 50)
# urls = ["http://data.10jqka.com.cn/interface/financial/yjyg/enddate/desc/%s/null/0/2013-06-30" % num for num in
# number]
# for url in urls:
# content = httpGetContent(url, headers)
# if content:
# jsoncontent = json.loads(content)
# stock_list = jsoncontent["data"]
# if not stock_list:
# continue
# for stock in stock_list:
# stock_dict = {}
# stock_dict["postdate"] = stock["enddate"] #公告日期
# stock_dict["date"] = stock["rdate"] #报表日期
# stock_dict["code"] = stock["stockcode"] #日期
# stock_dict["per_eps"] = float(stock["mgsytqb"]) #每股收益
# stock_dict["profit_percent"] = float(stock["jlrbdfd"]) #净利润变动幅度
# stock_dict["increase_type"] = stock["yglx"] #预告类型
# stock_dict["content"] = stock["ygzy"] #预告摘要
# stock_dict["type"] = stock["datename"] #中报
# yield stock_dict
# def getStockFinicalPerPost():
# """
# 抓取同花顺业绩快报板块
# http://data.10jqka.com.cn/financial/yjkb/
# """
# headers = {
# "Host": "data.10jqka.com.cn",
# "Referer": "http://data.10jqka.com.cn/financial/yjkb/",
# "X-Requested-With": "XMLHttpRequest"
# }
# count = range(1, 50)
# urls = ["http://data.10jqka.com.cn/interface/financial/yjkb/rdate/desc/%s/null/2013-06-30" % num for num in count]
# for url in urls:
# content = httpGetContent(url, headers)
# if content:
# jsoncontent = json.loads(content)
# stock_list = jsoncontent["data"]
# if not stock_list:
# continue
# for stock in stock_list:
# stock_dict = {}
# stock_dict["code"] = stock["stockcode"]
# stock_dict["postdate"] = stock["rdate"] #公告日期
# stock_dict["per_eps"] = float(stock["mgsy"]) #每股收益
# stock_dict["asset_increase_percent"] = __convertfloat(stock["jlrtqb"]) #净利润同比增长
# stock_dict["income_increase_percent"] = __convertfloat(stock["yysrtqb"]) #营业额收入同比增长
# stock_dict["assets_percent"] = __convertfloat(stock["zcsyl"]) #资产收益率 百分比
# stock_dict["income_total"] = __convertfloat(stock["yysr"]) #营业收入百万元
# stock_dict["net_profit"] = __convertfloat(stock["jlr"]) #净利润收入,百万元
# stock_dict["post_type"] = stock["datename"] #预告类型
# yield stock_dict
def stock_finical_post():
    """
    Scrape the 10jqka earnings-announcement listing, pages 1 to 59.

    Page URL pattern:
        http://data.10jqka.com.cn/financial/yjgg/page/<n>/ajax/1/
    (human-readable page: http://data.10jqka.com.cn/financial/yjgg/).

    Yields:
        One dict per ``tbody > tr`` row, filled from the row's cells:
            code             cell 1, text of the first link
            date             cell 3, parsed with ``%Y-%m-%d``
            earnings         cell 4, earnings per share
            main_income      cell 5, operating income
            income_rise      cell 6, operating income growth (%)
            net_profit       cell 7, net profit
            net_profit_rise  cell 8, net profit growth (%)
        A row with fewer cells yields a dict with fewer keys.
    """
    # NOTE(review): the Referer names the "yjyg" page although the "yjgg"
    # listing is requested -- confirm whether this is intentional.
    headers = {
        "Host": "data.10jqka.com.cn",
        "Referer": "http://data.10jqka.com.cn/financial/yjyg/",
        "X-Requested-With": "XMLHttpRequest"
    }
    plain_decimal = {4: "earnings", 5: "main_income", 7: "net_profit"}
    checked_decimal = {6: "income_rise", 8: "net_profit_rise"}

    page_urls = []
    for page in range(1, 60):
        page_urls.append(
            "http://data.10jqka.com.cn/financial/yjgg/page/%s/ajax/1/" % page)

    for page_url in page_urls:
        content = httpGetContent(page_url, headers, "gb2312")
        if not content:
            continue
        soup = BeautifulSoup(content)
        for row in soup.select("tbody > tr"):
            record = {}
            for column, cell in enumerate(row.select('td')):
                if column == 1:
                    record["code"] = cell.select('a')[0].string
                elif column == 3:
                    record["date"] = str_to_date(cell.string, "%Y-%m-%d")
                elif column in plain_decimal:
                    record[plain_decimal[column]] = decimal(cell.string)
                elif column in checked_decimal:
                    record[checked_decimal[column]] = validate_decimal(cell.string)
            yield record
# NOTE: testing completed.
def _industry_page_numbers(num, page_size=50):
    """Return the 1-based detail page numbers needed to list ``num`` stocks."""
    # Ceiling division written with // so it behaves the same on Python 2
    # and 3. At least one page is always requested, as before.
    pages = -(-num // page_size)
    return list(range(1, max(pages, 1) + 1))


def stock_industry():
    """
    Yield every 10jqka industry board together with its member stock codes.

    Board listing (two pages are requested):
        http://q.10jqka.com.cn/interface/stock/thshy/zdf/desc/<page>/quote/quote
    Members of a board, looked up by its ``hycode``:
        http://q.10jqka.com.cn/interface/stock/detail/zdf/desc/<page>/1/<hycode>
    The member endpoint is paged; this code assumes 50 stocks per page
    (TODO confirm against the API).

    Note from the original author: the listing can include some ST stocks,
    although the actual stock-picking data does not.

    Yields:
        dict with the keys ``industry_id`` (``platecode``),
        ``industry_str`` (``hycode``), ``name`` (``platename``),
        ``num`` (member count reported by the listing, as int) and
        ``stock`` (list of ``stockcode`` values collected from the member
        pages that could be fetched).
    """
    listing_urls = (
        'http://q.10jqka.com.cn/interface/stock/thshy/zdf/desc/1/quote/quote',
        'http://q.10jqka.com.cn/interface/stock/thshy/zdf/desc/2/quote/quote')
    for listing_url in listing_urls:
        content = httpGetContent(listing_url)
        if not content:
            continue
        for industry in json.loads(content)['data']:
            num = int(industry['num'])
            industry_str = industry['hycode']

            code_list = []
            # The previous "num / 50 + 1" arithmetic requested one page too
            # many whenever num was an exact multiple of 50.
            for page in _industry_page_numbers(num):
                industry_url = (
                    'http://q.10jqka.com.cn/interface/stock/detail/zdf/desc/%s/1/%s'
                    % (page, industry_str))
                industry_content = httpGetContent(industry_url)
                if industry_content:
                    for stock in json.loads(industry_content)['data']:
                        code_list.append(stock['stockcode'])

            yield {
                'industry_id': industry['platecode'],
                'industry_str': industry_str,
                'name': industry['platename'],
                'num': num,
                'stock': code_list,
            }
def stock_industry_day():
    """
    Yield the current quote snapshot of every 10jqka industry board.

    Source (two pages are requested):
        http://q.10jqka.com.cn/interface/stock/thshy/zdf/desc/<page>/quote/quote
    (human-readable page: http://q.10jqka.com.cn/stock/thshy/).

    Yields:
        dict with the keys
            date           first 10 characters of the response's ``rtime``,
                           parsed with ``%Y-%m-%d``
            name           ``platename``
            industry_code  ``platecode``
            price          ``zxj``, latest price
            volume         ``cjl``, total volume
            total          ``cje``, total turnover
            rise_percent   ``zdf``, percentage change
            rise_price     ``zde``, absolute price change
            net_inflow     ``jlr``, net inflow
        The six quote values are converted with ``float``.
    """
    # (result key, key in the JSON record), converted in this order.
    float_fields = (
        ("price", "zxj"),
        ("volume", "cjl"),
        ("total", "cje"),
        ("rise_percent", "zdf"),
        ("rise_price", "zde"),
        ("net_inflow", "jlr"),
    )
    page_urls = (
        'http://q.10jqka.com.cn/interface/stock/thshy/zdf/desc/1/quote/quote',
        'http://q.10jqka.com.cn/interface/stock/thshy/zdf/desc/2/quote/quote')
    for page_url in page_urls:
        payload = httpGetContent(page_url)
        if not payload:
            continue
        parsed = json.loads(payload)
        for entry in parsed["data"]:
            snapshot = {
                "date": str_to_date(parsed["rtime"][:10], "%Y-%m-%d"),
                "name": entry["platename"],
                "industry_code": entry["platecode"],
            }
            for key, source_key in float_fields:
                snapshot[key] = float(entry[source_key])
            yield snapshot
if __name__ == "__main__":
    # Executing this file directly is a no-op.
    pass