基金爬虫

爬取天天基金网上指定基金的历史净值数据,计算各时间段内单位净值与累计净值的平均值,并可将结果发送到指定邮箱;配合 Linux crontab 定时运行效果更佳。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# -*- coding: utf-8 -*-

import datetime
import smtplib
from email.header import Header
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd


def get_url(url, params=None, proxies=None, timeout=10):
    """
    Fetch a page with an HTTP GET and return its body as text.

    :param url: address to request
    :param params: optional dict of query-string parameters
    :param proxies: optional proxies mapping passed through to requests
    :param timeout: seconds to wait for the server before giving up;
        without one, requests waits indefinitely and an unattended
        (cron) run could hang forever
    :return: the decoded response body
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    :raises requests.Timeout: if the server does not respond within *timeout*
    """
    rsp = requests.get(url, params=params, proxies=proxies, timeout=timeout)
    rsp.raise_for_status()
    return rsp.text


def _parse_fund_rows(soup, width):
    """
    Collect the table rows found in the first <tbody> of *soup*.

    Each row becomes a list holding the first child of every <td>, with
    NaN standing in for an empty cell. Rows whose cell count differs from
    *width* are dropped so that every returned row lines up with the
    header list (presumably this filters a "no data" placeholder row;
    verify against a live response).
    """
    body = soup.find("tbody")
    if body is None:
        return []

    rows = []
    for tr in body.find_all("tr"):
        cells = [td.contents[0] if td.contents else np.nan
                 for td in tr.find_all("td")]
        if len(cells) == width:
            rows.append(cells)
    return rows


def get_fund_data(code, per=10, sdate='', edate='', proxies=None):
    """
    Download the paginated 'lsjz' table for one fund as a DataFrame.

    :param code: fund code sent as the ``code`` query parameter
    :param per: rows requested per page
    :param sdate: start date string sent as ``sdate`` ('' for no bound)
    :param edate: end date string sent as ``edate`` ('' for no bound)
    :param proxies: optional proxies mapping forwarded to get_url
    :return: DataFrame with one column per <th> header and one row per
        table row; empty (but with the header columns) when no usable
        rows are returned
    :raises ValueError: if the response carries no ``pages:<n>`` marker
    """
    url = 'http://fund.eastmoney.com/f10/F10DataApi.aspx'
    params = {'type': 'lsjz', 'code': code, 'page': 1, 'per': per, 'sdate': sdate, 'edate': edate}

    html = get_url(url, params, proxies)
    soup = BeautifulSoup(html, 'html.parser')

    # The total page count is embedded in the response text as "pages:<n>".
    match = re.search(r'pages:\s*(\d+)', html)
    if match is None:
        raise ValueError("no page count found in response for fund %s" % code)
    pages = int(match.group(1))

    heads = [head.contents[0] for head in soup.find_all("th")]

    # Page 1 is already in hand: parse it directly instead of fetching it again.
    records = _parse_fund_rows(soup, len(heads))
    for page in range(2, pages + 1):
        params['page'] = page
        soup = BeautifulSoup(get_url(url, params, proxies), 'html.parser')
        records.extend(_parse_fund_rows(soup, len(heads)))

    # Building from a list of rows also works when there are no rows at all,
    # where slicing a NumPy array column by column would raise IndexError.
    return pd.DataFrame(records, columns=heads)


def send_email(msg):
    """
    Send *msg* as a plain-text UTF-8 e-mail through the configured SMTP host.

    Failures are reported on stdout rather than raised, so a mail problem
    does not abort the caller.

    :param msg: body text of the e-mail
    :return: None
    """
    # Third-party SMTP service
    mail_host = "smtp.exmail.qq.com"  # SMTP server
    mail_user = ""  # user name
    mail_pass = ""  # password

    sender = ''
    receivers = ['']  # recipient addresses, e.g. your QQ mailbox or any other mailbox

    message = MIMEText(msg, 'plain', 'utf-8')
    message['From'] = Header("基金每日均值", 'utf-8')
    message['To'] = Header("", 'utf-8')

    subject = '基金每日均值'
    message['Subject'] = Header(subject, 'utf-8')

    smtp = smtplib.SMTP()
    try:
        smtp.connect(mail_host, 25)  # 25 is the SMTP port
        smtp.login(mail_user, mail_pass)
        smtp.sendmail(sender, receivers, message.as_string())
        print("邮件发送成功")
    except Exception as e:
        print("Error: 无法发送邮件:" + str(e))
    finally:
        # Always release the connection. quit() ends the session politely
        # but fails if the connection never came up, so fall back to close().
        try:
            smtp.quit()
        except (smtplib.SMTPException, EnvironmentError):
            smtp.close()


# Look-back windows in report order:
# (calendar days back, label for the unit-value line, label for the cumulative-value line).
# Labels are emitted verbatim. Note the first one ends in an ASCII ": "
# while the rest use a full-width colon; this matches the existing output.
# NOTE(review): the "季" window is 30 * 4 = 120 days and the "周" window is
# 5 days, which look inconsistent with their labels -- confirm this is intended.
REPORT_WINDOWS = [
    (2, "2-单位净值: ", "2-累计净值:"),
    (3, "3-单位净值:", "3-累计净值:"),
    (5, "周-单位净值:", "周-累计净值:"),
    (30, "月-单位净值:", "月-累计净值:"),
    (30 * 4, "季-单位净值:", "季-累计净值:"),
    (30 * 6, "半年-单位净值:", "半年-累计净值:"),
    (365, "年-单位净值:", "年-累计净值:"),
    (365 * 2, "2年-单位净值:", "2年-累计净值:"),
    (365 * 3, "3年-单位净值:", "3年-累计净值:"),
]


def _window_start(today, days):
    """Return the date *days* before *today*, formatted as YYYY-MM-DD."""
    return (today - datetime.timedelta(days=days)).strftime("%Y-%m-%d")


def _column_mean(frame, column):
    """Return the mean of *column* in *frame*, rounded to 4 places, as text."""
    return str(round(frame[column].astype('float').mean(), 4))


def format_fund_report(fund, frames):
    """
    Build the report section for one fund.

    :param fund: fund code printed as the section header
    :param frames: one DataFrame per entry of REPORT_WINDOWS, in the same
        order, each with the columns u"单位净值" and u"累计净值"
    :return: text starting with a blank line and "<fund>:", followed by two
        lines per window (mean of each column), ending with a newline
    """
    lines = ["\n" + fund + ":"]
    for (_, unit_label, total_label), frame in zip(REPORT_WINDOWS, frames):
        lines.append(unit_label + _column_mean(frame, u"单位净值"))
        lines.append(total_label + _column_mean(frame, u"累计净值"))
    return "\n".join(lines) + "\n"


if __name__ == "__main__":
    # Take the clock once so every window is measured from the same instant.
    today = datetime.datetime.now()
    now = today.strftime("%Y-%m-%d")

    fund_list = ["519732"]
    report = ""
    for fund in fund_list:
        frames = [
            get_fund_data(fund, per=49, sdate=_window_start(today, days), edate=now)
            for days, _, _ in REPORT_WINDOWS
        ]
        report += format_fund_report(fund, frames)

    # One combined report for all funds is printed and mailed.
    print(report)
    send_email(report)