csdn.py
#coding: utf8
'''
CSDN登录
'''
import re
import requests
# Request headers shared by every HTTP call in these scripts: a desktop
# Chrome User-Agent string is sent instead of the requests default.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/54.0.2840.99 Safari/537.36"
    ),
}
def csdn_login(username, password):
    """Log in to CSDN and return the ``requests`` session that was used.

    The login page is fetched first so that the hidden ``lt`` and
    ``execution`` form tokens can be read from it; they are then posted
    back together with the credentials.

    Args:
        username: CSDN account name.
        password: CSDN account password.

    Returns:
        The ``requests`` session on which the login form was submitted.

    Raises:
        ValueError: if the login page does not contain the ``lt`` and
            ``execution`` form tokens.
        requests.HTTPError: if either request returns an HTTP error status.
    """
    url = "https://passport.csdn.net/account/login"
    s = requests.session()

    r = s.get(url, headers=head)
    r.raise_for_status()

    # Both hidden inputs are captured in one pass; re.S lets ".*" span the
    # line breaks between them.
    tokens = re.search(
        r'name="lt" value="(.*?)".*\sname="execution" value="(.*?)"',
        r.text,
        re.S,
    )
    if tokens is None:
        # Previously this surfaced as a bare IndexError on an empty list.
        raise ValueError(
            "CSDN login page at %s does not contain the lt/execution form tokens" % url
        )

    payload = {
        "username": "%s" % username,
        "password": "%s" % password,
        "lt": tokens.group(1),
        "execution": tokens.group(2),
        "_eventId": "submit",
    }
    r2 = s.post(url, headers=head, data=payload)
    # NOTE(review): only the HTTP status is checked. The response body is not
    # inspected, so rejected credentials or a pending account verification
    # may still go unnoticed here -- confirm what the endpoint returns.
    r2.raise_for_status()
    return s
dl_csdn_meeting_pdf.py
#!/usr/bin/env python
#coding:utf8
#__author__ = "Huang Jinqiang <hzkeung@vip.qq.com>"
"""
CSDN会议PPT下载
"""
import os
import re
import sys
import time
import urllib
import csdn
# Python 2 idiom: reloading sys makes setdefaultencoding() available again so
# that implicit str/unicode conversions in this script use UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Expose the login helper and the shared request headers from the csdn
# module under local names.
csdn_login, head = csdn.csdn_login, csdn.head
def download(url, s, intodir, delay=5):
    """Download the speech files linked from a CSDN meeting page.

    The meeting page is scanned for session ids. For each id the speech
    list is fetched, and for each speech a download link is requested and
    the file behind it is saved into ``intodir``.

    Progress is printed to stdout. A speech whose reply carries no link,
    whose download response is an HTTP error or an HTML page, or whose
    file cannot be written is reported as failed and skipped; the
    remaining speeches are still processed.

    Args:
        url: address of the meeting detail page.
        s: ``requests`` session used for every request (the caller passes
            the session returned by ``csdn_login``).
        intodir: directory the files are written to; created if missing.
        delay: seconds to wait before requesting each download link.
            Defaults to 5, the pause that used to be hard-coded.

    Raises:
        ValueError: if a speech-list or download-link reply is not valid
            JSON.
    """
    # Function-scope import: json is not among the file's top-level imports.
    import json

    if not os.path.isdir(intodir):
        # makedirs rather than mkdir so a nested target path also works.
        os.makedirs(intodir)

    def gethtml(page_url):
        """Return the body text of ``page_url`` fetched with session ``s``."""
        return s.get("%s" % page_url, headers=head).text

    def report_failure(name, reason):
        """Print the failure line for ``name`` followed by the reason."""
        print("donwload %s status: fail" % name)
        print(reason)

    html = gethtml(url)
    # Each match captures the number that precedes ",23,'<speech base URL>"
    # in the page source; it is used as the sid of a speech list.
    # NOTE(review): 23 is hard-coded and is presumably the meeting id, so
    # pages of other meetings would yield no matches -- confirm before reuse.
    sids = re.findall(r"(\d+),23,'http://meet.download.csdn.net/speech", html)

    for sid in sids:
        listing = gethtml(
            "http://download.csdn.net/index.php/meeting/speechlist/?sid=%s" % sid
        )
        # SECURITY: these replies used to be passed to eval(), which executes
        # whatever arrives over this plain-http connection. The old code
        # undid JSON escapes (\/ and \uXXXX) by hand, so the replies are
        # parsed as JSON instead; that also decodes those escapes.
        for res in json.loads(listing):
            of = res['originfile']
            # Keep only the last path component so a name such as "../../x"
            # coming from the server cannot escape intodir.
            filename = os.path.basename(of.replace("\\", "/"))
            if not filename:
                report_failure(of, "unusable file name in speech list entry")
                continue

            # Pause between link requests (kept from the original script).
            if delay:
                time.sleep(delay)
            reply = json.loads(gethtml(
                'http://download.csdn.net/index.php/meeting/do_download_speech/%s/%s'
                % (res['id'], res['mid'])
            ))
            pdfurl = reply.get('msg')
            if not pdfurl:
                report_failure(filename, "no download link in reply: %r" % (reply,))
                continue

            r = s.get("%s" % pdfurl, headers=head)
            content_type = r.headers.get("Content-Type", "")
            # An error status or an HTML body is presumably a login/error
            # page rather than the document; do not save it as if it were.
            if not r.ok or content_type.startswith("text/html"):
                report_failure(
                    filename,
                    "unexpected response: HTTP %s, Content-Type %r"
                    % (r.status_code, content_type),
                )
                continue

            # Best effort per file: any problem while saving is reported and
            # the loop moves on to the next speech.
            try:
                with open(os.path.join(intodir, filename), "wb") as code:
                    print("donwloading %s" % filename)
                    code.write(r.content)
                print("donwload %s status: ok" % filename)
            except Exception as e:
                report_failure(filename, e)
if __name__ == "__main__":
    # Log in with the account name and password (placeholders here), then
    # fetch the files of meeting 23 into the SDCC2016 directory.
    session = csdn_login('cdsn_user', 'csdn_password')
    download(
        'http://download.csdn.net/meeting/meeting_detail/23',
        session,
        'SDCC2016',
    )
该脚本适用于下载 CSDN 会议的 PDF 文档。
下载下来的文件都只有几 KB,PDF 文件也打不开。
请先确认自己的 CSDN 账号是否可以正常登录使用;我发现需要先完成手机号码验证,否则账号无法正常使用。