# coding: utf8
"""CSDN login helper.

Exposes ``head`` (the request headers sent with every request) and
``csdn_login`` (which returns a ``requests`` session after submitting the
CSDN login form).
"""
import re

import requests

# Browser-like User-Agent sent with every request made by this module.
head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
}

# Captures the values of the "lt" and "execution" form fields of the login
# page; both are posted back together with the credentials.
_LOGIN_TOKEN_RE = re.compile(
    r'name="lt" value="(.*?)".*\sname="execution" value="(.*?)"', re.S
)


class CsdnLoginError(IndexError):
    """Raised when the login page does not contain the expected form fields.

    Subclasses ``IndexError`` because that is what the lookup raised before
    this explicit check existed, so existing ``except IndexError`` handlers
    keep working.
    """


def csdn_login(username, password):
    """Submit the CSDN login form and return the session that was used.

    Args:
        username: CSDN account name.
        password: CSDN account password.

    Returns:
        The ``requests`` session that fetched the login page and posted the
        form.  NOTE(review): a successful HTTP status does not prove that
        the credentials were accepted -- confirm how the site signals a
        rejected login before relying on this.

    Raises:
        CsdnLoginError: the login page has no "lt"/"execution" fields.
        requests.HTTPError: either request returned a 4xx/5xx status.
    """
    url = "https://passport.csdn.net/account/login"
    s = requests.session()

    r = s.get(url, headers=head)
    r.raise_for_status()

    tokens = _LOGIN_TOKEN_RE.search(r.text)
    if tokens is None:
        raise CsdnLoginError(
            'login page at %s has no "lt"/"execution" form fields' % url
        )
    lt, execution = tokens.groups()

    payload = {
        "username": "%s" % username,
        "password": "%s" % password,
        "lt": lt,
        "execution": execution,
        "_eventId": "submit",
    }
    r2 = s.post(url, headers=head, data=payload)
    # Previously the response was discarded; at least surface HTTP errors.
    r2.raise_for_status()
    return s
#!/usr/bin/env python
#coding:utf8
#__author__ = "Huang Jinqiang<hzkeung@vip.qq.com"
"""Download the slide files of a CSDN meeting."""
import json
import os
import re
import sys
import time
import urllib

import csdn

# Python 2 only: make utf8 the implicit str/unicode codec so that non-ASCII
# file names can be formatted and printed. Neither name exists on Python 3.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

csdn_login = csdn.csdn_login
head = csdn.head

# Extracts the sid numbers embedded in the meeting page source.
# NOTE(review): the literal ",23," looks like the meeting id taken from the
# example URL (".../meeting_detail/23") -- confirm before using this script
# with another meeting.
_SID_RE = re.compile(r"(\d+),23,'http://meet.download.csdn.net/speech")


def _safe_filename(name):
    """Return only the last path component of the server-supplied *name*.

    The name comes from a remote response, so directory parts are dropped to
    keep every file inside the target directory.
    """
    return os.path.basename(("%s" % name).replace('\\', '/'))


def _fetch_to_file(s, file_url, target):
    """Download *file_url* with session *s* and write the body to *target*.

    Raises:
        IOError: the server did not answer 200, or answered with an HTML
            page (typically an error or login page) instead of a file.
    """
    r = s.get("%s" % file_url, headers=head)
    if r.status_code != 200:
        raise IOError("unexpected HTTP status %s for %s" % (r.status_code, file_url))
    content_type = r.headers.get('Content-Type', '')
    if content_type.lower().startswith('text/html'):
        # Saving this would produce a tiny, unreadable "document".
        raise IOError("got an HTML page instead of a file from %s" % file_url)
    with open(target, "wb") as out:
        out.write(r.content)


def download(url, s, intodir):
    """Download every speech file linked from a meeting page.

    Args:
        url: address of the meeting detail page.
        s: ``requests`` session used for all requests (see ``csdn_login``).
        intodir: target directory; created if it does not exist.

    A failure for a single file is printed and the loop moves on to the
    next file.
    """
    # Create the target directory (including missing parents).
    if not os.path.isdir(intodir):
        os.makedirs(intodir)

    def gethtml(page_url):
        """Return the response body of *page_url* as text."""
        return s.get("%s" % page_url, headers=head).text

    html = gethtml(url)
    sids = _SID_RE.findall(html)

    for sid in sids:
        listing = gethtml("http://download.csdn.net/index.php/meeting/speechlist/?sid=%s" % sid)
        # SECURITY: this used to be eval() on text returned by the server,
        # which lets the remote side run arbitrary code. The responses are
        # assumed to be JSON (they carry \uXXXX and \/ escapes), so they are
        # parsed as such; that also decodes those escapes.
        try:
            entries = json.loads(listing)
        except ValueError as e:
            print("speech list %s status: fail" % sid)
            print(e)
            continue

        for res in entries:
            of = _safe_filename(res['originfile'])
            # Pause between files, as the original script did.
            time.sleep(5)
            try:
                print("donwloading %s" % of)
                reply = gethtml('http://download.csdn.net/index.php/meeting/do_download_speech/%s/%s' % (res['id'], res['mid']))
                pdfurl = json.loads(reply).get('msg')
                if not pdfurl:
                    raise IOError("no download link in reply: %r" % reply)
                _fetch_to_file(s, pdfurl, os.path.join(intodir, of))
                print("donwload %s status: ok" % of)
            except Exception as e:
                # Per-file boundary: report and continue with the next file.
                print("donwload %s status: fail" % of)
                print(e)


if __name__ == "__main__":
    # Session returned by the login; arguments are user name and password.
    s = csdn_login('cdsn_user', 'csdn_password')
    download('http://download.csdn.net/meeting/meeting_detail/23', s, 'SDCC2016')
# 下载的文件都只有几 KB，PDF 文件也打不开。