資源簡介
實現對正方教務系統成績的爬取。
實現對正方教務系統成績的爬取。
代碼片段和文件信息
# -*- coding: gb2312 -*-
import cookielib
import os
import re
import string
import sys
import urllib
import urllib2

from bs4 import BeautifulSoup
# from PIL import Image  # only needed if captcha OCR is ever enabled

# Python 2 only: re-expose sys.setdefaultencoding (hidden by site.py) so that
# implicit str <-> unicode conversions use GB2312 instead of ASCII.
reload(sys)
sys.setdefaultencoding('gb2312')

# Root of the academic affairs site; the page names below are joined onto it.
baseUrl = 'http://222.24.19.201/'
codeUrl = 'CheckCode.aspx'    # captcha image
loginUrl = 'default2.aspx'    # login form target
scoreUrl = 'xscjcx.aspx'      # score query page
def downImg(url, name):
    """Download the captcha image and save it in the current working directory.

    Best-effort: any failure (network or file system) is reported on stdout
    and swallowed, so the caller is not interrupted.

    :param url: URL the captcha image is fetched from
    :param name: file name to store the image under, relative to os.getcwd()
    :return: None
    """
    try:
        response = urllib2.urlopen(urllib2.Request(url))
        try:
            content = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        # The context manager guarantees the file is closed if write() raises.
        with open(os.path.join(os.getcwd(), name), 'wb') as img_file:
            img_file.write(content)
    except Exception as e:
        print('Error : %s' % e)
def setCookie():
    """Create a cookie jar and install a global urllib2 opener that uses it.

    After this call every ``urllib2.urlopen`` request goes through the
    installed opener and therefore shares the returned jar.

    :return: the ``cookielib.LWPCookieJar`` backing the installed opener
    """
    cookie = cookielib.LWPCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
    urllib2.install_opener(opener)
    # Hit the site once so any cookies it hands out on first contact land in
    # the jar (presumably the ASP.NET session cookie -- verify against the
    # server); the response body itself is not needed.
    opener.open(baseUrl).close()
    return cookie
def login(username, password, cookie):
    """Log in to the academic affairs system and return the session details.

    Fetches the login page, saves the captcha as ``code.png`` in the current
    working directory, asks the user to type the captcha in, then posts the
    login form.  The process exits with status 1 when the server rejects the
    captcha or the logged-in page cannot be recognised.

    :param username: account name (posted as ``txtUserName``)
    :param password: account password (posted as ``TextBox2``)
    :param cookie: cookie jar returned by ``setCookie``; the session id is
        read from it after the login request
    :return: dict with ``name`` (GB2312-encoded display name with the
        student honorific suffix removed) and ``session_id``
    :raises KeyError: if the jar holds no ``ASP.NET_SessionId`` cookie
    """
    login_page = urllib2.urlopen(urllib2.Request(baseUrl)).read()
    downImg(baseUrl + codeUrl, 'code.png')
    # Captcha OCR (PIL + image_to_string) was never enabled, so the user
    # reads code.png and types the value in by hand.
    code = raw_input('请输入验证码:')

    soup = BeautifulSoup(login_page, 'html.parser')
    # Look the hidden field up by name; fall back to the first <input>, which
    # is where the original code assumed it to be.
    viewstate_field = soup.find('input', attrs={'name': '__VIEWSTATE'})
    if viewstate_field is None:
        viewstate_field = soup.find_all('input')[0]
    viewstate = viewstate_field.get('value')

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:14.0) Gecko/20100101 Firefox/14.0.1',
        'Referer': baseUrl,
    }
    postData = {
        '__VIEWSTATE': viewstate,
        'txtUserName': username,
        'TextBox2': password,
        'txtSecretCode': code,
        'RadioButtonList1': '学生',
        'Button1': '',
        'lbLanguage': '',
        'hidPdrs': '',
        'hidsc': '',
    }
    request = urllib2.Request(baseUrl + loginUrl, urllib.urlencode(postData), headers)
    text = urllib2.urlopen(request).read()

    if '验证码不正确' in text:
        print('验证码错误')
        sys.exit(1)

    # The element with id "xhxm" is what the display name is read from, so
    # its absence is treated as a failed login instead of crashing on None.
    greeting = BeautifulSoup(text, 'html.parser').find(id='xhxm')
    if greeting is None or greeting.string is None:
        print('登录失败')
        sys.exit(1)

    name = greeting.string.encode('gb2312').replace('同学', '')

    # Iterate the jar through its public interface rather than reaching into
    # the private _cookies mapping with a hard-coded host.
    session_id = None
    for item in cookie:
        if item.name == 'ASP.NET_SessionId':
            session_id = item.value
            break
    if session_id is None:
        raise KeyError('ASP.NET_SessionId')

    return {'name': name, 'session_id': session_id}
def?getScore(username?name?session_id?ddlXN?ddlXQ):
評論
共有 條評論