Using Selenium
# ASC power-consumption scraping code
from selenium import webdriver
from pyquery import PyQuery as pq
import time

while True:
    # Download the browser driver and pass its path to the webdriver constructor
    chrom = 'D:\\Desktop\\chromedriver_win32\\chromedriver.exe'
    brow = webdriver.Chrome(executable_path=chrom)
    # Use the webdriver object to open the page to be scraped
    brow.get("http://10.20.19.119:8080/ASC21/page/mainmain1.html")
    # Parse the page source with pyquery; since the program needs to keep
    # scraping, the polling interval is set to 5 s
    p = pq(brow.page_source)
    time.sleep(5)
    brow.close()
    # Write the results to a file
    with open("D:\\Desktop\\cost.txt", 'a+', encoding='utf-8') as f:
        for i in p('div.school-header').items():
            res = i('div.school-name > p').text() + " " + \
                  i.siblings('div.school-footer').find('div.school-power > div > h3').text() + \
                  time.ctime() + "\n"
            f.write(res)
        f.write("*****************************************\n")
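The selector chain above implies that each team on the page is rendered as a div.school-header block with a sibling div.school-footer holding the power reading. The real markup is not shown here, so the HTML in the following sketch is only an assumed, simplified version of that structure, used to illustrate how pyquery's .items(), .siblings() and .find() calls fit together.

# Self-contained pyquery sketch; the HTML is an assumption modelled on the
# selectors used in the script above, not the actual page source.
from pyquery import PyQuery as pq

html = """
<div class="school">
  <div class="school-header"><div class="school-name"><p>Team A</p></div></div>
  <div class="school-footer"><div class="school-power"><div><h3>3.2 kW</h3></div></div></div>
</div>
"""
doc = pq(html)
for header in doc('div.school-header').items():
    name = header('div.school-name > p').text()
    power = header.siblings('div.school-footer').find('div.school-power > div > h3').text()
    print(name, power)  # -> Team A 3.2 kW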
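Note that the executable_path argument used above is the Selenium 3 style; in Selenium 4 the driver path is passed through a Service object instead (and recent versions can locate chromedriver automatically). A minimal sketch of that variant, assuming Selenium 4 and the same driver path and URL as in the script above:

# Selenium 4 style: the chromedriver path goes through a Service object.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

service = Service('D:\\Desktop\\chromedriver_win32\\chromedriver.exe')
brow = webdriver.Chrome(service=service)
brow.get("http://10.20.19.119:8080/ASC21/page/mainmain1.html")
html = brow.page_source
brow.quit()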