7.1 浏览器驱动
7.2 基本使用方法
-
示例一:
"""Basic Selenium usage: open pages, locate elements, interact, navigate."""
from selenium import webdriver

# Instantiate a browser object, passing the path of the browser driver binary.
bro = webdriver.Chrome(executable_path='./chromedriver')
# Other browsers are instantiated the same way:
#   browser = webdriver.Firefox()
#   browser = webdriver.Edge()
#   browser = webdriver.PhantomJS()
#   browser = webdriver.Safari()

# Make the browser issue a GET request.
bro.get('https://www.taobao.com/')

# page_source holds the HTML source of the page currently loaded.
page_text = bro.page_source

# Element lookup with find_element_*: returns a single element.
search_input = bro.find_element_by_id('q')

# Element lookup with find_elements_*: returns a list of elements.
# The list gets its own name; rebinding search_input to it would make the
# send_keys()/clear() calls below fail, because a list has neither method.
search_inputs = bro.find_elements_by_id('q')

# Element interaction: send_keys types text, clear empties the field.
search_input.send_keys('iPhone')
search_input.clear()

# execute_script runs JavaScript in the page; this scrolls to the bottom.
bro.execute_script('window.scrollTo(0, document.body.scrollHeight)')

# Click the search button.
btn = bro.find_element_by_css_selector('.btn-search')
btn.click()

# Issue a new request from the same browser.
bro.get('https://www.baidu.com')
# Go back one step in the history.
bro.back()
# Go forward one step in the history.
bro.forward()

# Close the browser.
bro.quit()
7.3 动作链 + iframe处理
-
示例一:
"""Drag an element that lives inside an iframe using an action chain."""
from selenium import webdriver
# Import the class that implements action chains.
from selenium.webdriver import ActionChains

bro = webdriver.Chrome(executable_path='./chromedriver')
bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

# The target element is inside an iframe, so the lookup scope has to be
# switched to that iframe first (the iframe is identified by its id).
bro.switch_to.frame('iframeResult')
div = bro.find_element_by_id('draggable')

# Build the action chain.
action = ActionChains(bro)
# Press and hold the mouse button on the element.
action.click_and_hold(div)
for _ in range(5):
    # move_by_offset(x, y): x is horizontal, y is vertical.
    # perform() executes the queued actions immediately.
    action.move_by_offset(17, 0).perform()

# release() only queues the mouse-up; without perform() it is never sent to
# the browser and the drag is left unfinished.
action.release().perform()

# Close the browser.
bro.quit()
-
示例二:模拟登录QQ空间
"""Simulated QQ Zone login through the account/password form."""
from time import sleep

from selenium import webdriver

bro = webdriver.Chrome(executable_path='./chromedriver')
bro.get('https://qzone.qq.com/')

# The login form is rendered inside an iframe; enter its scope first.
bro.switch_to.frame('login_frame')

# Switch the form over to account/password login.
bro.find_element_by_id("switcher_plogin").click()

# Locate both input fields up front, then fill them in one after another,
# pausing one second before each entry and once more afterwards.
account_field = bro.find_element_by_id('u')
secret_field = bro.find_element_by_id('p')
for field, text in ((account_field, '*******'), (secret_field, '*******')):
    sleep(1)
    field.send_keys(text)
sleep(1)

# Submit the form and leave the result on screen for a few seconds.
bro.find_element_by_id('login_button').click()
sleep(5)

# Close the browser.
bro.quit()
7.4 无可视化页面 + 规避检测
-
示例一:
"""Run Chrome without a visible window while hiding the automation switch."""
from selenium import webdriver
from time import sleep
# Options class used for the headless (no visible window) settings.
from selenium.webdriver.chrome.options import Options
# Options class used for the anti-detection setting; it is the same class
# as Options above, exported under another name.
from selenium.webdriver import ChromeOptions

# One options object carries every setting. webdriver.Chrome() uses a single
# options object: when both chrome_options= and options= are passed, the
# deprecated chrome_options= wins and whatever was set on options= is
# silently dropped, so the two groups of settings must not be split.
option = ChromeOptions()

# No visible browser window.
option.add_argument('--headless')
option.add_argument('--disable-gpu')

# Anti-detection: exclude the enable-automation switch.
option.add_experimental_option('excludeSwitches', ['enable-automation'])

# Headless + anti-detection together.
bro = webdriver.Chrome(executable_path='./chromedriver', options=option)
bro.get('https://www.baidu.com')
print(bro.page_source)
bro.quit()
7.5 12306登录示例
-
示例一:
"""12306 login example: solve the click-captcha, then submit credentials.

The page is screenshotted, the captcha image is cropped out and sent to the
Chaojiying recognition service, and the returned coordinates are clicked
with an action chain before the login form is filled in.
"""
from selenium import webdriver
import time
from PIL import Image
from selenium.webdriver import ActionChains

# NOTE(review): Chaojiying_Client is neither defined nor imported in this
# snippet; it presumably comes from the Chaojiying sample client module and
# has to be importable for the script to run -- confirm.

bro = webdriver.Chrome(executable_path='./chromedriver')
bro.get('https://kyfw.12306.cn/otn/login/init')
time.sleep(1)

# Locate the captcha <img> element.
code_img_ele = bro.find_element_by_xpath('//*[@id="loginForm"]/div/ul[2]/li[4]/div/div/div[3]/img')
# Top-left corner of the element, e.g. {'x': 366, 'y': 274}.
location = code_img_ele.location
# Size of the element, e.g. {'height': 190, 'width': 293}.
size = code_img_ele.size
# Crop box: (left, top, right, bottom) of the captcha image.
rangle = (
    int(location['x']),
    int(location['y']),
    int(location['x'] + size['width']),
    int(location['y'] + size['height']),
)

# Screenshot the current page and cut the captcha region out of it. The
# context manager closes the screenshot file once the crop has been saved.
bro.save_screenshot('aa.png')
with Image.open('./aa.png') as page_shot:
    frame = page_shot.crop(rangle)
    frame.save('./code.png')

# Submit the captcha image to Chaojiying for recognition.
chaojiying = Chaojiying_Client('账号', '密码', '软件ID')
with open('code.png', 'rb') as code_file:
    im = code_file.read()
result = chaojiying.PostPic(im, 9004)['pic_str']

# Points to click, as [[x1, y1], [x2, y2], ...]. The result is parsed as
# 'x,y' pairs separated by '|'; split('|') on a string without '|' yields a
# single item, so one code path covers both the single- and multi-point case.
all_list = []
for point in result.split('|'):
    coords = point.split(',')
    all_list.append([int(coords[0]), int(coords[1])])

# Click every point, using offsets relative to the captcha element.
for x, y in all_list:
    # Move to the given offset on the element, then click.
    ActionChains(bro).move_to_element_with_offset(code_img_ele, x, y).click().perform()
    time.sleep(1)

# Account and password.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from the environment or a config file that is not committed.
bro.find_element_by_id('username').send_keys('www.zhangbowudi@qq.com')
time.sleep(2)
bro.find_element_by_id('password').send_keys('bobo_15027900535')
time.sleep(2)
bro.find_element_by_id('loginSub').click()
time.sleep(30)

# Close the browser.
bro.quit()
7.6 PhantomJS模块
-
PhantomJS是一款无界面的浏览器,其自动化操作流程和上述操作谷歌浏览器是一致的。由于是无界面的,为了能够展示自动化操作流程,PhantomJS为用户提供了一个截屏的功能,使用save_screenshot函数实现。
"""Open a page in PhantomJS and save a screenshot of it."""
from selenium import webdriver

# Path to the PhantomJS executable.
bro = webdriver.PhantomJS('PhantomJS驱动路径')

# Open Baidu.
bro.get('http://www.baidu.com/')

# Save a screenshot of the current page.
bro.save_screenshot(r'phantomjs\baidu.png')

# Close the browser so the PhantomJS process does not keep running after the
# script ends, as the other examples do.
bro.quit()
7.7 Pyppeteer模块
-
示例一:
import asyncio

from lxml import etree
from pyppeteer import launch


async def main():
    """Load a JavaScript-rendered page with Pyppeteer and count its quotes.

    Launches a browser, opens the page in a new tab, parses the rendered
    HTML with lxml and prints how many ``div.quote`` nodes it contains.
    """
    # headless=False is passed, i.e. headless mode is switched off;
    # --disable-infobars is meant to hide the "Chrome is being controlled by
    # automated test software" notice bar.
    browser = await launch(headless=False, args=['--disable-infobars'])

    # Open a new tab and navigate it, as if the URL had been typed in.
    tab = await browser.newPage()
    await tab.goto('http://quotes.toscrape.com/js/')

    # Parse the source of the page as currently held by the browser.
    html = await tab.content()
    quote_nodes = etree.HTML(html).xpath('//div[@class="quote"]')
    print(len(quote_nodes))

    await browser.close()


asyncio.get_event_loop().run_until_complete(main())