七、selenium模块


返回

7.1 浏览器驱动

7.2 基本使用方法

  • 示例一:

    from selenium import webdriver

    # Instantiate a browser object, passing the path to the browser driver.
    # Other browsers are created the same way: webdriver.Firefox(),
    # webdriver.Edge(), webdriver.PhantomJS(), webdriver.Safari().
    bro = webdriver.Chrome(executable_path='./chromedriver')

    # Have the browser issue a request: get
    bro.get('https://www.taobao.com/')

    # Source of the currently loaded page: page_source
    page_text = bro.page_source

    # Element lookup: find_element... returns a single element
    search_input = bro.find_element_by_id('q')
    # Element lookup: find_elements... returns a list of elements.
    # It is kept under its own name: a list has no send_keys()/clear(), so
    # rebinding search_input to it would break the interaction below.
    search_inputs = bro.find_elements_by_id('q')

    # Element interaction: send_keys, clear
    search_input.send_keys('iPhone')
    search_input.clear()

    # Run a piece of JavaScript in the page: execute_script
    bro.execute_script('window.scrollTo(0, document.body.scrollHeight)')

    # Click the search button
    btn = bro.find_element_by_css_selector('.btn-search')
    btn.click()

    # Have the browser issue a new request: get
    bro.get('https://www.baidu.com')

    # Go back in history
    bro.back()

    # Go forward in history
    bro.forward()

    # Close the browser
    bro.quit()
    
    

7.3 动作链 + iframe处理

  • 示例一:

    from selenium import webdriver

    # Import the class that implements action chains
    from selenium.webdriver import ActionChains

    bro = webdriver.Chrome(executable_path='./chromedriver')
    bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # The target element lives inside an iframe, so the lookup scope must be
    # switched to that frame first (the frame is found by its id).
    bro.switch_to.frame('iframeResult')
    div = bro.find_element_by_id('draggable')

    # Action chain: every call below only queues a step; nothing is sent to
    # the browser until perform() is called.
    action = ActionChains(bro)

    # Press and hold the mouse button on the element
    action.click_and_hold(div)

    for _ in range(5):
        # move_by_offset(x, y): x is the horizontal offset, y the vertical one
        action.move_by_offset(17, 0)

    # Queue the release and fire the whole chain exactly once. Previously
    # release() was queued but never performed, so the button was never let
    # go, and calling perform() inside the loop on the same chain could
    # replay steps that had already been queued.
    action.release().perform()

    # Close the browser
    bro.quit()
    
    
  • 示例二:模拟登录QQ空间

    from time import sleep
    from selenium import webdriver

    driver = webdriver.Chrome(executable_path='./chromedriver')
    driver.get('https://qzone.qq.com/')

    # The login form is rendered inside an iframe; enter its scope first
    driver.switch_to.frame('login_frame')

    # Switch to the account/password login tab
    driver.find_element_by_id("switcher_plogin").click()

    # Locate both input boxes up front, then fill each one in after a
    # one-second pause
    account_box = driver.find_element_by_id('u')
    secret_box = driver.find_element_by_id('p')
    for box, text in ((account_box, '*******'), (secret_box, '*******')):
        sleep(1)
        box.send_keys(text)

    # Pause once more, then submit the form
    sleep(1)
    driver.find_element_by_id('login_button').click()

    # Leave the result on screen for a few seconds
    sleep(5)

    # Close the browser
    driver.quit()
    
    

7.4 无可视化界面 + 规避检测

  • 示例一:

    from selenium import webdriver
    from selenium.webdriver import ChromeOptions

    # One options object carries both groups of settings. Passing two
    # separate objects through chrome_options= and options= does not merge
    # them: only one of the two is honoured, so either headless mode or the
    # detection-evasion switch was silently dropped.
    options = ChromeOptions()

    # Headless mode (no visible browser window)
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')

    # Detection evasion: drop the "enable-automation" switch
    options.add_experimental_option('excludeSwitches', ['enable-automation'])

    # Start Chrome with headless mode and detection evasion together
    bro = webdriver.Chrome(executable_path='./chromedriver', options=options)

    bro.get('https://www.baidu.com')

    print(bro.page_source)

    bro.quit()
    
    

7.5 12306登录示例

  • 示例一:

    import time

    from PIL import Image
    from selenium import webdriver
    from selenium.webdriver import ActionChains

    # NOTE(review): Chaojiying_Client is used below but is not imported or
    # defined in this snippet; it has to be made importable first (presumably
    # from the Chaojiying SDK sample module -- confirm the module name).

    # Login credentials. Placeholders only: real account details must not be
    # committed alongside example code.
    USERNAME = '*******'
    PASSWORD = '*******'

    bro = webdriver.Chrome(executable_path='./chromedriver')
    bro.get('https://kyfw.12306.cn/otn/login/init')
    time.sleep(1)

    # Locate the captcha image element
    code_img_ele = bro.find_element_by_xpath('//*[@id="loginForm"]/div/ul[2]/li[4]/div/div/div[3]/img')
    # Top-left corner of the element, e.g. {'x': 366, 'y': 274}
    location = code_img_ele.location
    # Size of the element, e.g. {'height': 190, 'width': 293}
    size = code_img_ele.size
    # Crop box: (left, upper, right, lower) corners of the captcha image
    crop_box = (
        int(location['x']), int(location['y']),
        int(location['x'] + size['width']), int(location['y'] + size['height']),
    )

    # Screenshot the whole page, then cut the captcha region out of it.
    # The context manager closes the screenshot file once the crop is saved.
    bro.save_screenshot('aa.png')
    with Image.open('./aa.png') as screenshot:
        screenshot.crop(crop_box).save('./code.png')

    # Submit the captcha image to Chaojiying for recognition
    chaojiying = Chaojiying_Client('账号', '密码', '软件ID')
    with open('code.png', 'rb') as code_file:
        im = code_file.read()
    result = chaojiying.PostPic(im, 9004)['pic_str']

    # Coordinates of the points to click: [[x1, y1], [x2, y2]].
    # The result is 'x,y' for one point or 'x1,y1|x2,y2' for several;
    # splitting on '|' yields a one-element list in the single-point case,
    # so both shapes go through the same loop.
    all_list = []
    for point in result.split('|'):
        x_str, y_str = point.split(',')[:2]
        all_list.append([int(x_str), int(y_str)])

    # Click every recognised point, using an offset relative to the captcha
    # image element
    for x, y in all_list:
        # Move to the offset inside the element, then click
        ActionChains(bro).move_to_element_with_offset(code_img_ele, x, y).click().perform()
        time.sleep(1)

    # Fill in the account and password, then submit
    bro.find_element_by_id('username').send_keys(USERNAME)
    time.sleep(2)
    bro.find_element_by_id('password').send_keys(PASSWORD)
    time.sleep(2)
    bro.find_element_by_id('loginSub').click()
    time.sleep(30)

    # Close the browser
    bro.quit()
    
    

7.6 PhantomJS模块

  • PhantomJS是一款无界面的浏览器,其自动化操作流程和上述操作谷歌浏览器是一致的。由于是无界面的,为了能够展示自动化操作流程,PhantomJS为用户提供了一个截屏的功能,使用save_screenshot函数实现。

    from selenium import webdriver

    # Path to the PhantomJS driver executable
    bro = webdriver.PhantomJS('PhantomJS驱动路径')
    # Open Baidu
    bro.get('http://www.baidu.com/')
    # Take a screenshot of the page
    bro.save_screenshot(r'phantomjs\baidu.png')
    # Shut the browser down so its process does not linger after the
    # script ends (the other examples all do this)
    bro.quit()
    
    

7.7 Pyppeteer模块

  • 示例一:

    import asyncio
    from pyppeteer import launch
    from lxml import etree

    async def main():
        """Open the demo page in a browser and print how many quote blocks it has."""
        # Launch the browser. headless=False keeps the window visible;
        # '--disable-infobars' is passed to hide the "Chrome is being
        # controlled by automated test software" banner.
        bro = await launch(headless=False, args=['--disable-infobars'])
        try:
            # Open a new tab in the browser
            page = await bro.newPage()
            # goto() is the equivalent of typing the URL into the address bar
            await page.goto('http://quotes.toscrape.com/js/')
            # Source of the page currently shown in the tab
            page_text = await page.content()
            tree = etree.HTML(page_text)
            div_list = tree.xpath('//div[@class="quote"]')
            print(len(div_list))
        finally:
            # Close the browser even if navigation or parsing fails, so the
            # browser process is not left running.
            await bro.close()
    asyncio.get_event_loop().run_until_complete(main())
    

7.8 示例

返回