本文分为四部分:

一、绕过对selenium的检测

直接上代码 ↓

import os
import time
import json
import shutil
import random
from uuid import uuid4
from _thread import start_new_thread
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Chrome configuration (adjust these paths for your own machine).
# Path of the Chrome executable. The value deliberately contains literal
# double quotes because the path itself contains a space.
chrome_execute = '"C:/Program Files/Google/Chrome/Application/chrome.exe"'
# Path of the chromedriver binary; its version must match the Chrome version.
# Download: https://googlechromelabs.github.io/chrome-for-testing/
chrome_driver = "D:/Anaconda3/chromedriver.exe"
# Root directory that holds Chrome user-data (profile) directories.
chrome_user_path = "D:/selenum"
# Profile template: a freshly created profile shows several first-run prompts,
# so a pre-configured, ready-to-use profile is kept here instead.
chrome_default_user = chrome_user_path + "/user_data"

# Launch Chrome with remote debugging enabled
def start_chrome(port, new_user_data):
    """Start Chrome with a DevTools debugging port and wait for the process.

    The call blocks until the launched process returns, so callers that need
    to keep working should run it in a separate thread.

    Args:
        port: TCP port passed to ``--remote-debugging-port``.
        new_user_data: Profile directory passed to ``--user-data-dir``.
    """
    # Function-scope import: the file's import list does not include subprocess.
    import subprocess

    # Optional: start from a fresh copy of the profile template.
    # if os.path.exists(new_user_data):
    #     shutil.rmtree(new_user_data)
    # shutil.copytree(chrome_default_user, new_user_data)

    # The previous version built the command as a single-quoted f-string that
    # spanned several lines, which is a syntax error. An argument list avoids
    # that and also sidesteps shell quoting for paths that contain spaces.
    command = [
        # chrome_execute carries literal double quotes meant for a shell
        # command line; an argument list needs the bare path.
        chrome_execute.strip('"'),
        f"--remote-debugging-port={port}",
        f"--user-data-dir={new_user_data}",
        "--start-maximized",
    ]
    subprocess.run(command, check=False)

# Start Chrome in the background and wait for its debugging port
def make_chrome():
    """Launch Chrome on a random debugging port and wait until it is reachable.

    Returns:
        tuple: ``(driver_port, new_user_data)`` - the debugging port that was
        chosen and the profile directory Chrome was started with.
    """
    # Function-scope import: the file's import list does not include socket.
    import socket

    driver_port = random.randint(50000, 60000)
    # To isolate runs, a unique profile directory can be used instead:
    # new_user_data = f"{chrome_user_path}/{uuid4().hex}"
    new_user_data = chrome_default_user
    # start_chrome does not return while the browser command is running,
    # so it is started on its own thread.
    start_new_thread(start_chrome, (driver_port, new_user_data))

    # Poll the debugging port instead of sleeping for a fixed 5 seconds:
    # returns as soon as Chrome is listening and tolerates slower start-ups.
    # If the port never opens, fall through after the deadline and let the
    # caller's connection attempt report the failure.
    deadline = time.monotonic() + 15
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("127.0.0.1", driver_port), timeout=0.5):
                break
        except OSError:
            time.sleep(0.2)
    return driver_port, new_user_data

# Wait for the page to load
def wait_for_page_load(driver, timeout=10):
    """Block until a ``<body>`` element is present in the current page.

    Args:
        driver: WebDriver instance to poll.
        timeout: Maximum number of seconds handed to ``WebDriverWait``.
    """
    waiter = WebDriverWait(driver, timeout)
    body_locator = (By.TAG_NAME, "body")
    body_is_present = EC.presence_of_element_located(body_locator)
    waiter.until(body_is_present)

# Main flow
def main():
    """Attach Selenium to a separately launched Chrome and print Baidu's HTML."""
    # Only the port is needed here; the profile directory is not used again.
    driver_port, _ = make_chrome()

    chrome_options = Options()
    # Attach to the already running browser through its debugging address
    # instead of letting chromedriver start a new one.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # The URL must be a plain URL; the previous literal was wrapped in
        # angle brackets, which is not a valid address.
        driver.get("https://www.baidu.com")
        # Wait for the body before reading the page source.
        wait_for_page_load(driver)
        print(driver.page_source)
    finally:
        # Always release the window and the driver session, even on failure.
        try:
            driver.close()
        finally:
            driver.quit()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

二、获取请求数据

整体代码逻辑不变,主要新增解析请求数据的函数 parse_requests,并相应修改主逻辑函数 main。

# Parse request data
def parse_requests(driver):
    """Print the URL and headers of each request found in the performance log.

    Args:
        driver: Object whose ``get_log("performance")`` yields entries with a
            JSON-encoded ``"message"`` field.
    """
    for entry in driver.get_log("performance"):
        event = json.loads(entry["message"])["message"]
        # Only request events are of interest; skip everything else early.
        if event["method"] != "Network.requestWillBeSent":
            continue
        sent = event["params"]["request"]
        request_headers, request_url = sent["headers"], sent["url"]
        print(request_url)
        print(request_headers)

# Main flow
def main():
    """Attach to Chrome, load Baidu and print the requests seen while loading."""
    # Only the port is needed here; the profile directory is not used again.
    driver_port, _ = make_chrome()

    chrome_options = Options()
    # Attach to the already running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    # Turn on the performance log so network events can be read back.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # Enable the DevTools Network domain.
        driver.execute_cdp_cmd('Network.enable', {})
        # The URL must be a plain URL; the previous literal was wrapped in
        # angle brackets, which is not a valid address.
        driver.get("https://www.baidu.com")
        wait_for_page_load(driver)
        # Parse the request data.
        parse_requests(driver)
    finally:
        # Always release the window and the driver session, even on failure.
        try:
            driver.close()
        finally:
            driver.quit()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

三、获取响应数据

整体代码逻辑不变,主要新增解析响应数据的函数 parse_responses,并相应修改主逻辑函数 main。

# Parse response data
def parse_responses(driver):
    """Print the response body of every Baidu ``sugrec`` response in the log.

    Args:
        driver: Object providing ``get_log("performance")`` and
            ``execute_cdp_cmd``; each log entry carries a JSON-encoded
            ``"message"`` field.
    """
    for entry in driver.get_log("performance"):
        message = json.loads(entry["message"])["message"]
        if message["method"] != "Network.responseReceived":
            continue
        url = message["params"]["response"]["url"]
        # The previous filter string was wrapped in angle brackets and could
        # never be a substring of a real URL, so nothing was ever printed.
        if "https://www.baidu.com/sugrec" not in url:
            continue
        request_id = message["params"]["requestId"]
        # The body is not part of the log entry; fetch it by request id.
        body = driver.execute_cdp_cmd(
            "Network.getResponseBody",
            {"requestId": request_id},
        )["body"]
        print(body)

# Main flow
def main():
    """Attach to Chrome, load Baidu and print matching response bodies."""
    # Only the port is needed here; the profile directory is not used again.
    driver_port, _ = make_chrome()

    chrome_options = Options()
    # Attach to the already running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    # Turn on the performance log so network events can be read back.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # Enable the DevTools Network domain.
        driver.execute_cdp_cmd('Network.enable', {})
        # The URL must be a plain URL; the previous literal was wrapped in
        # angle brackets, which is not a valid address.
        driver.get("https://www.baidu.com")
        wait_for_page_load(driver)
        # Parse the response data.
        parse_responses(driver)
    finally:
        # Always release the window and the driver session, even on failure.
        try:
            driver.close()
        finally:
            driver.quit()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

四、完整代码

import os
import time
import json
import shutil
import random
from uuid import uuid4
from _thread import start_new_thread
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Chrome configuration (adjust these paths for your own machine).
# Path of the Chrome executable. The value deliberately contains literal
# double quotes because the path itself contains a space.
chrome_execute = '"C:/Program Files/Google/Chrome/Application/chrome.exe"'
# Path of the chromedriver binary; its version must match the Chrome version.
# Download: https://googlechromelabs.github.io/chrome-for-testing/
chrome_driver = "D:/Anaconda3/chromedriver.exe"
# Root directory that holds Chrome user-data (profile) directories.
chrome_user_path = "D:/selenum"
# Profile template: a freshly created profile shows several first-run prompts,
# so a pre-configured, ready-to-use profile is kept here instead.
chrome_default_user = chrome_user_path + "/user_data"

# Launch Chrome with remote debugging enabled
def start_chrome(port, new_user_data):
    """Start Chrome with a DevTools debugging port and wait for the process.

    The call blocks until the launched process returns, so callers that need
    to keep working should run it in a separate thread.

    Args:
        port: TCP port passed to ``--remote-debugging-port``.
        new_user_data: Profile directory passed to ``--user-data-dir``.
    """
    # Function-scope import: the file's import list does not include subprocess.
    import subprocess

    # Optional: start from a fresh copy of the profile template.
    # if os.path.exists(new_user_data):
    #     shutil.rmtree(new_user_data)
    # shutil.copytree(chrome_default_user, new_user_data)

    # The previous version built the command as a single-quoted f-string that
    # spanned several lines, which is a syntax error. An argument list avoids
    # that and also sidesteps shell quoting for paths that contain spaces.
    command = [
        # chrome_execute carries literal double quotes meant for a shell
        # command line; an argument list needs the bare path.
        chrome_execute.strip('"'),
        f"--remote-debugging-port={port}",
        f"--user-data-dir={new_user_data}",
        "--start-maximized",
    ]
    subprocess.run(command, check=False)

# Start Chrome in the background and wait for its debugging port
def make_chrome():
    """Launch Chrome on a random debugging port and wait until it is reachable.

    Returns:
        tuple: ``(driver_port, new_user_data)`` - the debugging port that was
        chosen and the profile directory Chrome was started with.
    """
    # Function-scope import: the file's import list does not include socket.
    import socket

    driver_port = random.randint(50000, 60000)
    # To isolate runs, a unique profile directory can be used instead:
    # new_user_data = f"{chrome_user_path}/{uuid4().hex}"
    new_user_data = chrome_default_user
    # start_chrome does not return while the browser command is running,
    # so it is started on its own thread.
    start_new_thread(start_chrome, (driver_port, new_user_data))

    # Poll the debugging port instead of sleeping for a fixed 5 seconds:
    # returns as soon as Chrome is listening and tolerates slower start-ups.
    # If the port never opens, fall through after the deadline and let the
    # caller's connection attempt report the failure.
    deadline = time.monotonic() + 15
    while time.monotonic() < deadline:
        try:
            with socket.create_connection(("127.0.0.1", driver_port), timeout=0.5):
                break
        except OSError:
            time.sleep(0.2)
    return driver_port, new_user_data

# Wait for the page to load
def wait_for_page_load(driver, timeout=10):
    """Block until a ``<body>`` element is present in the current page.

    Args:
        driver: WebDriver instance to poll.
        timeout: Maximum number of seconds handed to ``WebDriverWait``.
    """
    waiter = WebDriverWait(driver, timeout)
    body_locator = (By.TAG_NAME, "body")
    body_is_present = EC.presence_of_element_located(body_locator)
    waiter.until(body_is_present)

# Parse request data
def parse_requests(driver, logs=None):
    """Print the URL and headers of each request found in the performance log.

    Args:
        driver: Object whose ``get_log("performance")`` yields entries with a
            JSON-encoded ``"message"`` field.
        logs: Optional, already fetched performance-log entries. When given,
            the driver's log is not read, so several parsers can share one
            snapshot. Defaults to reading the log from ``driver``.
    """
    entries = driver.get_log("performance") if logs is None else logs
    for entry in entries:
        event = json.loads(entry["message"])["message"]
        if event["method"] != "Network.requestWillBeSent":
            continue
        request = event["params"]["request"]
        print(request["url"])
        print(request["headers"])


# Parse response data
def parse_responses(driver, logs=None):
    """Print the response body of every Baidu ``sugrec`` response in the log.

    Args:
        driver: Object providing ``execute_cdp_cmd`` and, when ``logs`` is
            omitted, ``get_log("performance")``.
        logs: Optional, already fetched performance-log entries; see
            ``parse_requests``.
    """
    entries = driver.get_log("performance") if logs is None else logs
    for entry in entries:
        message = json.loads(entry["message"])["message"]
        if message["method"] != "Network.responseReceived":
            continue
        url = message["params"]["response"]["url"]
        # The previous filter string was wrapped in angle brackets and could
        # never be a substring of a real URL, so nothing was ever printed.
        if "https://www.baidu.com/sugrec" not in url:
            continue
        request_id = message["params"]["requestId"]
        # The body is not part of the log entry; fetch it by request id.
        body = driver.execute_cdp_cmd(
            "Network.getResponseBody",
            {"requestId": request_id},
        )["body"]
        print(body)


# Main flow
def main():
    """Attach to Chrome, load Baidu, then print requests and response bodies."""
    # Only the port is needed here; the profile directory is not used again.
    driver_port, _ = make_chrome()

    chrome_options = Options()
    # Attach to the already running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    # Turn on the performance log so network events can be read back.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # Enable the DevTools Network domain.
        driver.execute_cdp_cmd('Network.enable', {})
        # The URL must be a plain URL; the previous literal was wrapped in
        # angle brackets, which is not a valid address.
        driver.get("https://www.baidu.com")
        wait_for_page_load(driver)
        # Reading the performance log empties it, so fetch it once and hand
        # the same snapshot to both parsers; otherwise the second parser
        # would only see entries produced after the first read.
        performance_logs = driver.get_log("performance")
        # Parse the request data.
        parse_requests(driver, performance_logs)
        # Parse the response data.
        parse_responses(driver, performance_logs)
    finally:
        # Always release the window and the driver session, even on failure.
        try:
            driver.close()
        finally:
            driver.quit()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()