本文分为四部分:启动并接管 Chrome、解析请求数据、解析响应数据、完整代码。
先直接上第一部分的代码 ↓
import os
import time
import json
import shutil
import random
from uuid import uuid4
from _thread import start_new_thread
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Chrome configuration (adjust the paths for your own machine)
# Path to the Chrome browser executable; the inner double quotes are part of the value
chrome_execute = '"C:/Program Files/Google/Chrome/Application/chrome.exe"'
# Path to the chromedriver binary; its version must match the Chrome browser version
# Download from: https://googlechromelabs.github.io/chrome-for-testing/
chrome_driver = 'D:/Anaconda3/chromedriver.exe'
# Root directory that holds the Chrome user-data (profile) directories
chrome_user_path = "D:/selenum"
# Template Chrome user-data directory
# A brand-new Chrome profile shows various first-run prompts, so prepare a ready-to-use template in advance
chrome_default_user = f"{chrome_user_path}/user_data"
def start_chrome(port, new_user_data):
    """Launch Chrome with remote debugging enabled.

    The call blocks until the launched process exits, so it is meant to be
    run in a background thread.

    Args:
        port: TCP port passed to ``--remote-debugging-port``.
        new_user_data: Directory passed to ``--user-data-dir``.
    """
    # Local import: the file's import block does not provide subprocess.
    import subprocess

    # Optional: copy the profile template into a fresh directory first.
    # if os.path.exists(new_user_data):
    #     shutil.rmtree(new_user_data)
    # shutil.copytree(chrome_default_user, new_user_data)

    # An argument list (no shell) keeps paths containing spaces intact and
    # avoids shell quoting rules; chrome_execute carries literal double
    # quotes meant for the shell, so strip them here.
    command = [
        chrome_execute.strip('"'),
        f"--remote-debugging-port={port}",
        f"--user-data-dir={new_user_data}",
        "--start-maximized",
    ]
    # The exit status is ignored, as it was before.
    subprocess.run(command, check=False)
def make_chrome():
    """Start Chrome on a free local debugging port and wait until it listens.

    Returns:
        tuple: ``(driver_port, new_user_data)`` - the remote-debugging port
        and the user-data directory handed to Chrome.

    Raises:
        TimeoutError: If the debugging port does not accept connections
            within 30 seconds of launching Chrome.
    """
    # Local import: the file's import block does not provide socket.
    import socket

    # Pick a random port, skipping any that another process already holds.
    while True:
        driver_port = random.randint(50000, 60000)
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.bind(("127.0.0.1", driver_port))
            except OSError:
                continue
        break

    # Alternative: give every run its own profile directory, e.g.
    # new_user_data = f"{chrome_user_path}/{uuid4().hex}"
    new_user_data = chrome_default_user

    # start_chrome blocks while Chrome runs, so launch it in its own thread.
    start_new_thread(start_chrome, (driver_port, new_user_data))

    # Poll the port instead of sleeping a fixed time: return as soon as
    # Chrome is reachable, and fail loudly if it never comes up.
    deadline = time.monotonic() + 30
    while True:
        try:
            with socket.create_connection(("127.0.0.1", driver_port), timeout=1):
                break
        except OSError:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Chrome did not open debugging port {driver_port} in time"
                ) from None
            time.sleep(0.2)
    return driver_port, new_user_data
def wait_for_page_load(driver, timeout=10):
    """Wait until a ``body`` element is present in the page.

    Args:
        driver: WebDriver instance to poll.
        timeout: Value handed to ``WebDriverWait`` as its timeout.
    """
    body_locator = (By.TAG_NAME, "body")
    body_is_present = EC.presence_of_element_located(body_locator)
    WebDriverWait(driver, timeout).until(body_is_present)
def main():
    """Attach Selenium to a freshly launched Chrome and print Baidu's HTML."""
    driver_port, _ = make_chrome()
    chrome_options = Options()
    # Attach to the already-running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # The URL must be a bare URL; angle brackets around it make it invalid.
        driver.get("https://www.baidu.com")
        # Wait for the body before reading the page source.
        wait_for_page_load(driver)
        print(driver.page_source)
    finally:
        # Always close the window and end the session, even after an error.
        try:
            driver.close()
        finally:
            driver.quit()


if __name__ == "__main__":
    main()
整体代码逻辑不变,主要新增解析请求数据的函数 parse_requests,并修改主逻辑函数 main ↓
def parse_requests(driver, logs=None):
    """Print the URL and headers of every request in the performance log.

    Args:
        driver: Object exposing ``get_log("performance")``; it is only
            queried when ``logs`` is None.
        logs: Optional, already-fetched performance log entries. Passing
            them lets several parsers share one snapshot of the log.
    """
    if logs is None:
        logs = driver.get_log("performance")
    for entry in logs:
        # Each entry wraps the DevTools event as a JSON string.
        event = json.loads(entry["message"])["message"]
        if event["method"] != "Network.requestWillBeSent":
            continue
        request = event["params"]["request"]
        headers = request["headers"]
        url = request["url"]
        print(url)
        print(headers)
def main():
    """Load Baidu in an attached Chrome and print the captured requests."""
    driver_port, _ = make_chrome()
    chrome_options = Options()
    # Attach to the already-running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    # Turn on the performance log so network events are recorded.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # Enable the DevTools Network domain.
        driver.execute_cdp_cmd('Network.enable', {})
        # The URL must be a bare URL; angle brackets around it make it invalid.
        driver.get("https://www.baidu.com")
        wait_for_page_load(driver)
        # Print the captured request data.
        parse_requests(driver)
    finally:
        # Always close the window and end the session, even after an error.
        try:
            driver.close()
        finally:
            driver.quit()


if __name__ == "__main__":
    main()
整体代码逻辑不变,主要新增解析响应数据的函数 parse_responses,并修改主逻辑函数 main ↓
def parse_responses(driver, logs=None):
    """Print the body of every Baidu ``sugrec`` response in the performance log.

    Args:
        driver: Object exposing ``get_log("performance")`` and
            ``execute_cdp_cmd``; the log is only fetched when ``logs`` is
            None.
        logs: Optional, already-fetched performance log entries. Passing
            them lets several parsers share one snapshot of the log.
    """
    if logs is None:
        logs = driver.get_log("performance")
    for entry in logs:
        # Each entry wraps the DevTools event as a JSON string.
        event = json.loads(entry["message"])["message"]
        if event["method"] != "Network.responseReceived":
            continue
        params = event["params"]
        url = params["response"]["url"]
        # Match on the bare URL prefix; angle brackets never occur in a URL.
        if "https://www.baidu.com/sugrec" not in url:
            continue
        body = driver.execute_cdp_cmd(
            "Network.getResponseBody",
            {"requestId": params["requestId"]},
        )["body"]
        print(body)
def main():
    """Load Baidu in an attached Chrome and print the matching response bodies."""
    driver_port, _ = make_chrome()
    chrome_options = Options()
    # Attach to the already-running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    # Turn on the performance log so network events are recorded.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # Enable the DevTools Network domain.
        driver.execute_cdp_cmd('Network.enable', {})
        # The URL must be a bare URL; angle brackets around it make it invalid.
        driver.get("https://www.baidu.com")
        wait_for_page_load(driver)
        # Print the captured response data.
        parse_responses(driver)
    finally:
        # Always close the window and end the session, even after an error.
        try:
            driver.close()
        finally:
            driver.quit()


if __name__ == "__main__":
    main()
import os
import time
import json
import shutil
import random
from uuid import uuid4
from _thread import start_new_thread
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Chrome configuration (adjust the paths for your own machine)
# Path to the Chrome browser executable; the inner double quotes are part of the value
chrome_execute = '"C:/Program Files/Google/Chrome/Application/chrome.exe"'
# Path to the chromedriver binary; its version must match the Chrome browser version
# Download from: https://googlechromelabs.github.io/chrome-for-testing/
chrome_driver = 'D:/Anaconda3/chromedriver.exe'
# Root directory that holds the Chrome user-data (profile) directories
chrome_user_path = "D:/selenum"
# Template Chrome user-data directory
# A brand-new Chrome profile shows various first-run prompts, so prepare a ready-to-use template in advance
chrome_default_user = f"{chrome_user_path}/user_data"
def start_chrome(port, new_user_data):
    """Launch Chrome with remote debugging enabled.

    The call blocks until the launched process exits, so it is meant to be
    run in a background thread.

    Args:
        port: TCP port passed to ``--remote-debugging-port``.
        new_user_data: Directory passed to ``--user-data-dir``.
    """
    # Local import: the file's import block does not provide subprocess.
    import subprocess

    # Optional: copy the profile template into a fresh directory first.
    # if os.path.exists(new_user_data):
    #     shutil.rmtree(new_user_data)
    # shutil.copytree(chrome_default_user, new_user_data)

    # An argument list (no shell) keeps paths containing spaces intact and
    # avoids shell quoting rules; chrome_execute carries literal double
    # quotes meant for the shell, so strip them here.
    command = [
        chrome_execute.strip('"'),
        f"--remote-debugging-port={port}",
        f"--user-data-dir={new_user_data}",
        "--start-maximized",
    ]
    # The exit status is ignored, as it was before.
    subprocess.run(command, check=False)
def make_chrome():
    """Start Chrome on a free local debugging port and wait until it listens.

    Returns:
        tuple: ``(driver_port, new_user_data)`` - the remote-debugging port
        and the user-data directory handed to Chrome.

    Raises:
        TimeoutError: If the debugging port does not accept connections
            within 30 seconds of launching Chrome.
    """
    # Local import: the file's import block does not provide socket.
    import socket

    # Pick a random port, skipping any that another process already holds.
    while True:
        driver_port = random.randint(50000, 60000)
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.bind(("127.0.0.1", driver_port))
            except OSError:
                continue
        break

    # Alternative: give every run its own profile directory, e.g.
    # new_user_data = f"{chrome_user_path}/{uuid4().hex}"
    new_user_data = chrome_default_user

    # start_chrome blocks while Chrome runs, so launch it in its own thread.
    start_new_thread(start_chrome, (driver_port, new_user_data))

    # Poll the port instead of sleeping a fixed time: return as soon as
    # Chrome is reachable, and fail loudly if it never comes up.
    deadline = time.monotonic() + 30
    while True:
        try:
            with socket.create_connection(("127.0.0.1", driver_port), timeout=1):
                break
        except OSError:
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Chrome did not open debugging port {driver_port} in time"
                ) from None
            time.sleep(0.2)
    return driver_port, new_user_data
def wait_for_page_load(driver, timeout=10):
    """Wait until a ``body`` element is present in the page.

    Args:
        driver: WebDriver instance to poll.
        timeout: Value handed to ``WebDriverWait`` as its timeout.
    """
    body_locator = (By.TAG_NAME, "body")
    body_is_present = EC.presence_of_element_located(body_locator)
    WebDriverWait(driver, timeout).until(body_is_present)
def parse_requests(driver, logs=None):
    """Print the URL and headers of every request in the performance log.

    Args:
        driver: Object exposing ``get_log("performance")``; it is only
            queried when ``logs`` is None.
        logs: Optional, already-fetched performance log entries. Passing
            them lets several parsers share one snapshot of the log.
    """
    if logs is None:
        logs = driver.get_log("performance")
    for entry in logs:
        # Each entry wraps the DevTools event as a JSON string.
        event = json.loads(entry["message"])["message"]
        if event["method"] != "Network.requestWillBeSent":
            continue
        request = event["params"]["request"]
        headers = request["headers"]
        url = request["url"]
        print(url)
        print(headers)


def parse_responses(driver, logs=None):
    """Print the body of every Baidu ``sugrec`` response in the performance log.

    Args:
        driver: Object exposing ``get_log("performance")`` and
            ``execute_cdp_cmd``; the log is only fetched when ``logs`` is
            None.
        logs: Optional, already-fetched performance log entries. Passing
            them lets several parsers share one snapshot of the log.
    """
    if logs is None:
        logs = driver.get_log("performance")
    for entry in logs:
        # Each entry wraps the DevTools event as a JSON string.
        event = json.loads(entry["message"])["message"]
        if event["method"] != "Network.responseReceived":
            continue
        params = event["params"]
        url = params["response"]["url"]
        # Match on the bare URL prefix; angle brackets never occur in a URL.
        if "https://www.baidu.com/sugrec" not in url:
            continue
        body = driver.execute_cdp_cmd(
            "Network.getResponseBody",
            {"requestId": params["requestId"]},
        )["body"]
        print(body)


def main():
    """Load Baidu in an attached Chrome and print captured requests and responses."""
    driver_port, _ = make_chrome()
    chrome_options = Options()
    # Attach to the already-running browser through its debugging address.
    chrome_options.add_experimental_option("debuggerAddress", f"127.0.0.1:{driver_port}")
    # Turn on the performance log so network events are recorded.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    driver = webdriver.Chrome(service=Service(chrome_driver), options=chrome_options)
    try:
        # Enable the DevTools Network domain.
        driver.execute_cdp_cmd('Network.enable', {})
        # The URL must be a bare URL; angle brackets around it make it invalid.
        driver.get("https://www.baidu.com")
        wait_for_page_load(driver)
        # Retrieving the log empties the buffer, so fetch it once and hand
        # the same snapshot to both parsers; a second get_log call would
        # miss the entries already consumed by the first.
        logs = driver.get_log("performance")
        # Print the captured request data.
        parse_requests(driver, logs)
        # Print the captured response data.
        parse_responses(driver, logs)
    finally:
        # Always close the window and end the session, even after an error.
        try:
            driver.close()
        finally:
            driver.quit()


if __name__ == "__main__":
    main()