使用时需要先运行 mitm 脚本, 然后再运行 selenium 脚本自动点击页面。
selenium 负责点击链接, 浏览器需要配置为使用 mitm 代理。
class ContrlFlow():
    """Chrome automation whose traffic is routed through a local mitmproxy.

    The proxy address configured here (127.0.0.1:8080) has to match the
    address the mitmproxy script listens on.
    """

    def __init__(self):
        self.driver: Optional[webdriver.Chrome] = self.gen_driver()

    def gen_driver(self) -> "webdriver.Chrome":
        """Create the Chrome driver.

        The driver uses a persistent user-data directory, the local proxy
        and a custom user agent.

        Returns:
            The configured ``webdriver.Chrome`` instance.
        """
        options = webdriver.ChromeOptions()

        # Keep the browser profile on disk so user settings survive restarts.
        user_path = gen_source_path() / 'temp' / 'webdriver' / 'chromedriver' / 'user_data'
        user_path.mkdir(exist_ok=True, parents=True)
        print(user_path.absolute())
        options.add_argument(rf"user-data-dir={user_path.absolute()}")

        # Important: the proxy must be set, otherwise mitmproxy cannot see the
        # browser's requests and responses.
        options.add_argument('--proxy-server=http://{}:{}'.format('127.0.0.1', 8080))

        # Spoof the user-agent header.
        user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15"
        options.add_argument('--user-agent=%s' % user_agent)
        # NOTE(review): presumably meant to hide the "controlled by automation"
        # marker from the visited page - confirm against Chromium behaviour.
        options.add_argument("disable-blink-features=AutomationControlled")

        # Start the browser.
        return webdriver.Chrome(options=options)

    def driver_get(self, url: str) -> None:
        """Navigate the browser to *url*.

        Args:
            url: Address of the page to load.

        Raises:
            RuntimeError: If no driver is attached to this instance.
        """
        if self.driver is None:
            raise RuntimeError("webdriver is not initialised")
        self.driver.get(url)

    def crawl(self):
        """Run the crawl; currently this only visits the market page."""
        self.driver_flow_market()

    def driver_flow_market(self):
        """Open the page, then pause for five seconds."""
        self.driver_get("https://xxxxx")
        # NOTE(review): the pause presumably lets the page finish its requests
        # so the proxy can capture them - confirm.
        sleep(5)
mitm代理查看请求与响应数据
from mitmproxy.http import HTTPFlow


class Addon:
    """mitmproxy listener addon that prints requests and HTTP 200 responses."""

    def request(self, flow: HTTPFlow):
        """Print the data of the intercepted request."""
        req = flow.request  # the request object of this flow
        print("request data = ", req.data)

    def response(self, flow: HTTPFlow):
        """Print the response content; responses whose status is not 200 are skipped."""
        resp = flow.response  # the response object of this flow
        if resp.status_code == 200:
            print(f"response content = {resp.content}")


from mitmproxy.http import HTTPFlow
import json
import pandas as pd
from diskcache import Cache
from mitmproxy.tools import main
from pathlib import Path
from datetime import datetime


class AddonMuji(Addon):
    """mitmproxy addon that extracts the offer list from one specific response."""

    def __init__(self, ):
        ...

    def request(self, flow: HTTPFlow):
        """Do nothing for requests.

        The hook name ``request`` is fixed; this override replaces the
        request printing done by ``Addon.request``.
        """
        ...

    def response(self, flow: HTTPFlow):
        """Handle a response; only HTTP 200 responses for the target URL are processed.

        :param flow: the intercepted flow
        :return: None
        """
        if flow.response.status_code != 200:
            return
        url = flow.request.url
        if url == 'https://xxxxx':
            self.proc_searchOffer_v2(flow=flow)

    def proc_searchOffer_v2(self, flow: HTTPFlow = None) -> "pd.DataFrame | None":
        """Extract the offer prices from the response of *flow*.

        :param flow: flow whose response body is JSON containing ``result.list``
        :return: ``result.list`` flattened with ``pd.json_normalize``, or None
            when the body is not valid JSON or does not contain that path
        """
        payload = self.flow_to_json(flow=flow, type='response')
        if payload is None:
            # flow_to_json has already reported the decode failure.
            return None
        try:
            records = payload['result']['list']
        except (KeyError, TypeError):
            # The body was valid JSON but not the expected structure.
            print(f'响应中缺少 result.list 字段, url -> {flow.request.url}')
            return None
        return pd.json_normalize(records)

    def flow_to_json(self, flow: HTTPFlow = None, type='request'):
        """Decode the JSON body of the request or the response of *flow*.

        :param flow: the intercepted flow
        :param type: ``'request'`` decodes ``flow.request``; any other value
            decodes ``flow.response``
        :return: the decoded JSON value, or None if the body is not valid JSON
        """
        side = 'request' if type == 'request' else 'response'
        message = flow.request if side == 'request' else flow.response
        try:
            return message.json()
        except json.decoder.JSONDecodeError:
            print(f'解析json报错, flow.{side}为 ->{message.text}')
            return None


# Addon instances exposed to mitmproxy when this file is loaded with ``-s``.
addons = [
    AddonMuji(),
]

if __name__ == '__main__':
    # Run this script first; once the proxy is up, start the selenium automation script.
    main.mitmdump(['-s', __file__, '--listen-host', '127.0.0.1', '-p', '8080', '--set', 'block_global=false'])