python selenium网页爬虫多线程并发执行偶尔会报错,请问是什么原因?
公司有个需求,需要将动态 HTML 页面保存为 PDF 文件,准备用 Python + Selenium 实现:通过 chromedriver 调用 Page.printToPDF 命令,在页面加载完成后获取打印的响应结果,最后转为 PDF 保存。考虑到并发性,用多线程模拟测试时发现偶尔会报错,而单个线程执行又没有问题,不知道是什么原因。
python 3.9.0
selenium 4.16.0
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import staleness_of
from urllib.parse import urlparse
import json
import base64
import threading
import time
def test(debug_port=None):
    """Start a Chrome session, load the Baidu home page, then quit.

    Intended to be run from several threads at once. Each call creates its
    own chromedriver service and browser instance.

    Args:
        debug_port: Optional remote-debugging port for this browser instance.
            When ``None`` (the default) no ``--remote-debugging-port``
            argument is passed, so browsers started concurrently do not
            contend for one fixed port. Callers that need a known port must
            pass a different value per concurrent call.

    Raises:
        selenium.common.exceptions.WebDriverException: Propagated unchanged
            if the session cannot be created or the page cannot be loaded.
    """
    webdriver_service = Service(r"D:\work\chromedriver-win64\chromedriver.exe")
    webdriver_options = Options()
    webdriver_options.binary_location = r"D:\work\chrome-win64\chrome.exe"
    webdriver_options.add_argument('--no-sandbox')
    #webdriver_options.add_argument('--headless')
    webdriver_options.add_argument('--disable-gpu')
    # A hard-coded port (previously 9225) was shared by every thread, which
    # made concurrently launched browsers fail intermittently with
    # "not connected to DevTools". Only set the port when explicitly asked.
    if debug_port is not None:
        webdriver_options.add_argument(f"--remote-debugging-port={debug_port}")
    webdriver_options.add_argument("--incognito")
    #webdriver_options.page_load_strategy = 'eager'
    #webdriver_options.add_argument('--disable-dev-shm-usage')
    # Build the preferences first, then register them in one step.
    webdriver_prefs = {'profile.default_content_settings': {'images': 2}}
    webdriver_options.add_experimental_option('prefs', webdriver_prefs)
    driver = webdriver.Chrome(options=webdriver_options, service=webdriver_service)
    try:
        print(driver.session_id)
        driver.get("https://www.baidu.com")
    finally:
        # Always shut the browser down, even if navigation raised, so failed
        # runs do not leave orphaned chrome/chromedriver processes behind.
        driver.quit()
if __name__ == '__main__':
    # Create all three workers first, then start them in creation order.
    workers = [threading.Thread(target=test) for _ in range(3)]
    for worker in workers:
        worker.start()
Exception in thread Thread-2:
Traceback (most recent call last):
File "C:\Users\Lenovo\AppData\Local\Programs\Python\Python39\lib\threading.py", line 950, in _bootstrap_inner
self.run()
File "C:\Users\Lenovo\AppData\Local\Programs\Python\Python39\lib\threading.py", line 888, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\Lenovo\Desktop\canon\pythonProject\http\convert_html.py", line 104, in test
raise e
File "C:\Users\Lenovo\Desktop\canon\pythonProject\http\convert_html.py", line 102, in test
driver = webdriver.Chrome(options=webdriver_options, service=webdriver_service)
File "C:\Users\Lenovo\Desktop\canon\pythonProject\.venv\lib\site-packages\selenium\webdriver\chrome\webdriver.py", line 45, in __init__
super().__init__(
File "C:\Users\Lenovo\Desktop\canon\pythonProject\.venv\lib\site-packages\selenium\webdriver\chromium\webdriver.py", line 61, in __init__
super().__init__(command_executor=executor, options=options)
File "C:\Users\Lenovo\Desktop\canon\pythonProject\.venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 209, in __init__
self.start_session(capabilities)
File "C:\Users\Lenovo\Desktop\canon\pythonProject\.venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 293, in start_session
response = self.execute(Command.NEW_SESSION, caps)["value"]
File "C:\Users\Lenovo\Desktop\canon\pythonProject\.venv\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 348, in execute
self.error_handler.check_response(response)
File "C:\Users\Lenovo\Desktop\canon\pythonProject\.venv\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 229, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: disconnected: Unable to receive message from renderer
(failed to check if window was closed: disconnected: not connected to DevTools)
(Session info: chrome=120.0.6099.71)
Stacktrace:
GetHandleVerifier [0x00007FF6341E4D02+56194]
(No symbol) [0x00007FF6341504B2]
(No symbol) [0x00007FF633FF76AA]
(No symbol) [0x00007FF633FE0839]
(No symbol) [0x00007FF633FE06EB]
(No symbol) [0x00007FF633FDEE3D]
(No symbol) [0x00007FF633FDF603]
(No symbol) [0x00007FF633FDE026]
(No symbol) [0x00007FF633FEEC0F]
(No symbol) [0x00007FF633FE02A8]
(No symbol) [0x00007FF633FDEE3D]
(No symbol) [0x00007FF633FDF603]
(No symbol) [0x00007FF633FDE026]
(No symbol) [0x00007FF633FE8394]
(No symbol) [0x00007FF633FE02A8]
(No symbol) [0x00007FF633FDEE3D]
(No symbol) [0x00007FF633FDF603]
(No symbol) [0x00007FF633FDE026]
(No symbol) [0x00007FF633FE55B2]
(No symbol) [0x00007FF633FE02A8]
(No symbol) [0x00007FF633FDEE3D]
(No symbol) [0x00007FF633FDF603]
(No symbol) [0x00007FF633FDE026]
(No symbol) [0x00007FF633FD5EEA]
(No symbol) [0x00007FF633FDD62D]
(No symbol) [0x00007FF633FDD1DF]
(No symbol) [0x00007FF633FF9931]
(No symbol) [0x00007FF633FD040E]
(No symbol) [0x00007FF633FCFCAC]
(No symbol) [0x00007FF634070A1C]
(No symbol) [0x00007FF634065C23]
(No symbol) [0x00007FF634034A45]
(No symbol) [0x00007FF634035AD4]
GetHandleVerifier [0x00007FF63455D5BB+3695675]
GetHandleVerifier [0x00007FF6345B6197+4059159]
GetHandleVerifier [0x00007FF6345ADF63+4025827]
GetHandleVerifier [0x00007FF63427F029+687785]
(No symbol) [0x00007FF63415B508]
(No symbol) [0x00007FF634157564]
(No symbol) [0x00007FF6341576E9]
(No symbol) [0x00007FF634148094]
BaseThreadInitThunk [0x00007FF97B5E7C24+20]
RtlUserThreadStart [0x00007FF97C3CD4D1+33]
回答:
试了一下,冲突主要出在这一行:
webdriver_options.add_argument("--remote-debugging-port=9225")
多个线程各自启动的 Chrome 实例共用了同一个远程调试端口 9225,彼此冲突,所以并发时偶尔会报错。去掉这一行,或者把端口作为参数、给每个线程传入不同的端口,测试就正常了。
以上是 python selenium网页爬虫多线程并发执行偶尔会报错,请问是什么原因? 的全部内容, 来源链接: utcz.com/p/939104.html