Skip to content

Commit 3513ee4

Browse files
committed
fix: added test results and handled WebDriverException
1 parent 7f11954 commit 3513ee4

File tree

208 files changed

+3005
-180
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

208 files changed

+3005
-180
lines changed

benchmark.py

+96-57
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,11 @@
88
from selenium.webdriver.chrome.options import Options
99
from selenium.webdriver.chrome.service import Service
1010
from webdriver_manager.chrome import ChromeDriverManager
11+
from selenium.common.exceptions import WebDriverException
1112

1213
load_dotenv()
1314
api_key = os.getenv('OPENAI_API_KEY')
1415

15-
# Hard-coded coordinates to open web-wand side panel
16-
extensions_pos = (1060, 110)
17-
web_wand_pos = (900, 280)
18-
1916
# Place to store task execution results
2017
results_dir = 'results'
2118
os.makedirs(results_dir, exist_ok=True)
@@ -26,6 +23,8 @@ def setup_driver():
2623
chrome_options.add_argument("--load-extension=./dist")
2724
service = Service(ChromeDriverManager().install())
2825
driver = webdriver.Chrome(service=service, options=chrome_options)
26+
# Set script timeout to 240 seconds
27+
driver.set_script_timeout(240)
2928
return driver
3029

3130
def dispatch_event(driver, event_name, event):
@@ -35,57 +34,84 @@ def dispatch_event(driver, event_name, event):
3534
"""
3635
driver.execute_script(script)
3736

38-
def add_task_listener(driver, task_id, max_retries=3):
    """
    Listen for 'TaskUpdate' events on the page and record their payloads.

    An async script is injected that registers a one-shot 'TaskUpdate'
    listener and hands the first relevant event back to Python. Screenshot
    events are written with write_screenshots() and listening continues;
    a history event whose status is 'success' or 'error' is written with
    write_history() and ends the wait.

    Args:
        driver: WebDriver-like object exposing execute_async_script().
        task_id: Identifier used to name the per-task results directory.
        max_retries: Number of consecutive "document unloaded" failures
            tolerated before giving up.

    Returns:
        The terminal task status ('success' or 'error'), or None when no
        terminal history event was captured (retries exhausted, empty event
        data, or an unexpected error while listening).
    """
    print('add_task_listener', task_id)

    # Plain (non-f) string: nothing is interpolated, so the JavaScript braces
    # do not need to be doubled. The listener removes itself after invoking
    # the callback because each execute_async_script call yields one result.
    script = """
        var callback = arguments[0];
        var eventListener = function (e) {
            if (e.detail.type == 'history') {
                console.log("event listener received history event");
                if (e.detail.status === 'success' || e.detail.status === 'error') {
                    callback({status: e.detail.status, type: 'history', data: e.detail.data});
                    document.removeEventListener('TaskUpdate', eventListener);
                    console.log("event listener removed");
                }
                // Does not do anything when the status is 'running' or 'idle'.
                // The status 'interrupted' will never be triggered automatically.
            } else if (e.detail.type == 'screenshot') {
                console.log("event listener received screenshot event");
                callback({status: e.detail.status, type: 'screenshot', data: e.detail.data});
                document.removeEventListener('TaskUpdate', eventListener);
                console.log("event listener removed");
            } else {
                throw new Error("Invalid event type received: " + e.detail.type);
            }
        };

        document.addEventListener('TaskUpdate', eventListener);
        console.log("added event listener");
    """

    status = None
    attempts = 0

    # Iterate instead of recursing once per screenshot, so a long-running task
    # with many screenshots cannot exhaust the interpreter's recursion limit.
    while attempts < max_retries:
        try:
            event_data = driver.execute_async_script(script)
            if not event_data:
                print("no event data")
                break

            event_type = event_data['type']
            if event_type == 'history':
                # Record history when task stops
                status = event_data['status']
                write_history(task_id, event_data['data'])
                break
            if event_type != 'screenshot':
                raise ValueError(f"Unhandled event data type: {event_type}")

            write_screenshots(task_id, event_data['data'])
            # An event arrived successfully, so only *consecutive* failures
            # count towards max_retries. Keep listening: the task is running.
            attempts = 0
            print("reset attempts to zero")
        except WebDriverException as e:
            if "javascript error: document unloaded while waiting for result" in str(e):
                # Page navigation tears down the pending script; re-register
                # the listener on the new document and try again.
                print(f"Document unloaded error: {e}")
                attempts += 1
                print(f"Attempt {attempts}: Document unloaded error. Retrying...")
                if attempts == max_retries:
                    print("Maximum retry attempts reached. Cannot recover from document unloaded error.")
            else:
                print(f"Other WebDriver error: {e}")
                break
        except Exception as e:
            print(f"Error while listening for updates: {e}")
            break

    print("completed['status']", status)
    return status
87112

88113
def write_history(task_id, task_history):
114+
print('write_history', task_id)
89115
task_dir = os.path.join(results_dir, f"test{task_id}")
90116
os.makedirs(task_dir, exist_ok=True)
91117
file_path = os.path.join(task_dir, 'interact_messages.json')
@@ -94,21 +120,33 @@ def write_history(task_id, task_history):
94120
json.dump(task_history, file, indent=4)
95121

96122
def write_screenshots(task_id, image_data):
    """
    Decode a base64-encoded screenshot and save it under the task's results directory.

    The file is named ``screenshot_<unix-seconds>.png``. If a file with that
    name already exists (several screenshots captured within the same second),
    a numeric suffix (``_1``, ``_2``, ...) is appended instead of overwriting
    the earlier capture.

    Args:
        task_id: Identifier used to build the ``test<task_id>`` directory name.
        image_data: Base64-encoded image bytes.
    """
    print('write_screenshots', task_id)
    image_bytes = base64.b64decode(image_data)
    task_dir = os.path.join(results_dir, f"test{task_id}")
    os.makedirs(task_dir, exist_ok=True)
    timestamp = int(time.time())

    collision = 0
    while True:
        suffix = f"_{collision}" if collision else ""
        file_path = os.path.join(task_dir, f'screenshot_{timestamp}{suffix}.png')
        try:
            # 'x' = exclusive creation: fails rather than clobbering a
            # screenshot already written during this same second.
            with open(file_path, 'xb') as file:
                file.write(image_bytes)
            return
        except FileExistsError:
            collision += 1
106131

107132
def run_webwand_task(driver, task_id, task_description):
    """
    Start a task in the page and wait for it via add_task_listener().

    Dispatches the 'SetAPIKey', 'SetTask' and 'RunTask' events in that order,
    then returns whatever add_task_listener() reports for the task.
    """
    print('run_webwand_task', task_id, task_description)

    # Event order matters: configure the key and the task before running it.
    startup_events = (
        ('SetAPIKey', {"value": api_key}),
        ('SetTask', {"value": task_description}),
        ('RunTask', {}),
    )
    for event_name, payload in startup_events:
        dispatch_event(driver, event_name, payload)

    return add_task_listener(driver, task_id)
139+
140+
def click_extensions_icon(driver):
    """
    Simulate the two mouse clicks that open the side panel.

    Click positions are computed relative to the top-right corner of the
    browser window reported by driver.get_window_rect().
    """
    rect = driver.get_window_rect()
    top_edge = rect['y']
    right_edge = rect['x'] + rect['width']

    # (offset left of the right edge, offset below the top edge):
    # first the Extensions icon, then the webwand entry.
    click_offsets = ((150, 50), (300, 210))
    for from_right, from_top in click_offsets:
        pyautogui.click(right_edge - from_right, top_edge + from_top)
112150

113151
def main():
114152
driver = setup_driver()
@@ -121,12 +159,13 @@ def main():
121159
driver.get(task['web'])
122160

123161
if initial_load:
124-
# Simulate click to open side panel
125-
pyautogui.click(extensions_pos)
126-
pyautogui.click(web_wand_pos)
162+
click_extensions_icon(driver)
127163
initial_load = False
128164

129-
run_webwand_task(driver, task_id, task['ques'])
165+
task_status = run_webwand_task(driver, task_id, task['ques'])
166+
while task_status not in ['success', 'error']:
167+
print("wait task_status", task_status)
168+
time.sleep(3) # Wait for 3 seconds till the current task completes
130169
driver.quit()
131170

132171
if __name__ == "__main__":

0 commit comments

Comments
 (0)