diff --git a/.gitignore b/.gitignore index f01e1e1..72cfec2 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,9 @@ utils/reload/*.js utils/reload/injections/*.js public/manifest.json + +# benchmarking +/results +tasks_test.jsonl +webwand_test_log.txt +tasks_status.txt \ No newline at end of file diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000..db7becc --- /dev/null +++ b/benchmark.py @@ -0,0 +1,205 @@ +""" +To run benchmark testing, copy and paste corresponding test dataset from https://github.com/MinorJerry/WebVoyager/blob/main/data/WebVoyager_data.jsonl +to the tasks_test.jsonl file. +Then in terminal, first run "pnpm dev" to start the webwand server. +Then run "python benchmark.py" +""" + +import base64 +import os +import json +import time +import pyautogui +import logging +from dotenv import load_dotenv +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service +from webdriver_manager.chrome import ChromeDriverManager +from selenium.common.exceptions import WebDriverException + +load_dotenv() +api_key = os.getenv('OPENAI_API_KEY') + +dataset = 'tasks_test.jsonl' + +# Place to store task execution results +results_dir = 'results' +os.makedirs(results_dir, exist_ok=True) + +# Setup logging +logs_path = 'webwand_test_log.txt' +logging.basicConfig(filename=logs_path, level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s') + +def setup_driver(): + chrome_options = Options() + # Load the unpacked webwand chrome extension + chrome_options.add_argument("--load-extension=./dist") + chrome_options.add_argument("--window-size=1600,1000") + service = Service(ChromeDriverManager().install()) + driver = webdriver.Chrome(service=service, options=chrome_options) + # Set script timeout to 120 seconds + driver.set_script_timeout(120) + return driver + +def dispatch_event(driver, event_name, event): + logging.info(f'Dispatched event {event_name}') + script = f""" + var event = new CustomEvent('{event_name}', {{ detail: {json.dumps(event)} }}); + document.dispatchEvent(event); + """ + driver.execute_script(script) + +def add_task_listener(driver, task_id, max_retries=3): + logging.info(f'Adding task listeners for task {task_id}') + """ + Add event listeners for task history and screenshot events. Both events include task status. + Then process those events as they are captured. + """ + + script = f""" + var callback = arguments[0]; + var eventListener = function (e) {{ + console.log("received evnet"); + console.log(e); + if (e.detail.type == 'history') {{ + if (e.detail.status === 'success' || e.detail.status === 'error') {{ + callback({{status: e.detail.status, type: 'history', data: e.detail.data, errorMessage: e.detail.errorMessage}}); + document.removeEventListener('TaskUpdate', eventListener); // Optional: remove if you need continuous listening + console.log("listener removed after history"); + }} + // Does not do anything when the status is 'running' or 'idle'. + // The status 'interrupted' will never be triggered automatically. + }} else if (e.detail.type == 'screenshot') {{ + callback({{status: e.detail.status, type: 'screenshot', data: e.detail.data}}); + }} else {{ + throw new Error("Invalid event type received: " + e.detail.type); + }} + }}; + + document.addEventListener('TaskUpdate', eventListener); + console.log("added event listener"); + """ + + attempts = 0 + result = "" + + def handle_event(event_data): + nonlocal attempts + nonlocal result + if not event_data: + logging.info("No event data received") + return + if event_data['type'] == 'history': + # Record history when task stops + if event_data['status'] == 'error': + logging.error(f"Task {task_id} error: {event_data['errorMessage']}") + result = event_data['status'] + write_history(task_id, event_data['data']) + attempts = 0 + return + if event_data['type'] == 'screenshot': + write_screenshots(task_id, event_data['data']) + attempts = 0 + # Task is still running. Continue to listen for events + handle_event(driver.execute_async_script(script)) + else: + logging.error(f"Unhandled event data type: {event_data['type']}") + result = "script-error" + return + + while attempts < max_retries: + try: + logging.info("Setting up event listener...") + handle_event(driver.execute_async_script(script)) + break + except WebDriverException as e: + if "javascript error: document unloaded while waiting for result" in str(e): + attempts += 1 + logging.warning(f'Document unloaded error during task {task_id} attempt {attempts}: {str(e)}') + if attempts == max_retries: + logging.error(f'Maximum retry attempts reached for task {task_id}.') + result = 'doc-unload-max-retry' + break + else: + logging.info("Retrying...") + elif "script timeout" in str(e): + logging.error(f'Script timeout for task {task_id}: {str(e)}') + result = 'js-script-timeout' + break + else: + logging.error(f'WebDriver exception for task {task_id}: {str(e)}') + result = 'webdriver-error' + break + except Exception as e: + logging.error(f'Unhandled error for task {task_id}: {str(e)}') + result = 'python-script-error' + break + return result + +def write_history(task_id, task_history): + task_dir = os.path.join(results_dir, f"test{task_id}") + os.makedirs(task_dir, exist_ok=True) + file_path = os.path.join(task_dir, 'interact_messages.json') + + with open(file_path, 'w') as file: + json.dump(task_history, file, indent=4) + logging.info(f'History saved for task {task_id}') + +def write_screenshots(task_id, image_data): + image_bytes = base64.b64decode(image_data) + task_dir = os.path.join(results_dir, f"test{task_id}") + os.makedirs(task_dir, exist_ok=True) + timestamp = int(time.time()) + file_path = os.path.join(task_dir, f'screenshot_{timestamp}.png') + with open(file_path, 'wb') as file: + file.write(image_bytes) + logging.info(f'Screenshot saved for task {task_id}') + +def run_webwand_task(driver, task_id, task_description): + logging.info(f'Start running task {task_id} {task_description}') + start = time.time() + dispatch_event(driver, 'SetAPIKey', {"value": api_key}) + dispatch_event(driver, 'SetTask', {"value": task_description}) + dispatch_event(driver, 'RunTask', {}) + result = add_task_listener(driver, task_id) + end = time.time() + logging.info(f'Task {task_id} took {end - start} seconds to complete.') + return result + +def click_extensions_icon(driver): + # Simulate click to open side panel + window_position = driver.get_window_rect() + top = window_position['y'] + right = window_position['x'] + window_position['width'] + # click Extensions icon + pyautogui.click(right - 165, top + 50) + # click webwand + pyautogui.click(right - 300, top + 210) + +def main(): + driver = setup_driver() + initial_load = True + + with open(dataset, 'r') as file: + for line in file: + logging.info(f'-------------------------------------') + task = json.loads(line) + task_id = task["id"] + driver.get(task['web']) + + if initial_load: + click_extensions_icon(driver) + initial_load = False + + result = run_webwand_task(driver, task_id, task['ques']) + logging.info(f'Task {task_id} status: {result}') + # Optional: if the previous task timed out, reset the driver after each task to ensure proper state for the next task + if result == "js-script-timeout": + driver.quit() + driver = setup_driver() + initial_load = True + driver.quit() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/example_results/testGoogle--0/interact_messages.json b/example_results/testGoogle--0/interact_messages.json new file mode 100644 index 0000000..dcd2642 --- /dev/null +++ b/example_results/testGoogle--0/interact_messages.json @@ -0,0 +1,37 @@ +[ + { + "action": { + "operation": { + "args": { + "label": "8", + "value": "bananas" + }, + "name": "setValueAndEnter" + }, + "thought": "I will enter 'bananas' into the search bar and then press Enter to perform the search." + }, + "prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 4:24:11 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n", + "response": "{\n \"thought\": \"I will enter 'bananas' into the search bar and then press Enter to perform the search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}", + "usage": { + "completion_tokens": 66, + "prompt_tokens": 2599, + "total_tokens": 2665 + } + }, + { + "action": { + "operation": { + "args": {}, + "name": "finish" + }, + "thought": "The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas." + }, + "prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' into the search bar and then press Enter to perform the search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 4:24:20 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 22.2%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Fruit\ntagName = A\nrole = link\n===\nlabel = 24\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n", + "response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas.\",\n \"action\": {\n \"name\": \"finish\",\n \"args\": {}\n }\n}", + "usage": { + "completion_tokens": 47, + "prompt_tokens": 3148, + "total_tokens": 3195 + } + } +] \ No newline at end of file diff --git a/example_results/testGoogle--0/screenshot_1715891051.png b/example_results/testGoogle--0/screenshot_1715891051.png new file mode 100644 index 0000000..bbf0d44 Binary files /dev/null and b/example_results/testGoogle--0/screenshot_1715891051.png differ diff --git a/example_results/testGoogle--0/screenshot_1715891060.png b/example_results/testGoogle--0/screenshot_1715891060.png new file mode 100644 index 0000000..2138be5 Binary files /dev/null and b/example_results/testGoogle--0/screenshot_1715891060.png differ diff --git a/example_results/testGoogle--1/interact_messages.json b/example_results/testGoogle--1/interact_messages.json new file mode 100644 index 0000000..3ae1493 --- /dev/null +++ b/example_results/testGoogle--1/interact_messages.json @@ -0,0 +1,36 @@ +[ + { + "action": { + "operation": { + "args": { + "label": "9", + "value": "apples" + }, + "name": "setValueAndEnter" + }, + "thought": "I will enter 'apples' into the search bar and press Enter to search." + }, + "prompt": "The user requests the following task:\n\n Search for apples\n\n\nCurrent time: 5/16/2024, 4:24:30 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Share\ntagName = BUTTON\n===\nlabel = 9\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 10\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 12\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 13\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 14\nname = Advertising\ntagName = A\n===\nlabel = 15\nname = Business\ntagName = A\n===\nlabel = 16\nname = How Search works\ntagName = A\n===\nlabel = 17\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 18\nname = Privacy\ntagName = A\n===\nlabel = 19\nname = Terms\ntagName = A\n===\nlabel = 20\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 9\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n", + "response": "{\n \"thought\": \"I will enter 'apples' into the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"9\",\n \"value\": \"apples\"\n }\n }\n}", + "usage": { + "completion_tokens": 63, + "prompt_tokens": 2614, + "total_tokens": 2677 + } + }, + { + "action": { + "operation": { + "name": "finish" + }, + "thought": "The search results for 'apples' are displayed. The results include links to Wikipedia, Washington Apples, and Apple Inc., as well as sponsored products like Organic Rainbow Apples, Gala Apples, and Honeycrisp Apple Giant Ultra-Premium." + }, + "prompt": "The user requests the following task:\n\n Search for apples\nYou have already taken the following actions: \nThought: I will enter 'apples' into the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"9\",\"value\":\"apples\"}}\n\n\n\nCurrent time: 5/16/2024, 4:24:39 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 16.3%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = apples\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = News\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Nutrition\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add Fruit\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Clipart\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Top rated\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Green\ntagName = A\nrole = link\n===\nlabel = 24\nname = Add Variety chart\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Stay signed out\ntagName = G-RAISED-BUTTON\nrole = button\n===\nlabel = 27\nname = Sign in\ntagName = G-RAISED-BUTTON\nrole = button\n===\nlabel = 28\nname = Apple Wikipedia https://en.wikipedia.org \u203a wiki \u203a Apple\ntagName = A\n===\nlabel = 29\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 30\nname = Apple Inc.\ntagName = A\n===\nlabel = 31\nname = List of apple cultivars\ntagName = A\n===\nlabel = 32\nname = Welsh apples\ntagName = A\n===\nlabel = 33\nname = Countries by apple production\ntagName = A\n===\nlabel = 34\nname = All Apple Varieties Washington Apples https://waapple.org \u203a Apple Varieties\ntagName = A\n===\nlabel = 35\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = Apple Apple https://www.apple.com\ntagName = A\n===\nlabel = 37\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Why this ad?\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Image of Organic Rainbow Apples\ntagName = DIV\nrole = link\n===\nlabel = 40\nname = Organic Rainbow Apples\ntagName = DIV\nrole = link\n===\nlabel = 41\nname = Image of Gala Apples Fresh Produce Fruit, 3 LB Bag\ntagName = DIV\nrole = link\n===\nlabel = 42\nname = Gala Apples Fresh Produce Fruit, 3 LB Bag\ntagName = DIV\nrole = link\n===\nlabel = 43\nname = Image of Honeycrisp Apple Giant Ultra-Premium\ntagName = DIV\nrole = link\n===\nlabel = 44\nname = Honeycrisp Apple Giant Ultra-Premium\ntagName = DIV\nrole = link\n===\nlabel = 45\nname = Image of Apple Red\ntagName = DIV\nrole = link\n===\nlabel = 46\nname = Apple Red\ntagName = DIV\nrole = link\n", + "response": "{\n \"thought\": \"The search results for 'apples' are displayed. The results include links to Wikipedia, Washington Apples, and Apple Inc., as well as sponsored products like Organic Rainbow Apples, Gala Apples, and Honeycrisp Apple Giant Ultra-Premium.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}", + "usage": { + "completion_tokens": 69, + "prompt_tokens": 3227, + "total_tokens": 3296 + } + } +] \ No newline at end of file diff --git a/example_results/testGoogle--1/screenshot_1715891070.png b/example_results/testGoogle--1/screenshot_1715891070.png new file mode 100644 index 0000000..7c6a31d Binary files /dev/null and b/example_results/testGoogle--1/screenshot_1715891070.png differ diff --git a/example_results/testGoogle--1/screenshot_1715891079.png b/example_results/testGoogle--1/screenshot_1715891079.png new file mode 100644 index 0000000..162ef37 Binary files /dev/null and b/example_results/testGoogle--1/screenshot_1715891079.png differ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1ac0d9b..bce9d77 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -5532,7 +5532,6 @@ packages: /inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} - dev: true /ini@4.1.1: resolution: {integrity: sha512-QQnnxNyfvmHFIsj7gkPcYymR8Jdw/o7mp5ZFihxn6h8Ci6fh3Dx4E1gPjpQEpIuPo9XVNY/ZUwh4BPMjGyL01g==} @@ -5798,6 +5797,10 @@ packages: get-intrinsic: 1.2.4 dev: true + /isarray@1.0.0: + resolution: {integrity: sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==} + dev: false + /isarray@2.0.5: resolution: {integrity: sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==} dev: true @@ -6457,6 +6460,15 @@ packages: object.values: 1.1.7 dev: true + /jszip@3.10.1: + resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==} + dependencies: + lie: 3.3.0 + pako: 1.0.11 + readable-stream: 2.3.8 + setimmediate: 1.0.5 + dev: false + /keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} dependencies: @@ -6492,6 +6504,12 @@ packages: type-check: 0.4.0 dev: true + /lie@3.3.0: + resolution: {integrity: sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==} + dependencies: + immediate: 3.0.6 + dev: false + /lilconfig@2.1.0: resolution: {integrity: sha512-utWOt/GHzuUxnLKxB6dk81RoOeoNeHgbrXiuGk4yyF5qlRz+iIVWu56E2fqGHFrXz0QNUhLB/8nKqvRH66JKGQ==} engines: {node: '>=10'} @@ -7071,6 +7089,10 @@ packages: engines: {node: '>=6'} dev: true + /pako@1.0.11: + resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} + dev: false + /parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} engines: {node: '>=6'} @@ -7316,6 +7338,10 @@ packages: engines: {node: '>=6'} dev: false + /process-nextick-args@2.0.1: + resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} + dev: false + /prompts@2.4.2: resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} engines: {node: '>= 6'} @@ -7566,6 +7592,18 @@ packages: path-type: 3.0.0 dev: true + /readable-stream@2.3.8: + resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==} + dependencies: + core-util-is: 1.0.3 + inherits: 2.0.4 + isarray: 1.0.0 + process-nextick-args: 2.0.1 + safe-buffer: 5.1.2 + string_decoder: 1.1.1 + util-deprecate: 1.0.2 + dev: false + /readdirp@3.6.0: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: '>=8.10.0'} @@ -7767,6 +7805,10 @@ packages: isarray: 2.0.5 dev: true + /safe-buffer@5.1.2: + resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==} + dev: false + /safe-buffer@5.2.1: resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} dev: true @@ -7820,6 +7862,18 @@ packages: engines: {node: '>=0.10.0'} dev: false + /selenium-webdriver@4.20.0: + resolution: {integrity: sha512-s/G44lGQ1xB3tmtX6NNPomlkpL6CxLdmAvp/AGWWwi4qv5Te1+qji7tPSyr6gyuoPpdYiof1rKnWe3luy0MrYA==} + engines: {node: '>= 14.20.0'} + dependencies: + jszip: 3.10.1 + tmp: 0.2.3 + ws: 8.16.0 + transitivePeerDependencies: + - bufferutil + - utf-8-validate + dev: false + /semver@5.7.2: resolution: {integrity: sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==} hasBin: true @@ -7870,6 +7924,10 @@ packages: engines: {node: '>=6.9'} dev: false + /setimmediate@1.0.5: + resolution: {integrity: sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==} + dev: false + /shebang-command@1.2.0: resolution: {integrity: sha512-EV3L1+UQWGor21OmnvojK36mhg+TyIKDh3iFBKBohr5xeXIhNBcx8oWdgkTEEQ+BEFFYdLRuqMfd5L84N1V5Vg==} engines: {node: '>=0.10.0'} @@ -8142,6 +8200,12 @@ packages: es-abstract: 1.22.3 dev: true + /string_decoder@1.1.1: + resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==} + dependencies: + safe-buffer: 5.1.2 + dev: false + /strip-ansi@6.0.1: resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} engines: {node: '>=8'} @@ -8358,6 +8422,11 @@ packages: resolution: {integrity: sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==} dev: false + /tmp@0.2.3: + resolution: {integrity: sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==} + engines: {node: '>=14.14'} + dev: false + /tmpl@1.0.5: resolution: {integrity: sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==} dev: true @@ -8956,7 +9025,6 @@ packages: optional: true utf-8-validate: optional: true - dev: true /xml-name-validator@4.0.0: resolution: {integrity: sha512-ICP2e+jsHvAj2E2lIHxa5tjXRlKDJo4IdvPvCXbXQGdzSfmSpNVyIKMvoZHjDY9DP0zV17iI85o90vRFXNccRw==} diff --git a/process_log.py b/process_log.py new file mode 100644 index 0000000..bc052d0 --- /dev/null +++ b/process_log.py @@ -0,0 +1,32 @@ +import re + +def process_log_file(log_file_path): + with open(log_file_path, 'r') as file: + log_data = file.readlines() + + task_status_pattern = re.compile(r"Task ([\w\s-]+--\d+) status: (script-error|success|error|fail|doc-unload-max-retry|js-script-timeout|webdriver-error|python-script-error)") + tasks = [] + + for line in log_data: + match = task_status_pattern.search(line) + if match: + task_id = match.group(1) + status = match.group(2) + tasks.append((task_id, status)) + + return tasks + +def write_task_status(tasks): + header = "Task_id\tTask_status" + rows = [f"{task_id}\t{status}" for task_id, status in tasks] + return header + "\n" + "\n".join(rows) + +log_file_path = 'webwand_test_log.txt' +tasks = process_log_file(log_file_path) +formatted_output = write_task_status(tasks) + +output_file_path = 'tasks_status.txt' +with open(output_file_path, 'w') as output_file: + output_file.write(formatted_output) + +print(f"Tasks results are saved to {output_file_path}") diff --git a/src/common/App.tsx b/src/common/App.tsx index a8ab096..5a03038 100644 --- a/src/common/App.tsx +++ b/src/common/App.tsx @@ -6,10 +6,11 @@ import { HStack, IconButton, Icon, + useToast, } from "@chakra-ui/react"; import { SettingsIcon } from "@chakra-ui/icons"; import { FaDiscord, FaGithub } from "react-icons/fa6"; -import { useState } from "react"; +import { useState, useEffect, useCallback } from "react"; import { useAppState } from "../state/store"; import SetAPIKey from "./SetAPIKey"; import TaskUI from "./TaskUI"; @@ -19,8 +20,53 @@ const App = () => { const hasAPIKey = useAppState( (state) => state.settings.anthropicKey || state.settings.openAIKey, ); + const { updateSettings } = useAppState((state) => ({ + updateSettings: state.settings.actions.update, + })); + const taskState = useAppState((state) => ({ + runTask: state.currentTask.actions.runTask, + setInstructions: state.ui.actions.setInstructions, + })); const [inSettingsView, setInSettingsView] = useState(false); + const toast = useToast(); + const toastError = useCallback( + (message: string) => { + toast({ + title: "Error", + description: message, + status: "error", + duration: 5000, + isClosable: true, + }); + }, + [toast], + ); + + useEffect(() => { + const handleMessage = (message: { type: string; value: string }) => { + switch (message.type) { + case "API_KEY": + updateSettings({ openAIKey: message.value }); + break; + case "SET_TASK": + taskState.setInstructions(message.value); + break; + case "RUN_TASK": + taskState.runTask(toastError); + break; + default: + console.log("Unhandled message type:", message.type); + } + }; + + chrome.runtime.onMessage.addListener(handleMessage); + + return () => { + chrome.runtime.onMessage.removeListener(handleMessage); + }; + }); + return ( diff --git a/src/constants.ts b/src/constants.ts index dbad56e..4bdf00a 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -2,6 +2,9 @@ export const TAXY_ELEMENT_SELECTOR = "data-taxy-node-id"; export const VISIBLE_TEXT_ATTRIBUTE_NAME = "data-web-wand-visible-text"; export const ARIA_LABEL_ATTRIBUTE_NAME = "data-web-wand-aria-label"; export const WEB_WAND_LABEL_ATTRIBUTE_NAME = "data-web-wand-label"; +export const SetAPIKey = "SetAPIKey"; +export const SetTask = "SetTask"; +export const RunTask = "RunTask"; // read from env export const debugMode = import.meta.env.VITE_DEBUG_MODE === "true"; diff --git a/src/helpers/aiSdkUtils.ts b/src/helpers/aiSdkUtils.ts index 0b4d684..cfba064 100644 --- a/src/helpers/aiSdkUtils.ts +++ b/src/helpers/aiSdkUtils.ts @@ -17,7 +17,7 @@ function isSupportedModel(value: string): value is SupportedModels { return enumValues(SupportedModels).includes(value as SupportedModels); } -export const DEFAULT_MODEL = SupportedModels.Gpt4Turbo; +export const DEFAULT_MODEL = SupportedModels.Gpt4O; export const DisplayName = { [SupportedModels.Gpt35Turbo16k]: "GPT-3.5 Turbo (16k)", diff --git a/src/helpers/buildAnnotatedScreenshots.ts b/src/helpers/buildAnnotatedScreenshots.ts index 3fe283c..f460a6a 100644 --- a/src/helpers/buildAnnotatedScreenshots.ts +++ b/src/helpers/buildAnnotatedScreenshots.ts @@ -7,7 +7,7 @@ import { type Knowledge } from "./knowledge"; export default async function buildAnnotatedScreenshots( tabId: number, knowledge: Knowledge, -): Promise<[string, LabelData[]]> { +): Promise<[string, string, LabelData[]]> { const imgDataRaw = await chrome.tabs.captureVisibleTab({ format: "png", }); @@ -23,5 +23,5 @@ export default async function buildAnnotatedScreenshots( await sleep(300); await callRPCWithTab(tabId, "removeLabels", []); - return [imgData, labelData]; + return [imgDataRaw, imgData, labelData]; } diff --git a/src/pages/background/index.ts b/src/pages/background/index.ts index e58b2cc..4b92c1e 100644 --- a/src/pages/background/index.ts +++ b/src/pages/background/index.ts @@ -1,3 +1,4 @@ +import { findActiveTab } from "@root/src/helpers/browserUtils"; import reloadOnUpdate from "virtual:reload-on-update-in-background-script"; import "webextension-polyfill"; @@ -9,14 +10,12 @@ reloadOnUpdate("pages/background"); */ reloadOnUpdate("pages/content/style.scss"); -console.log("background loaded"); - // Allows users to open the side panel by clicking on the action toolbar icon chrome.sidePanel .setPanelBehavior({ openPanelOnActionClick: true }) .catch((error) => console.error(error)); -chrome.runtime.onMessage.addListener((message) => { +chrome.runtime.onMessage.addListener(async (message) => { if (message.action === "injectFunctions") { if (message.tabId == null) { console.log("no active tab found"); @@ -28,5 +27,26 @@ chrome.runtime.onMessage.addListener((message) => { }); } return true; + } else if (message.action === "updateHistory") { + // Forward message to content script + const tab = await findActiveTab(); + if (tab?.id !== undefined) { + console.log("sending updateHistory message to content script"); + chrome.tabs.sendMessage(tab.id, message); + } + } else if (message.action === "sendScreenshot") { + const imageDataBase64 = message.imgData.split(",")[1] || message.imgData; + const tab = await findActiveTab(); + if (tab?.id !== undefined) { + console.log("sending sendScreenshot message to content script"); + chrome.tabs.sendMessage(tab.id, { + action: message.action, + status: message.status, + imgData: imageDataBase64, + }); + } + } else { + // Broadcast to other parts of the extension + chrome.runtime.sendMessage(message); } }); diff --git a/src/pages/content/injected.ts b/src/pages/content/injected.ts index d585e04..4b09ec3 100644 --- a/src/pages/content/injected.ts +++ b/src/pages/content/injected.ts @@ -1,5 +1,70 @@ // The content script runs inside each page this extension is enabled on +import { RunTask, SetAPIKey, SetTask } from "@root/src/constants"; import { initializeRPC } from "./domOperations"; initializeRPC(); + +document.addEventListener(SetAPIKey, function (event) { + if (isCustomEvent(event)) { + const customEvent = event as CustomEvent; + chrome.runtime.sendMessage({ + type: "API_KEY", + value: customEvent.detail.value, + }); + } +}); + +document.addEventListener(SetTask, function (event) { + if (isCustomEvent(event)) { + const customEvent = event as CustomEvent; + chrome.runtime.sendMessage({ + type: "SET_TASK", + value: customEvent.detail.value, + }); + } +}); + +document.addEventListener(RunTask, function () { + chrome.runtime.sendMessage({ type: "RUN_TASK" }); +}); + +// Listen for messages from the background script +chrome.runtime.onMessage.addListener(function (message) { + switch (message.action) { + case "updateHistory": + console.log("sending status and history event"); + dispatchCustomEvent("TaskUpdate", { + type: "history", + status: message.status, + data: message.history, + errorMessage: message.error, + }); + break; + case "sendScreenshot": + console.log("sending screenshot event"); + dispatchCustomEvent("TaskUpdate", { + type: "screenshot", + status: message.status, + data: message.imgData, + }); + break; + } +}); + +type CustomEventDetail = { + type: string; + status: string; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + data: any; + errorMessage?: string; +}; + +function dispatchCustomEvent(eventType: string, detail: CustomEventDetail) { + const event = new CustomEvent(eventType, { detail }); + document.dispatchEvent(event); +} + +function isCustomEvent(event: Event): event is CustomEvent { + return "detail" in event; +} diff --git a/src/state/currentTask.ts b/src/state/currentTask.ts index 93e3979..93c3299 100644 --- a/src/state/currentTask.ts +++ b/src/state/currentTask.ts @@ -42,6 +42,7 @@ export type CurrentTaskSlice = { history: TaskHistoryEntry[]; status: "idle" | "running" | "success" | "error" | "interrupted"; knowledgeInUse: Knowledge | null; + errorMessage: string; actionStatus: | "idle" | "attaching-debugger" @@ -73,6 +74,7 @@ export const createCurrentTaskSlice: MyStateCreator = ( status: "idle", actionStatus: "idle", knowledgeInUse: null, + errorMessage: "", actions: { runTask: async (onError) => { const voiceMode = get().settings.voiceMode; @@ -132,6 +134,7 @@ export const createCurrentTaskSlice: MyStateCreator = ( if (query == null) { set((state) => { state.currentTask.status = "error"; + state.currentTask.errorMessage = "Query is null."; }); return false; } @@ -208,10 +211,13 @@ export const createCurrentTaskSlice: MyStateCreator = ( }); setActionStatus("annotating-page"); - const [imgData, labelData] = await buildAnnotatedScreenshots( - tabId, - knowledge, - ); + const [imgDataRaw, imgData, labelData] = + await buildAnnotatedScreenshots(tabId, knowledge); + chrome.runtime.sendMessage({ + action: "sendScreenshot", + status: get().currentTask.status, + imgData: imgDataRaw, + }); const viewportPercentage = await callRPCWithTab( tabId, "getViewportPercentage", @@ -236,6 +242,7 @@ export const createCurrentTaskSlice: MyStateCreator = ( if (!pageDOM) { set((state) => { state.currentTask.status = "error"; + state.currentTask.errorMessage = "No page dom found"; }); break; } @@ -260,7 +267,8 @@ export const createCurrentTaskSlice: MyStateCreator = ( // While testing let's automatically stop after 50 actions to avoid // infinite loops if (get().currentTask.history.length >= 50) { - break; + throw new Error("Max number of actions reached"); + // break; } setActionStatus("waiting"); @@ -274,6 +282,7 @@ export const createCurrentTaskSlice: MyStateCreator = ( onError(e.message); set((state) => { state.currentTask.status = "error"; + state.currentTask.errorMessage = e.message; }); } finally { await detachAllDebuggers(); @@ -309,7 +318,8 @@ export const createCurrentTaskSlice: MyStateCreator = ( new URL(activeTab.url ?? ""), customKnowledgeBase, ); - const [imgData, labelData] = await buildAnnotatedScreenshots( + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const [imgDataRaw, imgData, labelData] = await buildAnnotatedScreenshots( tabId, knowledge, ); diff --git a/src/state/settings.ts b/src/state/settings.ts index 8d461ea..e019b7b 100644 --- a/src/state/settings.ts +++ b/src/state/settings.ts @@ -19,7 +19,7 @@ export const createSettingsSlice: MyStateCreator = (set) => ({ anthropicKey: undefined, openAIBaseUrl: undefined, anthropicBaseUrl: undefined, - selectedModel: SupportedModels.Gpt4Turbo, + selectedModel: SupportedModels.Gpt4O, voiceMode: false, customKnowledgeBase: {}, actions: { diff --git a/src/state/store.ts b/src/state/store.ts index 9188007..ea688fe 100644 --- a/src/state/store.ts +++ b/src/state/store.ts @@ -1,7 +1,12 @@ import { merge } from "lodash"; import { create, StateCreator } from "zustand"; import { immer } from "zustand/middleware/immer"; -import { createJSONStorage, devtools, persist } from "zustand/middleware"; +import { + createJSONStorage, + devtools, + persist, + subscribeWithSelector, +} from "zustand/middleware"; import { createCurrentTaskSlice, CurrentTaskSlice } from "./currentTask"; import { createUiSlice, UiSlice } from "./ui"; import { createSettingsSlice, SettingsSlice } from "./settings"; @@ -15,50 +20,72 @@ export type StoreType = { export type MyStateCreator = StateCreator< StoreType, - [["zustand/immer", never]], + [["zustand/immer", never], ["zustand/subscribeWithSelector", never]], [], T >; export const useAppState = create()( - persist( - immer( - devtools((...a) => ({ - currentTask: createCurrentTaskSlice(...a), - ui: createUiSlice(...a), - settings: createSettingsSlice(...a), - })), - ), - { - name: "app-state", - storage: createJSONStorage(() => localStorage), - partialize: (state) => ({ - // Stuff we want to persist - ui: { - instructions: state.ui.instructions, - }, - settings: { - openAIKey: state.settings.openAIKey, - anthropicKey: state.settings.anthropicKey, - openAIBaseUrl: state.settings.openAIBaseUrl, - anthropicBaseUrl: state.settings.anthropicBaseUrl, - selectedModel: state.settings.selectedModel, - voiceMode: state.settings.voiceMode, - customKnowledgeBase: state.settings.customKnowledgeBase, + subscribeWithSelector( + persist( + immer( + devtools((...a) => ({ + currentTask: createCurrentTaskSlice(...a), + ui: createUiSlice(...a), + settings: createSettingsSlice(...a), + })), + ), + { + name: "app-state", + storage: createJSONStorage(() => localStorage), + partialize: (state) => ({ + // Stuff we want to persist + ui: { + instructions: state.ui.instructions, + }, + settings: { + openAIKey: state.settings.openAIKey, + anthropicKey: state.settings.anthropicKey, + openAIBaseUrl: state.settings.openAIBaseUrl, + anthropicBaseUrl: state.settings.anthropicBaseUrl, + selectedModel: state.settings.selectedModel, + voiceMode: state.settings.voiceMode, + customKnowledgeBase: state.settings.customKnowledgeBase, + }, + }), + merge: (persistedState, currentState) => { + const result = merge(currentState, persistedState); + result.settings.selectedModel = findBestMatchingModel( + result.settings.selectedModel, + result.settings.openAIKey, + result.settings.anthropicKey, + ); + return result; }, - }), - merge: (persistedState, currentState) => { - const result = merge(currentState, persistedState); - result.settings.selectedModel = findBestMatchingModel( - result.settings.selectedModel, - result.settings.openAIKey, - result.settings.anthropicKey, - ); - return result; }, - }, + ), ), ); +useAppState.subscribe( + (state) => state.currentTask.status, + // eslint-disable-next-line @typescript-eslint/no-unused-vars + (status, previousStatus) => { + const message = { + action: "updateHistory", + status, + history: useAppState.getState().currentTask.history, + error: "", + }; + + if (status === "error") { + message.error = useAppState.getState().currentTask.errorMessage; + } + + chrome.runtime.sendMessage(message); + }, + { fireImmediately: true }, +); + // @ts-expect-error used for debugging window.getState = useAppState.getState; diff --git a/tasks_test.jsonl b/tasks_test.jsonl new file mode 100644 index 0000000..33b0653 --- /dev/null +++ b/tasks_test.jsonl @@ -0,0 +1,7 @@ +{"web_name": "Coursera", "id": "Coursera--31", "ques": "Search for the course 'Exploring Quantum Physics' on Coursera, offered by the University of Maryland, College Park. Identify the percentage (rounded) of 5-star ratings in the reviews.", "web": "https://www.coursera.org/"} +{"web_name": "Coursera", "id": "Coursera--32", "ques": "Search for 'Data Analysis' courses on Coursera. Apply filters to find courses that are 'Beginner Level' and have a duration ranging from 1 to 3 months. Determine the total count of courses that match these specifications.", "web": "https://www.coursera.org/"} +{"web_name": "Coursera", "id": "Coursera--33", "ques": "Find a beginner level Coursera course related to \"Internet of Things (IoT)\" with a high rating. Provide the course name, instructor's name, and a brief summary of the skills that will be taught.", "web": "https://www.coursera.org/"} +{"web_name": "Coursera", "id": "Coursera--34", "ques": "Find the course on Coursera named 'Essentials of Global Health'. Determine the instructor of this course and summarize his bio, note if there are any additional courses he offers on Coursera.", "web": "https://www.coursera.org/"} +{"web_name": "Coursera", "id": "Coursera--35", "ques": "Find a Coursera course on Sustainable Agriculture practices, and detail the course's objectives and the background of the lead instructor.", "web": "https://www.coursera.org/"} +{"web_name": "Coursera", "id": "Coursera--36", "ques": "Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree?", "web": "https://www.coursera.org/"} +{"web_name": "Coursera", "id": "Coursera--39", "ques": "Find the Space Safety course offered by TUM on Coursera. How many videos are there in module 2? What is the name of each video?", "web": "https://www.coursera.org/"} \ No newline at end of file diff --git a/tasks_test_example.jsonl b/tasks_test_example.jsonl new file mode 100644 index 0000000..760c7f0 --- /dev/null +++ b/tasks_test_example.jsonl @@ -0,0 +1,2 @@ +{"web_name": "Google", "id": "Google--0", "ques": "Search for bananas", "web": "https://www.google.com/"} +{"web_name": "Google", "id": "Google--1", "ques": "Search for apples", "web": "https://www.google.com/"} \ No newline at end of file diff --git a/webwand_test_log_example.txt b/webwand_test_log_example.txt new file mode 100644 index 0000000..aa6f9c0 --- /dev/null +++ b/webwand_test_log_example.txt @@ -0,0 +1,26 @@ +2024-05-16 16:24:05,963:INFO:====== WebDriver manager ====== +2024-05-16 16:24:06,036:INFO:Get LATEST chromedriver version for google-chrome +2024-05-16 16:24:06,090:INFO:Get LATEST chromedriver version for google-chrome +2024-05-16 16:24:06,137:INFO:Driver [/Users/lingjie/.wdm/drivers/chromedriver/mac64/124.0.6367.207/chromedriver-mac-arm64/chromedriver] found in cache +2024-05-16 16:24:07,094:INFO:------------------------------------- +2024-05-16 16:24:08,209:INFO:Start running task Google--0 Search for bananas +2024-05-16 16:24:08,209:INFO:Dispatched event SetAPIKey +2024-05-16 16:24:08,211:INFO:Dispatched event SetTask +2024-05-16 16:24:08,213:INFO:Dispatched event RunTask +2024-05-16 16:24:08,215:INFO:Adding task listeners for task Google--0 +2024-05-16 16:24:11,312:INFO:Screenshot saved for task Google--0 +2024-05-16 16:24:20,689:INFO:Screenshot saved for task Google--0 +2024-05-16 16:24:26,987:INFO:History saved for task Google--0 +2024-05-16 16:24:26,987:INFO:Task Google--0 took 18.77788782119751 seconds to complete. +2024-05-16 16:24:26,987:INFO:Task Google--0 status: success +2024-05-16 16:24:26,987:INFO:------------------------------------- +2024-05-16 16:24:27,648:INFO:Start running task Google--1 Search for apples +2024-05-16 16:24:27,648:INFO:Dispatched event SetAPIKey +2024-05-16 16:24:27,652:INFO:Dispatched event SetTask +2024-05-16 16:24:27,654:INFO:Dispatched event RunTask +2024-05-16 16:24:27,656:INFO:Adding task listeners for task Google--1 +2024-05-16 16:24:30,641:INFO:Screenshot saved for task Google--1 +2024-05-16 16:24:39,938:INFO:Screenshot saved for task Google--1 +2024-05-16 16:24:47,633:INFO:History saved for task Google--1 +2024-05-16 16:24:47,633:INFO:Task Google--1 took 19.985090017318726 seconds to complete. +2024-05-16 16:24:47,633:INFO:Task Google--1 status: success