From 68c146b514afdcf9edfe55d01549360aef079d53 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 20 Feb 2024 20:41:32 -0800 Subject: [PATCH 1/4] add custom chain benchmarking --- .../extraction/chat_extraction.ipynb | 2 +- .../notebooks/extraction/custom-chain.ipynb | 372 ++++++++++++++++++ docs/source/notebooks/extraction/email.ipynb | 2 +- 3 files changed, 374 insertions(+), 2 deletions(-) create mode 100644 docs/source/notebooks/extraction/custom-chain.ipynb diff --git a/docs/source/notebooks/extraction/chat_extraction.ipynb b/docs/source/notebooks/extraction/chat_extraction.ipynb index 61409ec..fc908ce 100644 --- a/docs/source/notebooks/extraction/chat_extraction.ipynb +++ b/docs/source/notebooks/extraction/chat_extraction.ipynb @@ -3310,7 +3310,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.11.1" } }, "nbformat": 4, diff --git a/docs/source/notebooks/extraction/custom-chain.ipynb b/docs/source/notebooks/extraction/custom-chain.ipynb new file mode 100644 index 0000000..6388fad --- /dev/null +++ b/docs/source/notebooks/extraction/custom-chain.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7e8fc49a-e8b2-404b-a059-e9f668c460e5", + "metadata": {}, + "source": [ + "# Custom Chain\n", + "\n", + "This notebook shows how to evaluate a custom chain on ALL evaluation tasks.\n", + "\n", + "We will first define a `create_chain` function that creates a custom chain given a schema to extract. We will then iterate over all benchmark tasks for extraction and run our chain over them.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "758872ec-911b-4b62-99c3-6e6b73fad8e6", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install -U langchain-benchmarks langchain langchain-openai rapidfuzz" + ] + }, + { + "cell_type": "markdown", + "id": "101b0520-2a07-4fab-8cf5-59f81f55359b", + "metadata": {}, + "source": [ + "## Get the Benchmarks\n", + "\n", + "First, let's load the relevant benchmarks." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "86912590-a90a-4351-8ab4-89192cdee1e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
Name Type Dataset ID Description
Email ExtractionExtractionTaska1742786-bde5-4f51-a1d8-e148e5251ddbA dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\n", + "\n", + "Some additional cleanup of the data was done by hand after the initial pass.\n", + "\n", + "See https://github.com/jacoblee93/oss-model-extraction-evals.
Chat Extraction ExtractionTask00f4444c-9460-4a82-b87a-f50096f1cfefA dataset meant to test the ability of an LLM to extract and infer\n", + "structured information from a dialogue. The dialogue is between a user and a support\n", + "engineer. Outputs should be structured as a JSON object and test both the ability\n", + "of the LLM to correctly structure the information and its ability to perform simple \n", + "classification tasks.
" + ], + "text/plain": [ + "Registry(tasks=[ExtractionTask(name='Email Extraction', dataset_id='https://smith.langchain.com/public/a1742786-bde5-4f51-a1d8-e148e5251ddb/d', description='A dataset of 42 real emails deduped from a spam folder, with semantic HTML tags removed, as well as a script for initial extraction and formatting of other emails from an arbitrary .mbox file like the one exported by Gmail.\\n\\nSome additional cleanup of the data was done by hand after the initial pass.\\n\\nSee https://github.com/jacoblee93/oss-model-extraction-evals.\\n ', schema=, instructions=ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert researcher.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```'))])), ExtractionTask(name='Chat Extraction', dataset_id='https://smith.langchain.com/public/00f4444c-9460-4a82-b87a-f50096f1cfef/d', description='A dataset meant to test the ability of an LLM to extract and infer\\nstructured information from a dialogue. The dialogue is between a user and a support\\nengineer. Outputs should be structured as a JSON object and test both the ability\\nof the LLM to correctly structure the information and its ability to perform simple \\nclassification tasks.', schema=, instructions=ChatPromptTemplate(input_variables=['dialogue'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpdesk assistant responsible with extracting information and generating tickets. Dialogues are between a user and a support engineer.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['dialogue'], template='Generate a ticket for the following question-response pair:\\n\\n{dialogue}\\n'))]))])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_benchmarks import registry, clone_public_dataset\n", + "\n", + "registry.filter(Type=\"ExtractionTask\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "95f7df8b-5b50-409c-b7c4-190e96b3fbe1", + "metadata": {}, + "outputs": [], + "source": [ + "task = registry[\"Email Extraction\"]" + ] + }, + { + "cell_type": "markdown", + "id": "e91c9d74-598e-46c9-b50d-3163dc63588e", + "metadata": {}, + "source": [ + "Each task has instructions (which are a prompt) as well as a schema. You do not need to use the instructions but they may be helpful for quickly bootstrapping a default prompt." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "77a19239-f700-4e43-97a5-7ab7c14603ad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are an expert researcher.')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```'))])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "task.instructions" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "937c72b8-af2d-4f12-9314-4bea05297557", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "langchain_benchmarks.extraction.tasks.email_task.Email" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "task.schema" + ] + }, + { + "cell_type": "markdown", + "id": "cd1cc226-50dc-4b1c-ad78-ecaf8b381c5b", + "metadata": {}, + "source": [ + "## Define Chain Creation Function\n", + "\n", + "Here is where we put our logic for extracting things. We will make this function take in a prompt and an output schema (although it can really take in anything, you just need to modify the logic where it is called below)." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "2532e9d6-df61-45a4-9d11-a625747fcd7c", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "from langchain.output_parsers.openai_tools import JsonOutputToolsParser" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "361e11de-2070-4549-81a3-d4ec87bd2f40", + "metadata": {}, + "outputs": [], + "source": [ + "def create_extraction_chain(prompt, schema):\n", + " llm = ChatOpenAI(model=\"gpt-4-turbo-preview\", temperature=0).bind_tools(\n", + " tools=[schema],\n", + " )\n", + " \n", + " output_parser = JsonOutputToolsParser()\n", + " extraction_chain = prompt | llm | output_parser | (lambda x: {\"output\": x[0]['args']})\n", + " return extraction_chain" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "ae64eb6f-cfa2-49b3-8225-d77d2aec308a", + "metadata": {}, + "outputs": [], + "source": [ + "## Loop over tasks\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "a0f4b7d9-fa20-4053-aed5-94ebad97e6f2", + "metadata": {}, + "outputs": [], + "source": [ + "chains_to_eval = [\n", + " (\"openai-tools\", create_extraction_chain)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "665cabd0-fbf3-4f5a-91e2-6692f671bdb7", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "\n", + "from langsmith.client import Client\n", + "from langchain_benchmarks.extraction import get_eval_config\n", + "from langchain_benchmarks.extraction.tasks.chat_extraction import get_eval_config as get_chat_eval_config\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "0b7b763d-6f3e-4b1c-9482-94e08f58ab7b", + "metadata": {}, + "outputs": [], + "source": [ + "eval_configs = {\n", + " \"Email Extraction\": get_eval_config(),\n", + " \"Chat Extraction\": get_chat_eval_config()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "8a15c7ae-2079-46d4-811a-5e6b5afdf860", + "metadata": {}, + "outputs": [], + "source": [ + "prompts = {task.name: task.instructions for task in registry.filter(Type=\"ExtractionTask\")}" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "5342ef85-3d2c-4cbc-aabb-c539a635fab8", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.messages import SystemMessage, HumanMessage\n", + "\n", + "_email_template = \"\"\"What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```\"\"\"\n", + "def email_extraction_formatting(inputs):\n", + " return [HumanMessage(content=_email_template.format(input=inputs[\"input\"]))]\n", + "\n", + "_chat_template = \"\"\"Generate a ticket for the following question-response pair:\\n\\n{dialogue}\\n'\"\"\"\n", + "_chat_instructions = \"\"\"You are a helpdesk assistant responsible with extracting information and generating tickets. Dialogues are between a user and a support engineer.\"\"\"\n", + "\n", + "def format_run(dialogue_input: dict):\n", + " question = dialogue_input[\"question\"]\n", + " answer = dialogue_input[\"answer\"]\n", + " return {\n", + " \"dialogue\": f\"\\n{question}\\n\\n\"\n", + " f\"\\n{answer}\\n\"\n", + " }\n", + "\n", + "def chat_extraction_formatting(inputs):\n", + " dialogue = format_run(inputs)[\"dialogue\"]\n", + " return [\n", + " SystemMessage(content=_chat_instructions),\n", + " HumanMessage(content=_chat_template.format(dialogue=dialogue))\n", + " ]\n", + "\n", + "prompt_formatting = {\n", + " \"Email Extraction\": email_extraction_formatting,\n", + " \"Chat Extraction\": chat_extraction_formatting\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "0a69f9a5-0d85-4446-bc05-63b2573c1c24", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset Email Extraction already exists. Skipping.\n", + "You can access the dataset at https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/ccbb1190-dc59-45c8-8f5d-7a7a00fa4c4d.\n", + "Dataset Chat Extraction already exists. Skipping.\n", + "You can access the dataset at https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/b8637606-8ac0-4bab-9ad5-29796196cbbc.\n", + "\n", + "Benchmarking Chat Extraction on openai-tools\n", + "View the evaluation results for project 'openai-tools-Chat Extraction-2024-02-20T20:39:20.189708' at:\n", + "https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/b8637606-8ac0-4bab-9ad5-29796196cbbc/compare?selectedSessions=7c1213e1-7dcb-4d2b-b252-04e51e3ed82e\n", + "\n", + "View all tests for Dataset Chat Extraction at:\n", + "https://smith.langchain.com/o/97591f89-2916-48d3-804e-20cab23f91aa/datasets/b8637606-8ac0-4bab-9ad5-29796196cbbc\n", + "[------------------------------------------------->] 27/27" + ] + } + ], + "source": [ + "import uuid\n", + "\n", + "client = Client() # Launch langsmith client for cloning datasets\n", + "today = datetime.datetime.today().isoformat()\n", + "\n", + "for task in registry.filter(Type=\"ExtractionTask\"):\n", + "\n", + " dataset_name = task.name\n", + " clone_public_dataset(task.dataset_id, dataset_name=dataset_name)\n", + " dataset = client.read_dataset(dataset_name=dataset_name)\n", + "\n", + " for name, chain_factory in chains_to_eval:\n", + " if task.name == \"Email Extraction\":\n", + " continue\n", + " print()\n", + " print(f\"Benchmarking {task.name} on {name}\")\n", + " eval_config = eval_configs[task.name]\n", + "\n", + " chain = chain_factory(prompt_formatting[task.name], task.schema)\n", + " project_name = f\"{name}-{task.name}-{today}\"\n", + " client.run_on_dataset(\n", + " dataset_name=dataset_name,\n", + " llm_or_chain_factory=chain,\n", + " evaluation=eval_config,\n", + " verbose=False,\n", + " project_name=project_name,\n", + " tags=[name],\n", + " concurrency_level=5,\n", + " project_metadata={\n", + " \"name\": name,\n", + " \"task\": task.name,\n", + " \"date\": today,\n", + " },\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5ee3318-a528-4766-a12a-887863633438", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58302cdd-9fe9-43a5-8d11-7077dd2c47d8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/notebooks/extraction/email.ipynb b/docs/source/notebooks/extraction/email.ipynb index e974521..0555488 100644 --- a/docs/source/notebooks/extraction/email.ipynb +++ b/docs/source/notebooks/extraction/email.ipynb @@ -1194,7 +1194,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.2" + "version": "3.11.1" } }, "nbformat": 4, From 00cf87f59b6938a61081d26bfb8fbb79ab2575bb Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 20 Feb 2024 20:42:10 -0800 Subject: [PATCH 2/4] cr --- docs/source/notebooks/extraction/custom-chain.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/notebooks/extraction/custom-chain.ipynb b/docs/source/notebooks/extraction/custom-chain.ipynb index 6388fad..a78d851 100644 --- a/docs/source/notebooks/extraction/custom-chain.ipynb +++ b/docs/source/notebooks/extraction/custom-chain.ipynb @@ -307,8 +307,6 @@ " dataset = client.read_dataset(dataset_name=dataset_name)\n", "\n", " for name, chain_factory in chains_to_eval:\n", - " if task.name == \"Email Extraction\":\n", - " continue\n", " print()\n", " print(f\"Benchmarking {task.name} on {name}\")\n", " eval_config = eval_configs[task.name]\n", From 1518c0ea438dede02fef5cf6bf7a26daf6dc44b4 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 20 Feb 2024 20:43:56 -0800 Subject: [PATCH 3/4] cr --- .../notebooks/extraction/custom-chain.ipynb | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/docs/source/notebooks/extraction/custom-chain.ipynb b/docs/source/notebooks/extraction/custom-chain.ipynb index a78d851..f3e1357 100644 --- a/docs/source/notebooks/extraction/custom-chain.ipynb +++ b/docs/source/notebooks/extraction/custom-chain.ipynb @@ -69,7 +69,7 @@ } ], "source": [ - "from langchain_benchmarks import registry, clone_public_dataset\n", + "from langchain_benchmarks import clone_public_dataset, registry\n", "\n", "registry.filter(Type=\"ExtractionTask\")" ] @@ -151,8 +151,8 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_openai import ChatOpenAI\n", - "from langchain.output_parsers.openai_tools import JsonOutputToolsParser" + "from langchain.output_parsers.openai_tools import JsonOutputToolsParser\n", + "from langchain_openai import ChatOpenAI" ] }, { @@ -166,9 +166,11 @@ " llm = ChatOpenAI(model=\"gpt-4-turbo-preview\", temperature=0).bind_tools(\n", " tools=[schema],\n", " )\n", - " \n", + "\n", " output_parser = JsonOutputToolsParser()\n", - " extraction_chain = prompt | llm | output_parser | (lambda x: {\"output\": x[0]['args']})\n", + " extraction_chain = (\n", + " prompt | llm | output_parser | (lambda x: {\"output\": x[0][\"args\"]})\n", + " )\n", " return extraction_chain" ] }, @@ -190,9 +192,7 @@ "metadata": {}, "outputs": [], "source": [ - "chains_to_eval = [\n", - " (\"openai-tools\", create_extraction_chain)\n", - "]" + "chains_to_eval = [(\"openai-tools\", create_extraction_chain)]" ] }, { @@ -205,8 +205,11 @@ "import datetime\n", "\n", "from langsmith.client import Client\n", + "\n", "from langchain_benchmarks.extraction import get_eval_config\n", - "from langchain_benchmarks.extraction.tasks.chat_extraction import get_eval_config as get_chat_eval_config\n" + "from langchain_benchmarks.extraction.tasks.chat_extraction import (\n", + " get_eval_config as get_chat_eval_config,\n", + ")" ] }, { @@ -218,7 +221,7 @@ "source": [ "eval_configs = {\n", " \"Email Extraction\": get_eval_config(),\n", - " \"Chat Extraction\": get_chat_eval_config()\n", + " \"Chat Extraction\": get_chat_eval_config(),\n", "}" ] }, @@ -229,7 +232,9 @@ "metadata": {}, "outputs": [], "source": [ - "prompts = {task.name: task.instructions for task in registry.filter(Type=\"ExtractionTask\")}" + "prompts = {\n", + " task.name: task.instructions for task in registry.filter(Type=\"ExtractionTask\")\n", + "}" ] }, { @@ -239,15 +244,19 @@ "metadata": {}, "outputs": [], "source": [ - "from langchain_core.messages import SystemMessage, HumanMessage\n", + "from langchain_core.messages import HumanMessage, SystemMessage\n", "\n", "_email_template = \"\"\"What can you tell me about the following email? Make sure to extract the question in the correct format. Here is the email:\\n ```\\n{input}\\n```\"\"\"\n", + "\n", + "\n", "def email_extraction_formatting(inputs):\n", " return [HumanMessage(content=_email_template.format(input=inputs[\"input\"]))]\n", "\n", + "\n", "_chat_template = \"\"\"Generate a ticket for the following question-response pair:\\n\\n{dialogue}\\n'\"\"\"\n", "_chat_instructions = \"\"\"You are a helpdesk assistant responsible with extracting information and generating tickets. Dialogues are between a user and a support engineer.\"\"\"\n", "\n", + "\n", "def format_run(dialogue_input: dict):\n", " question = dialogue_input[\"question\"]\n", " answer = dialogue_input[\"answer\"]\n", @@ -256,16 +265,18 @@ " f\"\\n{answer}\\n\"\n", " }\n", "\n", + "\n", "def chat_extraction_formatting(inputs):\n", " dialogue = format_run(inputs)[\"dialogue\"]\n", " return [\n", " SystemMessage(content=_chat_instructions),\n", - " HumanMessage(content=_chat_template.format(dialogue=dialogue))\n", + " HumanMessage(content=_chat_template.format(dialogue=dialogue)),\n", " ]\n", "\n", + "\n", "prompt_formatting = {\n", " \"Email Extraction\": email_extraction_formatting,\n", - " \"Chat Extraction\": chat_extraction_formatting\n", + " \"Chat Extraction\": chat_extraction_formatting,\n", "}" ] }, @@ -301,7 +312,6 @@ "today = datetime.datetime.today().isoformat()\n", "\n", "for task in registry.filter(Type=\"ExtractionTask\"):\n", - "\n", " dataset_name = task.name\n", " clone_public_dataset(task.dataset_id, dataset_name=dataset_name)\n", " dataset = client.read_dataset(dataset_name=dataset_name)\n", From 6e613a57e1fc8ba2a56ac14886bfc257a711a48b Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 20 Feb 2024 20:46:07 -0800 Subject: [PATCH 4/4] cr --- docs/source/notebooks/extraction/custom-chain.ipynb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/source/notebooks/extraction/custom-chain.ipynb b/docs/source/notebooks/extraction/custom-chain.ipynb index f3e1357..02239b8 100644 --- a/docs/source/notebooks/extraction/custom-chain.ipynb +++ b/docs/source/notebooks/extraction/custom-chain.ipynb @@ -175,14 +175,13 @@ ] }, { - "cell_type": "code", - "execution_count": 41, - "id": "ae64eb6f-cfa2-49b3-8225-d77d2aec308a", + "cell_type": "markdown", + "id": "cb2c6bf8-33e3-43d4-a878-c07749c51d51", "metadata": {}, - "outputs": [], "source": [ "## Loop over tasks\n", - "\n" + "\n", + "Here we loop over the tasks with our chains to evaluate" ] }, {