From 6fa1b4a47e7670cc0cd62c7cee7ed91d45e54e61 Mon Sep 17 00:00:00 2001 From: Lance Martin <122662504+rlancemartin@users.noreply.github.com> Date: Sun, 18 May 2025 14:10:06 -0700 Subject: [PATCH] Use package imports --- README.md | 12 +- eval/evaluate_triage.py | 2 +- notebooks/agent.ipynb | 660 +--- notebooks/evaluation.ipynb | 585 +--- notebooks/hitl.ipynb | 1375 +-------- notebooks/memory.ipynb | 1770 +---------- src/email_assistant/email_assistant.py | 10 +- src/email_assistant/email_assistant_hitl.py | 10 +- .../email_assistant_hitl_memory.py | 10 +- .../email_assistant_hitl_memory_gmail.py | 12 +- src/email_assistant/tools/__init__.py | 6 +- src/email_assistant/tools/base.py | 6 +- src/email_assistant/tools/default/__init__.py | 6 +- src/email_assistant/tools/gmail/__init__.py | 4 +- tests/hitl_testing.ipynb | 2129 +------------ tests/memory_testing.ipynb | 2641 +---------------- tests/test_response.py | 8 +- 17 files changed, 50 insertions(+), 9196 deletions(-) diff --git a/README.md b/README.md index 98b4fee..b821f90 100644 --- a/README.md +++ b/README.md @@ -44,17 +44,19 @@ export LANGSMITH_TRACING=true export OPENAI_API_KEY=your_openai_api_key ``` -### Create a virtual environment and activate it +### Create a virtual environment and install dependencies ```shell $ python3 -m venv .venv $ source .venv/bin/activate -# Ensure you have a recent version of pip (required for editable installs with pyproject.toml) -$ python3 -m pip install --upgrade pip -# Install the package in editable mode -$ pip install -e . +# Install uv for reproducible environments +$ pip install uv +# Install the project and dev dependencies using the lockfile +$ uv sync --extra dev ``` +This installs the project in editable mode (distribution package name `interrupt_workshop`, import package name `email_assistant`), so you can import `email_assistant` anywhere once the virtual environment is activated. 
+ ## Structure The repo is organized into the 4 sections, with a notebook for each and accompanying code in the `src/email_assistant` directory. diff --git a/eval/evaluate_triage.py b/eval/evaluate_triage.py index 251fdc0..01170e8 100644 --- a/eval/evaluate_triage.py +++ b/eval/evaluate_triage.py @@ -7,7 +7,7 @@ from eval.email_dataset import examples_triage -from src.email_assistant.email_assistant import email_assistant +from email_assistant.email_assistant import email_assistant # Client client = Client() diff --git a/notebooks/agent.ipynb b/notebooks/agent.ipynb index d9cf310..a6eb800 100644 --- a/notebooks/agent.ipynb +++ b/notebooks/agent.ipynb @@ -1,659 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "7bb66df4", - "metadata": {}, - "source": [ - "# Building Agents \n", - " \n", - "> Note: Optionally, see [these slides](https://docs.google.com/presentation/d/13c0L1CQWAL7fuCXakOqjkvoodfynPJI4Hw_4H76okVU/edit?usp=sharing) and [langgraph_101.ipynb](langgraph_101.ipynb) for context before diving into this notebook!\n", - "\n", - "We're going to build an email assistant from scratch, starting here with 1) the agent architecture (using [LangGraph](https://langchain-ai.github.io/langgraph/)) and following with 2) testing (using [LangSmith](https://docs.smith.langchain.com/)), 3) human-in-the-loop, and 4) memory. 
This diagram show how these pieces will fit together:\n", - "\n", - "![overview-img](img/overview.png)" - ] - }, - { - "cell_type": "markdown", - "id": "19d34429", - "metadata": {}, - "source": [ - "#### Load environment variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "46c9f78e", - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "load_dotenv(\"../.env\")" - ] - }, - { - "cell_type": "markdown", - "id": "54a69e9a", - "metadata": {}, - "source": [ - "## Tool Definition\n", - "\n", - "Let's start by defining some simple tools that an email assistant will use with the `@tool` decorator:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2b708ec", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Literal\n", - "from datetime import datetime\n", - "from pydantic import BaseModel\n", - "from langchain_core.tools import tool\n", - "\n", - "@tool\n", - "def write_email(to: str, subject: str, content: str) -> str:\n", - " \"\"\"Write and send an email.\"\"\"\n", - " # Placeholder response - in real app would send email\n", - " return f\"Email sent to {to} with subject '{subject}' and content: {content}\"\n", - "\n", - "@tool\n", - "def schedule_meeting(\n", - " attendees: list[str], subject: str, duration_minutes: int, preferred_day: datetime, start_time: int\n", - ") -> str:\n", - " \"\"\"Schedule a calendar meeting.\"\"\"\n", - " # Placeholder response - in real app would check calendar and schedule\n", - " date_str = preferred_day.strftime(\"%A, %B %d, %Y\")\n", - " return f\"Meeting '{subject}' scheduled on {date_str} at {start_time} for {duration_minutes} minutes with {len(attendees)} attendees\"\n", - "\n", - "@tool\n", - "def check_calendar_availability(day: str) -> str:\n", - " \"\"\"Check calendar availability for a given day.\"\"\"\n", - " # Placeholder response - in real app would check actual calendar\n", - " return f\"Available times on {day}: 
9:00 AM, 2:00 PM, 4:00 PM\"\n", - "\n", - "@tool\n", - "class Done(BaseModel):\n", - " \"\"\"E-mail has been sent.\"\"\"\n", - " done: bool" - ] - }, - { - "cell_type": "markdown", - "id": "2911c929-5c41-4dcd-9cc8-21a8ff82b769", - "metadata": {}, - "source": [ - "## Building our email assistant\n", - "\n", - "We'll combine a [router and agent](https://langchain-ai.github.io/langgraph/tutorials/workflows/) to build our email assistant.\n", - "\n", - "![agent_workflow_img](img/email_workflow.png)\n", - "\n", - "### Router\n", - "\n", - "The routing step handles the triage decision. \n", - "\n", - "The triage router only focuses on the triage decision, while the agent focuses *only* on the response. \n", - "\n", - "#### State\n", - "\n", - "When building an agent, it's important to consider the information that you want to track over time. We'll use LangGraph's pre-built [`MessagesState` object](https://langchain-ai.github.io/langgraph/concepts/low_level/#messagesstate), which is a just dictionary with a `messages` key that appends messages returned by nodes [as its update logic](https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers). However, LangGraph gives you flexibility to track other information. 
We'll define a custom `State` object that extends `MessagesState` and adds a `classification_decision` key:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "692537ec-f09e-4086-81e4-9c517273b854", - "metadata": {}, - "outputs": [], - "source": [ - "from langgraph.graph import MessagesState\n", - "\n", - "class State(MessagesState):\n", - " # We can add a specific key to our state for the email input\n", - " email_input: dict\n", - " classification_decision: Literal[\"ignore\", \"respond\", \"notify\"]" - ] - }, - { - "cell_type": "markdown", - "id": "d6cd1647-6d58-4aae-b954-6a9c5790c20c", - "metadata": {}, - "source": [ - "#### Triage node\n", - "\n", - "We define a python function with our triage routing logic.\n", - "\n", - "> **Note:** here we change to the parent directory (`%cd ..`) to access our project's module structure, which contains reusable prompts and components. The autoreload extensions ensure any changes to these modules are automatically reflected in the notebook without requiring kernel restarts. This allows us to organize our prompts in a dedicated module rather than defining them inline, making them easier to maintain and reuse across the notebooks! You can see all these files in: `src/email_assistant`\n", - "\n", - "For this, we use [structured outputs](https://python.langchain.com/docs/concepts/structured_outputs/) with a Pydantic model, which is particularly useful for defining structured output schemas because it offers type hints and validation. The descriptions in the pydantic model are important because they get passed as part JSON schema to the LLM to inform the output coercion." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8adf520b-adf5-4a7b-b7a8-b8c23720c03f", - "metadata": {}, - "outputs": [], - "source": [ - "%cd ..\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from pydantic import BaseModel, Field\n", - "from email_assistant.utils import parse_email, format_email_markdown\n", - "from email_assistant.prompts import triage_system_prompt, triage_user_prompt, default_triage_instructions, default_background\n", - "from langchain.chat_models import init_chat_model\n", - "from langgraph.graph import END\n", - "from langgraph.types import Command" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c2c2ff0-da93-4731-b5b6-0ccd59e0e783", - "metadata": {}, - "outputs": [], - "source": [ - "triage_system_prompt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f3a1ad2c-40a2-42d0-a4b8-7a25df825fad", - "metadata": {}, - "outputs": [], - "source": [ - "triage_user_prompt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "69b0df31-b9d2-423f-ba07-67eb0643c2ba", - "metadata": {}, - "outputs": [], - "source": [ - "default_background" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b3ea767-6ac1-4562-8ca6-5fa451495786", - "metadata": {}, - "outputs": [], - "source": [ - "default_triage_instructions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c54ae6a6-94d9-4160-8d45-18f4d29aa600", - "metadata": {}, - "outputs": [], - "source": [ - "class RouterSchema(BaseModel):\n", - " \"\"\"Analyze the unread email and route it according to its content.\"\"\"\n", - "\n", - " reasoning: str = Field(\n", - " description=\"Step-by-step reasoning behind the classification.\"\n", - " )\n", - " classification: Literal[\"ignore\", \"respond\", \"notify\"] = Field(\n", - " description=\"The classification of an email: 'ignore' for irrelevant emails, \"\n", - " \"'notify' for important information that doesn't need a 
response, \"\n", - " \"'respond' for emails that need a reply\",\n", - " )\n", - "\n", - "# Initialize the LLM for use with router / structured output\n", - "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", - "llm_router = llm.with_structured_output(RouterSchema) \n", - "\n", - "def triage_router(state: State) -> Command[Literal[\"response_agent\", END]]:\n", - " \"\"\"Analyze email content to decide if we should respond, notify, or ignore.\"\"\"\n", - " \n", - " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", - " system_prompt = triage_system_prompt.format(\n", - " background=default_background,\n", - " triage_instructions=default_triage_instructions\n", - " )\n", - "\n", - " user_prompt = triage_user_prompt.format(\n", - " author=author, to=to, subject=subject, email_thread=email_thread\n", - " )\n", - "\n", - " result = llm_router.invoke(\n", - " [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": user_prompt},\n", - " ]\n", - " )\n", - " \n", - " if result.classification == \"respond\":\n", - " print(\"📧 Classification: RESPOND - This email requires a response\")\n", - " goto = \"response_agent\"\n", - " update = {\n", - " \"messages\": [\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": f\"Respond to the email: \\n\\n{format_email_markdown(subject, author, to, email_thread)}\",\n", - " }\n", - " ],\n", - " \"classification_decision\": result.classification,\n", - " }\n", - " \n", - " elif result.classification == \"ignore\":\n", - " print(\"🚫 Classification: IGNORE - This email can be safely ignored\")\n", - " goto = END\n", - " update = {\n", - " \"classification_decision\": result.classification,\n", - " }\n", - " \n", - " elif result.classification == \"notify\":\n", - " print(\"🔔 Classification: NOTIFY - This email contains important information\")\n", - " # For now, we go to END. 
But we will add to this later!\n", - " goto = END\n", - " update = {\n", - " \"classification_decision\": result.classification,\n", - " }\n", - " \n", - " else:\n", - " raise ValueError(f\"Invalid classification: {result.classification}\")\n", - " return Command(goto=goto, update=update)" - ] - }, - { - "cell_type": "markdown", - "id": "272d8715", - "metadata": {}, - "source": [ - "We use [Command](https://langchain-ai.github.io/langgraph/how-tos/command/) objects in LangGraph to both update the state and select the next node to visit. This is a useful alternative to edges.\n", - "\n", - "### Agent\n", - "\n", - "Now, let's build the agent.\n", - "\n", - "#### LLM node\n", - "\n", - "Here, we define the LLM decision-making node. This node takes in the current state, calls the LLM, and updates `messages` with the LLM output. \n", - "\n", - "We [enforce tool use with OpenAI](https://python.langchain.com/docs/how_to/tool_choice/) by setting `tool_choice=\"required\"`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e842b3c-06f5-440f-8159-995503ef3a99", - "metadata": {}, - "outputs": [], - "source": [ - "from src.email_assistant.tools.default.prompt_templates import AGENT_TOOLS_PROMPT\n", - "from src.email_assistant.prompts import agent_system_prompt, default_response_preferences, default_cal_preferences" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8f69c6fc-70aa-48f1-8312-2b1818469a1b", - "metadata": {}, - "outputs": [], - "source": [ - "AGENT_TOOLS_PROMPT" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9052fced-3fdb-4cd2-ac88-e2ccdce14e7c", - "metadata": {}, - "outputs": [], - "source": [ - "agent_system_prompt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f2c120f", - "metadata": {}, - "outputs": [], - "source": [ - "# Collect all tools\n", - "tools = [write_email, schedule_meeting, check_calendar_availability, Done]\n", - "tools_by_name = {tool.name: tool for 
tool in tools}\n", - "\n", - "# Initialize the LLM, enforcing tool use\n", - "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", - "llm_with_tools = llm.bind_tools(tools, tool_choice=\"required\")\n", - "\n", - "def llm_call(state: State):\n", - " \"\"\"LLM decides whether to call a tool or not\"\"\"\n", - "\n", - " return {\n", - " \"messages\": [\n", - " # Invoke the LLM\n", - " llm_with_tools.invoke(\n", - " # Add the system prompt\n", - " [ \n", - " {\"role\": \"system\", \"content\": agent_system_prompt.format(\n", - " tools_prompt=AGENT_TOOLS_PROMPT,\n", - " background=default_background,\n", - " response_preferences=default_response_preferences,\n", - " cal_preferences=default_cal_preferences, \n", - " )}\n", - " ]\n", - " # Add the current messages to the prompt\n", - " + state[\"messages\"]\n", - " )\n", - " ]\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "9f05d11a", - "metadata": {}, - "source": [ - "#### Tool handler node\n", - "\n", - "After the LLM makes a decision, we need to execute the chosen tool. \n", - "\n", - "The `tool_handler` node executes the tool. We can see that nodes can update the graph state to capture any important state changes, such as the classification decision." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "43eb6dc2", - "metadata": {}, - "outputs": [], - "source": [ - "def tool_handler(state: dict):\n", - " \"\"\"Performs the tool call.\"\"\"\n", - "\n", - " # List for tool messages\n", - " result = []\n", - " \n", - " # Iterate through tool calls\n", - " for tool_call in state[\"messages\"][-1].tool_calls:\n", - " # Get the tool\n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " # Run it\n", - " observation = tool.invoke(tool_call[\"args\"])\n", - " # Create a tool message\n", - " result.append({\"role\": \"tool\", \"content\" : observation, \"tool_call_id\": tool_call[\"id\"]})\n", - " \n", - " # Add it to our messages\n", - " return {\"messages\": result}" - ] - }, - { - "cell_type": "markdown", - "id": "4721dede", - "metadata": {}, - "source": [ - "#### Conditional Routing\n", - "\n", - "Our agent needs to decide when to continue using tools and when to stop. This conditional routing function directs the agent to either continue or terminate." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c7cbea7", - "metadata": {}, - "outputs": [], - "source": [ - "def should_continue(state: State) -> Literal[\"tool_handler\", END]:\n", - " \"\"\"Route to tool handler, or end if Done tool called.\"\"\"\n", - " \n", - " # Get the last message\n", - " messages = state[\"messages\"]\n", - " last_message = messages[-1]\n", - " \n", - " # If the last message is a tool call, check if it's a Done tool call\n", - " if last_message.tool_calls:\n", - " for tool_call in last_message.tool_calls: \n", - " if tool_call[\"name\"] == \"Done\":\n", - " return END\n", - " else:\n", - " return \"tool_handler\"" - ] - }, - { - "cell_type": "markdown", - "id": "6eb4ede8", - "metadata": {}, - "source": [ - "#### Agent Graph\n", - "\n", - "Finally, we can assemble all components:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f81df767", - "metadata": {}, - "outputs": [], - "source": [ - "from langgraph.graph import StateGraph, START, END\n", - "from email_assistant.utils import show_graph\n", - "\n", - "# Build workflow\n", - "overall_workflow = StateGraph(State)\n", - "\n", - "# Add nodes\n", - "overall_workflow.add_node(\"llm_call\", llm_call)\n", - "overall_workflow.add_node(\"tool_handler\", tool_handler)\n", - "\n", - "# Add edges\n", - "overall_workflow.add_edge(START, \"llm_call\")\n", - "overall_workflow.add_conditional_edges(\n", - " \"llm_call\",\n", - " should_continue,\n", - " {\n", - " \"tool_handler\": \"tool_handler\",\n", - " END: END,\n", - " },\n", - ")\n", - "overall_workflow.add_edge(\"tool_handler\", \"llm_call\")\n", - "\n", - "# Compile the agent\n", - "agent = overall_workflow.compile()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "617f6373-bf48-44c2-ba33-000c9f22b067", - "metadata": {}, - "outputs": [], - "source": [ - "# View\n", - "show_graph(agent)" - ] - }, - { - "cell_type": "markdown", - "id": "dc8367c4", - "metadata": {}, - 
"source": [ - "This creates a graph that:\n", - "1. Starts with an LLM decision\n", - "2. Conditionally routes to tool execution or termination\n", - "3. After tool execution, returns to LLM for the next decision\n", - "4. Repeats until completion or no tool is called\n" - ] - }, - { - "cell_type": "markdown", - "id": "b2b3406d-496d-43c9-942e-c5ce7e3a8321", - "metadata": {}, - "source": [ - "### Combine workflow with our agent\n", - "\n", - "We can combine the router and the agent." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "697f2548-b5a5-4fb6-8aed-226369e53e25", - "metadata": {}, - "outputs": [], - "source": [ - "overall_workflow = (\n", - " StateGraph(State)\n", - " .add_node(triage_router)\n", - " .add_node(\"response_agent\", agent)\n", - " .add_edge(START, \"triage_router\")\n", - ").compile()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2dd6dcc4-6346-4d41-ae36-61f3fc83b7a7", - "metadata": {}, - "outputs": [], - "source": [ - "show_graph(overall_workflow, xray=True)" - ] - }, - { - "cell_type": "markdown", - "id": "2091d5cc", - "metadata": {}, - "source": [ - "This is a higher-level composition where:\n", - "1. First, the triage router analyzes the email\n", - "2. If needed, the response agent handles crafting a response\n", - "3. The workflow ends when either the triage decides no response is needed or the response agent completes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "070f18a6", - "metadata": {}, - "outputs": [], - "source": [ - "email_input = {\n", - " \"author\": \"System Admin \",\n", - " \"to\": \"Development Team \",\n", - " \"subject\": \"Scheduled maintenance - database downtime\",\n", - " \"email_thread\": \"Hi team,\\n\\nThis is a reminder that we'll be performing scheduled maintenance on the production database tonight from 2AM to 4AM EST. 
During this time, all database services will be unavailable.\\n\\nPlease plan your work accordingly and ensure no critical deployments are scheduled during this window.\\n\\nThanks,\\nSystem Admin Team\"\n", - "}\n", - "\n", - "# Run the agent\n", - "response = overall_workflow.invoke({\"email_input\": email_input})\n", - "for m in response[\"messages\"]:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7a50ae0a-7bd1-4e69-90be-781b1e77b4dd", - "metadata": {}, - "outputs": [], - "source": [ - "email_input = {\n", - " \"author\": \"Alice Smith \",\n", - " \"to\": \"John Doe \",\n", - " \"subject\": \"Quick question about API documentation\",\n", - " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", - "}\n", - "\n", - "# Run the agent\n", - "response = overall_workflow.invoke({\"email_input\": email_input})\n", - "for m in response[\"messages\"]:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "f631f61f", - "metadata": {}, - "source": [ - "## Testing with Local Deployment\n", - "\n", - "You can find the file for our agent in the `src/email_assistant` directory:\n", - "\n", - "* `src/email_assistant/email_assistant.py`\n", - "\n", - "You can test them locally in LangGraph Studio by running:\n", - "\n", - "```\n", - "! 
langgraph dev\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "12752016", - "metadata": { - "lines_to_next_cell": 0 - }, - "source": [ - "Example e-mail you can test:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "08ee005a", - "metadata": {}, - "outputs": [], - "source": [ - "{\n", - " \"author\": \"Alice Smith \",\n", - " \"to\": \"John Doe \",\n", - " \"subject\": \"Quick question about API documentation\",\n", - " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "d09e33b6", - "metadata": {}, - "source": [ - "![studio-img](img/studio.png)" - ] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{"cells": [{"cell_type": "markdown", "id": "7bb66df4", "metadata": {}, "source": ["# Building Agents \n", " \n", "> Note: Optionally, see [these slides](https://docs.google.com/presentation/d/13c0L1CQWAL7fuCXakOqjkvoodfynPJI4Hw_4H76okVU/edit?usp=sharing) and [langgraph_101.ipynb](langgraph_101.ipynb) for context before diving into this notebook!\n", "\n", "We're going to build an email assistant from scratch, starting here with 1) the agent architecture (using 
[LangGraph](https://langchain-ai.github.io/langgraph/)) and following with 2) testing (using [LangSmith](https://docs.smith.langchain.com/)), 3) human-in-the-loop, and 4) memory. This diagram show how these pieces will fit together:\n", "\n", "![overview-img](img/overview.png)"]}, {"cell_type": "markdown", "id": "19d34429", "metadata": {}, "source": ["#### Load environment variables"]}, {"cell_type": "code", "execution_count": null, "id": "46c9f78e", "metadata": {}, "outputs": [], "source": ["from dotenv import load_dotenv\n", "load_dotenv(\"../.env\")"]}, {"cell_type": "markdown", "id": "54a69e9a", "metadata": {}, "source": ["## Tool Definition\n", "\n", "Let's start by defining some simple tools that an email assistant will use with the `@tool` decorator:"]}, {"cell_type": "code", "execution_count": null, "id": "f2b708ec", "metadata": {}, "outputs": [], "source": ["from typing import Literal\n", "from datetime import datetime\n", "from pydantic import BaseModel\n", "from langchain_core.tools import tool\n", "\n", "@tool\n", "def write_email(to: str, subject: str, content: str) -> str:\n", " \"\"\"Write and send an email.\"\"\"\n", " # Placeholder response - in real app would send email\n", " return f\"Email sent to {to} with subject '{subject}' and content: {content}\"\n", "\n", "@tool\n", "def schedule_meeting(\n", " attendees: list[str], subject: str, duration_minutes: int, preferred_day: datetime, start_time: int\n", ") -> str:\n", " \"\"\"Schedule a calendar meeting.\"\"\"\n", " # Placeholder response - in real app would check calendar and schedule\n", " date_str = preferred_day.strftime(\"%A, %B %d, %Y\")\n", " return f\"Meeting '{subject}' scheduled on {date_str} at {start_time} for {duration_minutes} minutes with {len(attendees)} attendees\"\n", "\n", "@tool\n", "def check_calendar_availability(day: str) -> str:\n", " \"\"\"Check calendar availability for a given day.\"\"\"\n", " # Placeholder response - in real app would check actual calendar\n", " return 
f\"Available times on {day}: 9:00 AM, 2:00 PM, 4:00 PM\"\n", "\n", "@tool\n", "class Done(BaseModel):\n", " \"\"\"E-mail has been sent.\"\"\"\n", " done: bool"]}, {"cell_type": "markdown", "id": "2911c929-5c41-4dcd-9cc8-21a8ff82b769", "metadata": {}, "source": ["## Building our email assistant\n", "\n", "We'll combine a [router and agent](https://langchain-ai.github.io/langgraph/tutorials/workflows/) to build our email assistant.\n", "\n", "![agent_workflow_img](img/email_workflow.png)\n", "\n", "### Router\n", "\n", "The routing step handles the triage decision. \n", "\n", "The triage router only focuses on the triage decision, while the agent focuses *only* on the response. \n", "\n", "#### State\n", "\n", "When building an agent, it's important to consider the information that you want to track over time. We'll use LangGraph's pre-built [`MessagesState` object](https://langchain-ai.github.io/langgraph/concepts/low_level/#messagesstate), which is a just dictionary with a `messages` key that appends messages returned by nodes [as its update logic](https://langchain-ai.github.io/langgraph/concepts/low_level/#reducers). However, LangGraph gives you flexibility to track other information. 
We'll define a custom `State` object that extends `MessagesState` and adds a `classification_decision` key:"]}, {"cell_type": "code", "execution_count": null, "id": "692537ec-f09e-4086-81e4-9c517273b854", "metadata": {}, "outputs": [], "source": ["from langgraph.graph import MessagesState\n", "\n", "class State(MessagesState):\n", " # We can add a specific key to our state for the email input\n", " email_input: dict\n", " classification_decision: Literal[\"ignore\", \"respond\", \"notify\"]"]}, {"cell_type": "markdown", "id": "d6cd1647-6d58-4aae-b954-6a9c5790c20c", "metadata": {}, "source": ["#### Triage node\n", "\n", "We define a python function with our triage routing logic.\n", "\n", "> **Note:** This notebook relies on the package being installed in editable mode so we can import `email_assistant` directly. The autoreload extensions ensure any changes to these modules are automatically reflected without restarting the kernel. This keeps prompts organized in `src/email_assistant`.\n", "\n", "For this, we use [structured outputs](https://python.langchain.com/docs/concepts/structured_outputs/) with a Pydantic model, which is particularly useful for defining structured output schemas because it offers type hints and validation. 
The descriptions in the pydantic model are important because they get passed as part JSON schema to the LLM to inform the output coercion."]}, {"cell_type": "code", "execution_count": null, "id": "8adf520b-adf5-4a7b-b7a8-b8c23720c03f", "metadata": {}, "outputs": [], "source": ["%load_ext autoreload\n", "%autoreload 2\n", "\n", "from pydantic import BaseModel, Field\n", "from email_assistant.utils import parse_email, format_email_markdown\n", "from email_assistant.prompts import triage_system_prompt, triage_user_prompt, default_triage_instructions, default_background\n", "from langchain.chat_models import init_chat_model\n", "from langgraph.graph import END\n", "from langgraph.types import Command"]}, {"cell_type": "code", "execution_count": null, "id": "2c2c2ff0-da93-4731-b5b6-0ccd59e0e783", "metadata": {}, "outputs": [], "source": ["triage_system_prompt"]}, {"cell_type": "code", "execution_count": null, "id": "f3a1ad2c-40a2-42d0-a4b8-7a25df825fad", "metadata": {}, "outputs": [], "source": ["triage_user_prompt"]}, {"cell_type": "code", "execution_count": null, "id": "69b0df31-b9d2-423f-ba07-67eb0643c2ba", "metadata": {}, "outputs": [], "source": ["default_background"]}, {"cell_type": "code", "execution_count": null, "id": "4b3ea767-6ac1-4562-8ca6-5fa451495786", "metadata": {}, "outputs": [], "source": ["default_triage_instructions"]}, {"cell_type": "code", "execution_count": null, "id": "c54ae6a6-94d9-4160-8d45-18f4d29aa600", "metadata": {}, "outputs": [], "source": ["class RouterSchema(BaseModel):\n", " \"\"\"Analyze the unread email and route it according to its content.\"\"\"\n", "\n", " reasoning: str = Field(\n", " description=\"Step-by-step reasoning behind the classification.\"\n", " )\n", " classification: Literal[\"ignore\", \"respond\", \"notify\"] = Field(\n", " description=\"The classification of an email: 'ignore' for irrelevant emails, \"\n", " \"'notify' for important information that doesn't need a response, \"\n", " \"'respond' for emails that need 
a reply\",\n", " )\n", "\n", "# Initialize the LLM for use with router / structured output\n", "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", "llm_router = llm.with_structured_output(RouterSchema) \n", "\n", "def triage_router(state: State) -> Command[Literal[\"response_agent\", END]]:\n", " \"\"\"Analyze email content to decide if we should respond, notify, or ignore.\"\"\"\n", " \n", " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", " system_prompt = triage_system_prompt.format(\n", " background=default_background,\n", " triage_instructions=default_triage_instructions\n", " )\n", "\n", " user_prompt = triage_user_prompt.format(\n", " author=author, to=to, subject=subject, email_thread=email_thread\n", " )\n", "\n", " result = llm_router.invoke(\n", " [\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt},\n", " ]\n", " )\n", " \n", " if result.classification == \"respond\":\n", " print(\"\ud83d\udce7 Classification: RESPOND - This email requires a response\")\n", " goto = \"response_agent\"\n", " update = {\n", " \"messages\": [\n", " {\n", " \"role\": \"user\",\n", " \"content\": f\"Respond to the email: \\n\\n{format_email_markdown(subject, author, to, email_thread)}\",\n", " }\n", " ],\n", " \"classification_decision\": result.classification,\n", " }\n", " \n", " elif result.classification == \"ignore\":\n", " print(\"\ud83d\udeab Classification: IGNORE - This email can be safely ignored\")\n", " goto = END\n", " update = {\n", " \"classification_decision\": result.classification,\n", " }\n", " \n", " elif result.classification == \"notify\":\n", " print(\"\ud83d\udd14 Classification: NOTIFY - This email contains important information\")\n", " # For now, we go to END. 
But we will add to this later!\n", " goto = END\n", " update = {\n", " \"classification_decision\": result.classification,\n", " }\n", " \n", " else:\n", " raise ValueError(f\"Invalid classification: {result.classification}\")\n", " return Command(goto=goto, update=update)"]}, {"cell_type": "markdown", "id": "272d8715", "metadata": {}, "source": ["We use [Command](https://langchain-ai.github.io/langgraph/how-tos/command/) objects in LangGraph to both update the state and select the next node to visit. This is a useful alternative to edges.\n", "\n", "### Agent\n", "\n", "Now, let's build the agent.\n", "\n", "#### LLM node\n", "\n", "Here, we define the LLM decision-making node. This node takes in the current state, calls the LLM, and updates `messages` with the LLM output. \n", "\n", "We [enforce tool use with OpenAI](https://python.langchain.com/docs/how_to/tool_choice/) by setting `tool_choice=\"required\"`."]}, {"cell_type": "code", "execution_count": null, "id": "1e842b3c-06f5-440f-8159-995503ef3a99", "metadata": {}, "outputs": [], "source": ["from email_assistant.tools.default.prompt_templates import AGENT_TOOLS_PROMPT\n", "from email_assistant.prompts import agent_system_prompt, default_response_preferences, default_cal_preferences"]}, {"cell_type": "code", "execution_count": null, "id": "8f69c6fc-70aa-48f1-8312-2b1818469a1b", "metadata": {}, "outputs": [], "source": ["AGENT_TOOLS_PROMPT"]}, {"cell_type": "code", "execution_count": null, "id": "9052fced-3fdb-4cd2-ac88-e2ccdce14e7c", "metadata": {}, "outputs": [], "source": ["agent_system_prompt"]}, {"cell_type": "code", "execution_count": null, "id": "6f2c120f", "metadata": {}, "outputs": [], "source": ["# Collect all tools\n", "tools = [write_email, schedule_meeting, check_calendar_availability, Done]\n", "tools_by_name = {tool.name: tool for tool in tools}\n", "\n", "# Initialize the LLM, enforcing tool use\n", "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", "llm_with_tools = 
llm.bind_tools(tools, tool_choice=\"required\")\n", "\n", "def llm_call(state: State):\n", " \"\"\"LLM decides whether to call a tool or not\"\"\"\n", "\n", " return {\n", " \"messages\": [\n", " # Invoke the LLM\n", " llm_with_tools.invoke(\n", " # Add the system prompt\n", " [ \n", " {\"role\": \"system\", \"content\": agent_system_prompt.format(\n", " tools_prompt=AGENT_TOOLS_PROMPT,\n", " background=default_background,\n", " response_preferences=default_response_preferences,\n", " cal_preferences=default_cal_preferences, \n", " )}\n", " ]\n", " # Add the current messages to the prompt\n", " + state[\"messages\"]\n", " )\n", " ]\n", " }"]}, {"cell_type": "markdown", "id": "9f05d11a", "metadata": {}, "source": ["#### Tool handler node\n", "\n", "After the LLM makes a decision, we need to execute the chosen tool. \n", "\n", "The `tool_handler` node executes the tool. We can see that nodes can update the graph state to capture any important state changes, such as the classification decision."]}, {"cell_type": "code", "execution_count": null, "id": "43eb6dc2", "metadata": {}, "outputs": [], "source": ["def tool_handler(state: dict):\n", " \"\"\"Performs the tool call.\"\"\"\n", "\n", " # List for tool messages\n", " result = []\n", " \n", " # Iterate through tool calls\n", " for tool_call in state[\"messages\"][-1].tool_calls:\n", " # Get the tool\n", " tool = tools_by_name[tool_call[\"name\"]]\n", " # Run it\n", " observation = tool.invoke(tool_call[\"args\"])\n", " # Create a tool message\n", " result.append({\"role\": \"tool\", \"content\" : observation, \"tool_call_id\": tool_call[\"id\"]})\n", " \n", " # Add it to our messages\n", " return {\"messages\": result}"]}, {"cell_type": "markdown", "id": "4721dede", "metadata": {}, "source": ["#### Conditional Routing\n", "\n", "Our agent needs to decide when to continue using tools and when to stop. 
This conditional routing function directs the agent to either continue or terminate."]}, {"cell_type": "code", "execution_count": null, "id": "7c7cbea7", "metadata": {}, "outputs": [], "source": ["def should_continue(state: State) -> Literal[\"tool_handler\", END]:\n", " \"\"\"Route to tool handler, or end if Done tool called.\"\"\"\n", " \n", " # Get the last message\n", " messages = state[\"messages\"]\n", " last_message = messages[-1]\n", " \n", " # If the last message is a tool call, check if it's a Done tool call\n", " if last_message.tool_calls:\n", " for tool_call in last_message.tool_calls: \n", " if tool_call[\"name\"] == \"Done\":\n", " return END\n", " else:\n", " return \"tool_handler\""]}, {"cell_type": "markdown", "id": "6eb4ede8", "metadata": {}, "source": ["#### Agent Graph\n", "\n", "Finally, we can assemble all components:"]}, {"cell_type": "code", "execution_count": null, "id": "f81df767", "metadata": {}, "outputs": [], "source": ["from langgraph.graph import StateGraph, START, END\n", "from email_assistant.utils import show_graph\n", "\n", "# Build workflow\n", "overall_workflow = StateGraph(State)\n", "\n", "# Add nodes\n", "overall_workflow.add_node(\"llm_call\", llm_call)\n", "overall_workflow.add_node(\"tool_handler\", tool_handler)\n", "\n", "# Add edges\n", "overall_workflow.add_edge(START, \"llm_call\")\n", "overall_workflow.add_conditional_edges(\n", " \"llm_call\",\n", " should_continue,\n", " {\n", " \"tool_handler\": \"tool_handler\",\n", " END: END,\n", " },\n", ")\n", "overall_workflow.add_edge(\"tool_handler\", \"llm_call\")\n", "\n", "# Compile the agent\n", "agent = overall_workflow.compile()"]}, {"cell_type": "code", "execution_count": null, "id": "617f6373-bf48-44c2-ba33-000c9f22b067", "metadata": {}, "outputs": [], "source": ["# View\n", "show_graph(agent)"]}, {"cell_type": "markdown", "id": "dc8367c4", "metadata": {}, "source": ["This creates a graph that:\n", "1. Starts with an LLM decision\n", "2. 
Conditionally routes to tool execution or termination\n", "3. After tool execution, returns to LLM for the next decision\n", "4. Repeats until completion or no tool is called\n"]}, {"cell_type": "markdown", "id": "b2b3406d-496d-43c9-942e-c5ce7e3a8321", "metadata": {}, "source": ["### Combine workflow with our agent\n", "\n", "We can combine the router and the agent."]}, {"cell_type": "code", "execution_count": null, "id": "697f2548-b5a5-4fb6-8aed-226369e53e25", "metadata": {}, "outputs": [], "source": ["overall_workflow = (\n", " StateGraph(State)\n", " .add_node(triage_router)\n", " .add_node(\"response_agent\", agent)\n", " .add_edge(START, \"triage_router\")\n", ").compile()"]}, {"cell_type": "code", "execution_count": null, "id": "2dd6dcc4-6346-4d41-ae36-61f3fc83b7a7", "metadata": {}, "outputs": [], "source": ["show_graph(overall_workflow, xray=True)"]}, {"cell_type": "markdown", "id": "2091d5cc", "metadata": {}, "source": ["This is a higher-level composition where:\n", "1. First, the triage router analyzes the email\n", "2. If needed, the response agent handles crafting a response\n", "3. The workflow ends when either the triage decides no response is needed or the response agent completes"]}, {"cell_type": "code", "execution_count": null, "id": "070f18a6", "metadata": {}, "outputs": [], "source": ["email_input = {\n", " \"author\": \"System Admin \",\n", " \"to\": \"Development Team \",\n", " \"subject\": \"Scheduled maintenance - database downtime\",\n", " \"email_thread\": \"Hi team,\\n\\nThis is a reminder that we'll be performing scheduled maintenance on the production database tonight from 2AM to 4AM EST. 
During this time, all database services will be unavailable.\\n\\nPlease plan your work accordingly and ensure no critical deployments are scheduled during this window.\\n\\nThanks,\\nSystem Admin Team\"\n", "}\n", "\n", "# Run the agent\n", "response = overall_workflow.invoke({\"email_input\": email_input})\n", "for m in response[\"messages\"]:\n", " m.pretty_print()"]}, {"cell_type": "code", "execution_count": null, "id": "7a50ae0a-7bd1-4e69-90be-781b1e77b4dd", "metadata": {}, "outputs": [], "source": ["email_input = {\n", " \"author\": \"Alice Smith \",\n", " \"to\": \"John Doe \",\n", " \"subject\": \"Quick question about API documentation\",\n", " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", "}\n", "\n", "# Run the agent\n", "response = overall_workflow.invoke({\"email_input\": email_input})\n", "for m in response[\"messages\"]:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "f631f61f", "metadata": {}, "source": ["## Testing with Local Deployment\n", "\n", "You can find the file for our agent in the `src/email_assistant` directory:\n", "\n", "* `src/email_assistant/email_assistant.py`\n", "\n", "You can test them locally in LangGraph Studio by running:\n", "\n", "```\n", "! 
langgraph dev\n", "```"]}, {"cell_type": "markdown", "id": "12752016", "metadata": {"lines_to_next_cell": 0}, "source": ["Example e-mail you can test:"]}, {"cell_type": "code", "execution_count": null, "id": "08ee005a", "metadata": {}, "outputs": [], "source": ["{\n", " \"author\": \"Alice Smith \",\n", " \"to\": \"John Doe \",\n", " \"subject\": \"Quick question about API documentation\",\n", " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", "}"]}, {"cell_type": "markdown", "id": "d09e33b6", "metadata": {}, "source": ["![studio-img](img/studio.png)"]}], "metadata": {"jupytext": {"cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all"}, "kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6"}}, "nbformat": 4, "nbformat_minor": 5} diff --git a/notebooks/evaluation.ipynb b/notebooks/evaluation.ipynb index c8ca4ea..164df6f 100644 --- a/notebooks/evaluation.ipynb +++ b/notebooks/evaluation.ipynb @@ -1,584 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "68e21aa1", - "metadata": {}, - "source": [ - "# Evaluating Agents\n", - "\n", - "We have an email assistant that uses a router to triage emails and then passes the email to the agent for response generation. How can we be sure that it will work well in production? 
This is why testing is important: it guides our decisions about our agent architecture with quantifiable metrics like response quality, token usage, latency, or triage accuracy. [LangSmith](https://docs.smith.langchain.com/) offers two primary ways to test agents. \n", - "\n", - "![overview-img](img/overview_eval.png)" - ] - }, - { - "cell_type": "markdown", - "id": "4d7f7048", - "metadata": {}, - "source": [ - "#### Load Environment Variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c47d4c3d", - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "load_dotenv(\"../.env\")" - ] - }, - { - "cell_type": "markdown", - "id": "2005c34d", - "metadata": {}, - "source": [ - "## How to run Evaluations\n", - "\n", - "#### Pytest / Vitest\n", - "\n", - "[Pytest](https://docs.pytest.org/en/stable/) and Vitest are well known to many developers as a powerful tools for writing tests within the Python and JavaScript ecosystems. LangSmith integrates with these frameworks to allow you to write and run tests that log results to LangSmith. For this notebook, we'll use Pytest.\n", - "* Pytest is a great way to get started for developers who are already familiar with their framework. \n", - "* Pytest is great for more complex evaluations, where each agent test case requires specific checks and success criteria that are harder to generalize.\n", - "\n", - "#### LangSmith Datasets \n", - "\n", - "You can also create a dataset [in LangSmith](https://docs.smith.langchain.com/evaluation) and run our assistant against the dataset using the LangSmith evaluate API.\n", - "* LangSmith datasets are great for teams who are collaboratively building out their test suite. 
\n", - "* You can leverage production traces, annotation queues, synthetic data generation, and more, to add examples to an ever-growing golden dataset.\n", - "* LangSmith datasets are great when you can define evaluators that can be applied to every test case in the dataset (ex. similarity, exact match accuracy, etc.)" - ] - }, - { - "cell_type": "markdown", - "id": "10b7c989", - "metadata": {}, - "source": [ - "## Test Cases\n", - "\n", - "Testing often starts with defining the test cases, which can be a challenging process. In this case, we'll just define a set of example emails we want to handle along with a few things to test. You can see the test cases in `eval/email_dataset.py`, which contains the following:\n", - "\n", - "1. **Input Emails**: A collection of diverse email examples\n", - "2. **Ground Truth Classifications**: `Respond`, `Notify`, `Ignore`\n", - "3. **Expected Tool Calls**: Tools called for each email that requires a response\n", - "4. **Response Criteria**: What makes a good response for emails requiring replies\n", - "\n", - "Note that we have both\n", - "- End to end \"integration\" tests (e.g. Input Emails -> Agent -> Final Output vs Response Criteria)\n", - "- Tests for specific steps in our workflow (e.g. 
Input Emails -> Agent -> Classification vs Ground Truth Classification)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f8fdc2b8", - "metadata": {}, - "outputs": [], - "source": [ - "%cd ..\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from eval.email_dataset import email_inputs, expected_tool_calls, triage_outputs_list, response_criteria_list\n", - "\n", - "test_case_ix = 0\n", - "\n", - "print(\"Email Input:\", email_inputs[test_case_ix])\n", - "print(\"Expected Triage Output:\", triage_outputs_list[test_case_ix])\n", - "print(\"Expected Tool Calls:\", expected_tool_calls[test_case_ix])\n", - "print(\"Response Criteria:\", response_criteria_list[test_case_ix])" - ] - }, - { - "cell_type": "markdown", - "id": "2337bd7c", - "metadata": {}, - "source": [ - "## Pytest Example\n", - "\n", - "Let's take a look at how we can write a test for a specific part of our workflow with Pytest. We will test whether our `email_assistant` makes the right tool calls when responding to the emails." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ae92fe30", - "metadata": {}, - "outputs": [], - "source": [ - "import pytest\n", - "from eval.email_dataset import email_inputs, expected_tool_calls\n", - "from email_assistant.utils import format_messages_string\n", - "from email_assistant.email_assistant import email_assistant\n", - "from email_assistant.utils import extract_tool_calls\n", - "\n", - "from langsmith import testing as t\n", - "from dotenv import load_dotenv\n", - "\n", - "load_dotenv(\".env\", override=True)\n", - "\n", - "@pytest.mark.langsmith\n", - "@pytest.mark.parametrize(\n", - " \"email_input, expected_calls\",\n", - " [ # Pick some examples with e-mail reply expected\n", - " (email_inputs[0],expected_tool_calls[0]),\n", - " (email_inputs[3],expected_tool_calls[3]),\n", - " ],\n", - ")\n", - "def test_email_dataset_tool_calls(email_input, expected_calls):\n", - " \"\"\"Test if email processing contains expected tool calls.\"\"\"\n", - " # Run the email assistant\n", - " messages = [{\"role\": \"user\", \"content\": str(email_input)}]\n", - " result = email_assistant.invoke({\"messages\": messages})\n", - " \n", - " # Extract tool calls from messages list\n", - " extracted_tool_calls = extract_tool_calls(result['messages'])\n", - " \n", - " # Check if all expected tool calls are in the extracted ones\n", - " missing_calls = [call for call in expected_calls if call.lower() not in extracted_tool_calls]\n", - " \n", - " t.log_outputs({\n", - " \"missing_calls\": missing_calls,\n", - " \"extracted_tool_calls\": extracted_tool_calls,\n", - " \"response\": format_messages_string(result['messages'])\n", - " })\n", - "\n", - " # Test passes if no expected calls are missing\n", - " assert len(missing_calls) == 0" - ] - }, - { - "cell_type": "markdown", - "id": "700aba2a", - "metadata": {}, - "source": [ - "You'll notice a few things. 
\n", - "- To [run with Pytest and log test results to LangSmith](https://docs.smith.langchain.com/evaluation/how_to_guides/pytest), we only need to add the `@pytest.mark.langsmith ` decorator to our function and place it in a file, as you see in `notebooks/test_tools.py`. This will log the test results to LangSmith.\n", - "- Second, we can pass dataset examples to the test function as shown [here](https://docs.smith.langchain.com/evaluation/how_to_guides/pytest#parametrize-with-pytestmarkparametrize) via `@pytest.mark.parametrize`. \n", - "\n", - "#### Running Pytest\n", - "We can run the test from the command line. We've defined the above code in a python file. From the project root, run:\n", - "\n", - "`! LANGSMITH_TEST_SUITE='Email assistant: Test Tools For Interrupt' pytest notebooks/test_tools.py`" - ] - }, - { - "cell_type": "markdown", - "id": "53165e98", - "metadata": {}, - "source": [ - "#### Viewing Experiment Result\n", - "\n", - "We can view the results in the LangSmith UI. The `assert len(missing_calls) == 0` is logged to the `Pass` column in LangSmith. The `log_outputs` are passed to the `Outputs` column and function arguments are passed to the `Inputs` column. Each input passed in `@pytest.mark.parametrize(` is a separate row logged to the `LANGSMITH_TEST_SUITE` project name in LangSmith, which is found under `Datasets & Experiments`.\n", - "\n", - "![Test Results](img/test_result.png)" - ] - }, - { - "cell_type": "markdown", - "id": "fd325e27", - "metadata": {}, - "source": [ - "## LangSmith Datasets Example\n", - "\n", - "![overview-img](img/eval_detail.png)\n", - "\n", - "Let's take a look at how we can run evaluations with LangSmith datasets. In the previous example with Pytest, we evaluated the tool calling accuracy of the email assistant. 
Now, the dataset that we're going to evaluate here is specifically for the triage step of the email assistant, in classifying whether an email requires a response.\n", - "\n", - "#### Dataset Definition \n", - "\n", - "We can [create a dataset in LangSmith](https://docs.smith.langchain.com/evaluation/how_to_guides/manage_datasets_programmatically#create-a-dataset) with the LangSmith SDK. The below code creates a dataset with the test cases in the `eval/email_dataset.py` file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ea997ac", - "metadata": {}, - "outputs": [], - "source": [ - "from langsmith import Client\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from eval.email_dataset import examples_triage\n", - "\n", - "# Initialize LangSmith client\n", - "client = Client()\n", - "\n", - "# Dataset name\n", - "dataset_name = \"Interrupt Workshop: E-mail Triage Dataset\"\n", - "\n", - "# Create dataset if it doesn't exist\n", - "if not client.has_dataset(dataset_name=dataset_name):\n", - " dataset = client.create_dataset(\n", - " dataset_name=dataset_name, \n", - " description=\"A dataset of e-mails and their triage decisions.\"\n", - " )\n", - " # Add examples to the dataset\n", - " client.create_examples(dataset_id=dataset.id, examples=examples_triage)" - ] - }, - { - "cell_type": "markdown", - "id": "0b2df606", - "metadata": {}, - "source": [ - "#### Target Function\n", - "\n", - "The dataset has the following structure, with an e-mail input and a ground truth triage classification for the e-mail as output." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "09c8b418-49fd-4fe9-adbb-7aeec1e8cf04", - "metadata": {}, - "outputs": [], - "source": [ - "# NOTE: This is just an example, this cell won't run\n", - "examples_triage = [\n", - " {\n", - " \"inputs\": {\"email_input\": email_input_1},\n", - " \"outputs\": {\"classification\": triage_output_1}, # NOTE: This becomes the reference_output in the created dataset\n", - " }, ...\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7d7e83f-3006-4386-9230-786545c7b1a1", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Dataset Example Input (inputs):\", examples_triage[0]['inputs'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f292f070-7af6-4370-9338-e90bfd6b3d42", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Dataset Example Reference Output (reference_outputs):\", examples_triage[0]['outputs'])" - ] - }, - { - "cell_type": "markdown", - "id": "8290e820", - "metadata": {}, - "source": [ - "We define a function that takes the dataset inputs and passes them to our email assistant. The LangSmith [evaluate API](https://docs.smith.langchain.com/evaluation) passes the `inputs` dict to this function. This function then returns a dict with the agent's output. Because we are evaluating the triage step, we only need to return the classification decision. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b9d1ded", - "metadata": {}, - "outputs": [], - "source": [ - "def target_email_assistant(inputs: dict) -> dict:\n", - " \"\"\"Process an email through the workflow-based email assistant.\"\"\"\n", - " response = email_assistant.invoke({\"email_input\": inputs[\"email_input\"]})\n", - " return {\"classification_decision\": response['classification_decision']}" - ] - }, - { - "cell_type": "markdown", - "id": "5ba6ec4c", - "metadata": {}, - "source": [ - "#### Evaluator Function \n", - "\n", - "Now, we create an evaluator function. What do we want to evaluate? We have reference outputs in our dataset and agent outputs defined in the functions above.\n", - "\n", - "* Reference outputs: `\"reference_outputs\": {\"classification\": triage_output_1} ...`\n", - "* Agent outputs: `\"outputs\": {\"classification_decision\": agent_output_1} ...`\n", - "\n", - "We want to evaluate if the agent's output matches the reference output. So we simply need a an evaluator function that compares the two, where `outputs` is the agent's output and `reference_outputs` is the reference output from the dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4fee7532", - "metadata": {}, - "outputs": [], - "source": [ - "def classification_evaluator(outputs: dict, reference_outputs: dict) -> bool:\n", - " \"\"\"Check if the answer exactly matches the expected answer.\"\"\"\n", - " return outputs[\"classification_decision\"].lower() == reference_outputs[\"classification\"].lower()" - ] - }, - { - "cell_type": "markdown", - "id": "50fd2de9", - "metadata": {}, - "source": [ - "### Running Evaluation\n", - "\n", - "Now, the question is: how are these things hooked together? The evaluate API takes care of it for us. It passes the `inputs` dict from our dataset the target function. It passes the `reference_outputs` dict from our dataset to the evaluator function. 
And it passes the `outputs` of our agent to the evaluator function. \n", - "\n", - "Note this is similar to what we did with Pytest: in Pytest, we passed in the dataset example inputs and reference outputs to the test function with `@pytest.mark.parametrize`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6807306d", - "metadata": {}, - "outputs": [], - "source": [ - "experiment_results_workflow = client.evaluate(\n", - " # Run agent \n", - " target_email_assistant,\n", - " # Dataset name \n", - " data=dataset_name,\n", - " # Evaluator\n", - " evaluators=[classification_evaluator],\n", - " # Name of the experiment\n", - " experiment_prefix=\"E-mail assistant workflow\", \n", - " # Number of concurrent evaluations\n", - " max_concurrency=2, \n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "76baff88", - "metadata": {}, - "source": [ - "We can view the results from both experiments in the LangSmith UI.\n", - "\n", - "![Test Results](img/eval.png)" - ] - }, - { - "cell_type": "markdown", - "id": "c5146b52", - "metadata": {}, - "source": [ - "## LLM-as-Judge Evaluation\n", - "\n", - "We've shown unit tests for the triage step (using evaluate()) and tool calling (using Pytest). \n", - "\n", - "We'll showcase how you could use an LLM as a judge to evaluate our agent's execution against a set of success criteria. \n", - "\n", - "![types](img/eval_types.png)\n", - "\n", - "First, we define a structured output schema for our LLM grader that contains a grade and justification for the grade." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e1d342b8", - "metadata": {}, - "outputs": [], - "source": [ - "from pydantic import BaseModel, Field\n", - "from langchain.chat_models import init_chat_model\n", - "\n", - "class CriteriaGrade(BaseModel):\n", - " \"\"\"Score the response against specific criteria.\"\"\"\n", - " grade: bool = Field(description=\"Does the response meet the provided criteria?\")\n", - " justification: str = Field(description=\"The justification for the grade and score, including specific examples from the response.\")\n", - "\n", - "# Create a global LLM for evaluation to avoid recreating it for each test\n", - "criteria_eval_llm = init_chat_model(\"openai:gpt-4o\")\n", - "criteria_eval_structured_llm = criteria_eval_llm.with_structured_output(CriteriaGrade)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bec02b18", - "metadata": {}, - "outputs": [], - "source": [ - "email_input = email_inputs[0]\n", - "print(\"Email Input:\", email_input)\n", - "success_criteria = response_criteria_list[0]\n", - "print(\"Success Criteria:\", success_criteria)" - ] - }, - { - "cell_type": "markdown", - "id": "38390ccd", - "metadata": {}, - "source": [ - "Our Email Assistant is invoked with the email input and the response is formatted into a string. These are all then passed to the LLM grader to receive a grade and justification for the grade." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cbff28fc", - "metadata": {}, - "outputs": [], - "source": [ - "response = email_assistant.invoke({\"email_input\": email_input})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d64619fb", - "metadata": {}, - "outputs": [], - "source": [ - "from eval.prompts import RESPONSE_CRITERIA_SYSTEM_PROMPT\n", - "\n", - "all_messages_str = format_messages_string(response['messages'])\n", - "eval_result = criteria_eval_structured_llm.invoke([\n", - " {\"role\": \"system\",\n", - " \"content\": RESPONSE_CRITERIA_SYSTEM_PROMPT},\n", - " {\"role\": \"user\",\n", - " \"content\": f\"\"\"\\n\\n Response criteria: {success_criteria} \\n\\n Assistant's response: \\n\\n {all_messages_str} \\n\\n Evaluate whether the assistant's response meets the criteria and provide justification for your evaluation.\"\"\"}\n", - " ])\n", - "eval_result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64275647-6fdb-4bf3-806b-4dbc770cbd6f", - "metadata": {}, - "outputs": [], - "source": [ - "RESPONSE_CRITERIA_SYSTEM_PROMPT" - ] - }, - { - "cell_type": "markdown", - "id": "7994952c", - "metadata": {}, - "source": [ - "We can see that the LLM grader returns an eval result with a schema matching our `CriteriaGrade` base model." - ] - }, - { - "cell_type": "markdown", - "id": "0b44111d", - "metadata": {}, - "source": [ - "## Running against a Larger Test Suite\n", - "Now that we've seen how to evaluate our agent using Pytest and evaluate(), and seen an example of using an LLM as a judge, we can use evaluations over a bigger test suite to get a better sense of how our agent performs over a wider variety of examples." - ] - }, - { - "cell_type": "markdown", - "id": "9280d5ae-3070-4131-8763-454073176081", - "metadata": {}, - "source": [ - "Let's run our email_assistant against a larger test suite.\n", - "```\n", - "! 
LANGSMITH_TEST_SUITE='Email assistant: Test Full Response Interrupt' LANGSMITH_EXPERIMENT='email_assistant' pytest tests/test_response.py --agent-module email_assistant\n", - "```\n", - "\n", - "In `test_response.py`, you can see a few things. \n", - "\n", - "We pass our dataset examples into functions that will run pytest and log to our `LANGSMITH_TEST_SUITE`:\n", - "\n", - "```\n", - "# Reference output key\n", - "@pytest.mark.langsmith(output_keys=[\"criteria\"])\n", - "# Variable names and a list of tuples with the test cases\n", - "# Each test case is (email_input, email_name, criteria, expected_calls)\n", - "@pytest.mark.parametrize(\"email_input,email_name,criteria,expected_calls\",create_response_test_cases())\n", - "def test_response_criteria_evaluation(email_input, email_name, criteria, expected_calls):\n", - "```\n", - "\n", - "We use LLM-as-judge with a grading schema:\n", - "```\n", - "class CriteriaGrade(BaseModel):\n", - " \"\"\"Score the response against specific criteria.\"\"\"\n", - " grade: bool = Field(description=\"Does the response meet the provided criteria?\")\n", - " justification: str = Field(description=\"The justification for the grade and score, including specific examples from the response.\")\n", - "```\n", - "\n", - "\n", - "We evaluate the agent response relative to the criteria:\n", - "```\n", - " # Evaluate against criteria\n", - " eval_result = criteria_eval_structured_llm.invoke([\n", - " {\"role\": \"system\",\n", - " \"content\": RESPONSE_CRITERIA_SYSTEM_PROMPT},\n", - " {\"role\": \"user\",\n", - " \"content\": f\"\"\"\\n\\n Response criteria: {criteria} \\n\\n Assistant's response: \\n\\n {all_messages_str} \\n\\n Evaluate whether the assistant's response meets the criteria and provide justification for your evaluation.\"\"\"}\n", - " ])\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "ca836fbf", - "metadata": {}, - "source": [ - "Now let's take a look at this experiment in the LangSmith UI and look into what our 
agent did well, and what it could improve on.\n", - "\n", - "#### Getting Results\n", - "\n", - "We can also get the results of the evaluation by reading the tracing project associated with our experiment. This is great for creating custom visualizations of our agent's performance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70b655f8", - "metadata": { - "lines_to_next_cell": 0 - }, - "outputs": [], - "source": [ - "# TODO: Copy your experiment name here\n", - "experiment_name = \"email_assistant:8286b3b8\"\n", - "email_assistant_experiment_results = client.read_project(project_name=experiment_name, include_stats=True)\n", - "\n", - "print(\"Latency p50:\", email_assistant_experiment_results.latency_p50)\n", - "print(\"Latency p99:\", email_assistant_experiment_results.latency_p99)\n", - "print(\"Token Usage:\", email_assistant_experiment_results.total_tokens)\n", - "print(\"Feedback Stats:\", email_assistant_experiment_results.feedback_stats)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0ccdfaa6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{"cells": [{"cell_type": "markdown", "id": "68e21aa1", "metadata": {}, "source": ["# Evaluating Agents\n", "\n", "We have an email assistant that uses a router to triage emails and then passes the email to the agent for response generation. How can we be sure that it will work well in production? 
This is why testing is important: it guides our decisions about our agent architecture with quantifiable metrics like response quality, token usage, latency, or triage accuracy. [LangSmith](https://docs.smith.langchain.com/) offers two primary ways to test agents. \n", "\n", "![overview-img](img/overview_eval.png)"]}, {"cell_type": "markdown", "id": "4d7f7048", "metadata": {}, "source": ["#### Load Environment Variables"]}, {"cell_type": "code", "execution_count": null, "id": "c47d4c3d", "metadata": {}, "outputs": [], "source": ["from dotenv import load_dotenv\n", "load_dotenv(\"../.env\")"]}, {"cell_type": "markdown", "id": "2005c34d", "metadata": {}, "source": ["## How to run Evaluations\n", "\n", "#### Pytest / Vitest\n", "\n", "[Pytest](https://docs.pytest.org/en/stable/) and Vitest are well known to many developers as powerful tools for writing tests within the Python and JavaScript ecosystems. LangSmith integrates with these frameworks to allow you to write and run tests that log results to LangSmith. For this notebook, we'll use Pytest.\n", "* Pytest is a great way to get started for developers who are already familiar with their framework. \n", "* Pytest is great for more complex evaluations, where each agent test case requires specific checks and success criteria that are harder to generalize.\n", "\n", "#### LangSmith Datasets \n", "\n", "You can also create a dataset [in LangSmith](https://docs.smith.langchain.com/evaluation) and run our assistant against the dataset using the LangSmith evaluate API.\n", "* LangSmith datasets are great for teams who are collaboratively building out their test suite. \n", "* You can leverage production traces, annotation queues, synthetic data generation, and more, to add examples to an ever-growing golden dataset.\n", "* LangSmith datasets are great when you can define evaluators that can be applied to every test case in the dataset (ex. 
similarity, exact match accuracy, etc.)"]}, {"cell_type": "markdown", "id": "10b7c989", "metadata": {}, "source": ["## Test Cases\n", "\n", "Testing often starts with defining the test cases, which can be a challenging process. In this case, we'll just define a set of example emails we want to handle along with a few things to test. You can see the test cases in `eval/email_dataset.py`, which contains the following:\n", "\n", "1. **Input Emails**: A collection of diverse email examples\n", "2. **Ground Truth Classifications**: `Respond`, `Notify`, `Ignore`\n", "3. **Expected Tool Calls**: Tools called for each email that requires a response\n", "4. **Response Criteria**: What makes a good response for emails requiring replies\n", "\n", "Note that we have both\n", "- End to end \"integration\" tests (e.g. Input Emails -> Agent -> Final Output vs Response Criteria)\n", "- Tests for specific steps in our workflow (e.g. Input Emails -> Agent -> Classification vs Ground Truth Classification)\n"]}, {"cell_type": "code", "execution_count": null, "id": "f8fdc2b8", "metadata": {}, "outputs": [], "source": ["%load_ext autoreload\n", "%autoreload 2\n", "\n", "from eval.email_dataset import email_inputs, expected_tool_calls, triage_outputs_list, response_criteria_list\n", "\n", "test_case_ix = 0\n", "\n", "print(\"Email Input:\", email_inputs[test_case_ix])\n", "print(\"Expected Triage Output:\", triage_outputs_list[test_case_ix])\n", "print(\"Expected Tool Calls:\", expected_tool_calls[test_case_ix])\n", "print(\"Response Criteria:\", response_criteria_list[test_case_ix])"]}, {"cell_type": "markdown", "id": "2337bd7c", "metadata": {}, "source": ["## Pytest Example\n", "\n", "Let's take a look at how we can write a test for a specific part of our workflow with Pytest. 
We will test whether our `email_assistant` makes the right tool calls when responding to the emails."]}, {"cell_type": "code", "execution_count": null, "id": "ae92fe30", "metadata": {}, "outputs": [], "source": ["import pytest\n", "from eval.email_dataset import email_inputs, expected_tool_calls\n", "from email_assistant.utils import format_messages_string\n", "from email_assistant.email_assistant import email_assistant\n", "from email_assistant.utils import extract_tool_calls\n", "\n", "from langsmith import testing as t\n", "from dotenv import load_dotenv\n", "\n", "load_dotenv(\".env\", override=True)\n", "\n", "@pytest.mark.langsmith\n", "@pytest.mark.parametrize(\n", " \"email_input, expected_calls\",\n", " [ # Pick some examples with e-mail reply expected\n", " (email_inputs[0],expected_tool_calls[0]),\n", " (email_inputs[3],expected_tool_calls[3]),\n", " ],\n", ")\n", "def test_email_dataset_tool_calls(email_input, expected_calls):\n", " \"\"\"Test if email processing contains expected tool calls.\"\"\"\n", " # Run the email assistant\n", " messages = [{\"role\": \"user\", \"content\": str(email_input)}]\n", " result = email_assistant.invoke({\"messages\": messages})\n", " \n", " # Extract tool calls from messages list\n", " extracted_tool_calls = extract_tool_calls(result['messages'])\n", " \n", " # Check if all expected tool calls are in the extracted ones\n", " missing_calls = [call for call in expected_calls if call.lower() not in extracted_tool_calls]\n", " \n", " t.log_outputs({\n", " \"missing_calls\": missing_calls,\n", " \"extracted_tool_calls\": extracted_tool_calls,\n", " \"response\": format_messages_string(result['messages'])\n", " })\n", "\n", " # Test passes if no expected calls are missing\n", " assert len(missing_calls) == 0"]}, {"cell_type": "markdown", "id": "700aba2a", "metadata": {}, "source": ["You'll notice a few things. 
\n", "- To [run with Pytest and log test results to LangSmith](https://docs.smith.langchain.com/evaluation/how_to_guides/pytest), we only need to add the `@pytest.mark.langsmith ` decorator to our function and place it in a file, as you see in `notebooks/test_tools.py`. This will log the test results to LangSmith.\n", "- Second, we can pass dataset examples to the test function as shown [here](https://docs.smith.langchain.com/evaluation/how_to_guides/pytest#parametrize-with-pytestmarkparametrize) via `@pytest.mark.parametrize`. \n", "\n", "#### Running Pytest\n", "We can run the test from the command line. We've defined the above code in a python file. From the project root, run:\n", "\n", "`! LANGSMITH_TEST_SUITE='Email assistant: Test Tools For Interrupt' pytest notebooks/test_tools.py`"]}, {"cell_type": "markdown", "id": "53165e98", "metadata": {}, "source": ["#### Viewing Experiment Result\n", "\n", "We can view the results in the LangSmith UI. The `assert len(missing_calls) == 0` is logged to the `Pass` column in LangSmith. The `log_outputs` are passed to the `Outputs` column and function arguments are passed to the `Inputs` column. Each input passed in `@pytest.mark.parametrize(` is a separate row logged to the `LANGSMITH_TEST_SUITE` project name in LangSmith, which is found under `Datasets & Experiments`.\n", "\n", "![Test Results](img/test_result.png)"]}, {"cell_type": "markdown", "id": "fd325e27", "metadata": {}, "source": ["## LangSmith Datasets Example\n", "\n", "![overview-img](img/eval_detail.png)\n", "\n", "Let's take a look at how we can run evaluations with LangSmith datasets. In the previous example with Pytest, we evaluated the tool calling accuracy of the email assistant. 
Now, the dataset that we're going to evaluate here is specifically for the triage step of the email assistant, in classifying whether an email requires a response.\n", "\n", "#### Dataset Definition \n", "\n", "We can [create a dataset in LangSmith](https://docs.smith.langchain.com/evaluation/how_to_guides/manage_datasets_programmatically#create-a-dataset) with the LangSmith SDK. The below code creates a dataset with the test cases in the `eval/email_dataset.py` file."]}, {"cell_type": "code", "execution_count": null, "id": "7ea997ac", "metadata": {}, "outputs": [], "source": ["from langsmith import Client\n", "import matplotlib.pyplot as plt\n", "\n", "from eval.email_dataset import examples_triage\n", "\n", "# Initialize LangSmith client\n", "client = Client()\n", "\n", "# Dataset name\n", "dataset_name = \"Interrupt Workshop: E-mail Triage Dataset\"\n", "\n", "# Create dataset if it doesn't exist\n", "if not client.has_dataset(dataset_name=dataset_name):\n", " dataset = client.create_dataset(\n", " dataset_name=dataset_name, \n", " description=\"A dataset of e-mails and their triage decisions.\"\n", " )\n", " # Add examples to the dataset\n", " client.create_examples(dataset_id=dataset.id, examples=examples_triage)"]}, {"cell_type": "markdown", "id": "0b2df606", "metadata": {}, "source": ["#### Target Function\n", "\n", "The dataset has the following structure, with an e-mail input and a ground truth triage classification for the e-mail as output."]}, {"cell_type": "code", "execution_count": null, "id": "09c8b418-49fd-4fe9-adbb-7aeec1e8cf04", "metadata": {}, "outputs": [], "source": ["# NOTE: This is just an example, this cell won't run\n", "examples_triage = [\n", " {\n", " \"inputs\": {\"email_input\": email_input_1},\n", " \"outputs\": {\"classification\": triage_output_1}, # NOTE: This becomes the reference_output in the created dataset\n", " }, ...\n", "]"]}, {"cell_type": "code", "execution_count": null, "id": "f7d7e83f-3006-4386-9230-786545c7b1a1", 
"metadata": {}, "outputs": [], "source": ["print(\"Dataset Example Input (inputs):\", examples_triage[0]['inputs'])"]}, {"cell_type": "code", "execution_count": null, "id": "f292f070-7af6-4370-9338-e90bfd6b3d42", "metadata": {}, "outputs": [], "source": ["print(\"Dataset Example Reference Output (reference_outputs):\", examples_triage[0]['outputs'])"]}, {"cell_type": "markdown", "id": "8290e820", "metadata": {}, "source": ["We define a function that takes the dataset inputs and passes them to our email assistant. The LangSmith [evaluate API](https://docs.smith.langchain.com/evaluation) passes the `inputs` dict to this function. This function then returns a dict with the agent's output. Because we are evaluating the triage step, we only need to return the classification decision. "]}, {"cell_type": "code", "execution_count": null, "id": "0b9d1ded", "metadata": {}, "outputs": [], "source": ["def target_email_assistant(inputs: dict) -> dict:\n", " \"\"\"Process an email through the workflow-based email assistant.\"\"\"\n", " response = email_assistant.invoke({\"email_input\": inputs[\"email_input\"]})\n", " return {\"classification_decision\": response['classification_decision']}"]}, {"cell_type": "markdown", "id": "5ba6ec4c", "metadata": {}, "source": ["#### Evaluator Function \n", "\n", "Now, we create an evaluator function. What do we want to evaluate? We have reference outputs in our dataset and agent outputs defined in the functions above.\n", "\n", "* Reference outputs: `\"reference_outputs\": {\"classification\": triage_output_1} ...`\n", "* Agent outputs: `\"outputs\": {\"classification_decision\": agent_output_1} ...`\n", "\n", "We want to evaluate if the agent's output matches the reference output. 
So we simply need an evaluator function that compares the two, where `outputs` is the agent's output and `reference_outputs` is the reference output from the dataset."]}, {"cell_type": "code", "execution_count": null, "id": "4fee7532", "metadata": {}, "outputs": [], "source": ["def classification_evaluator(outputs: dict, reference_outputs: dict) -> bool:\n", " \"\"\"Check if the answer exactly matches the expected answer.\"\"\"\n", " return outputs[\"classification_decision\"].lower() == reference_outputs[\"classification\"].lower()"]}, {"cell_type": "markdown", "id": "50fd2de9", "metadata": {}, "source": ["### Running Evaluation\n", "\n", "Now, the question is: how are these things hooked together? The evaluate API takes care of it for us. It passes the `inputs` dict from our dataset to the target function. It passes the `reference_outputs` dict from our dataset to the evaluator function. And it passes the `outputs` of our agent to the evaluator function. \n", "\n", "Note this is similar to what we did with Pytest: in Pytest, we passed in the dataset example inputs and reference outputs to the test function with `@pytest.mark.parametrize`."]}, {"cell_type": "code", "execution_count": null, "id": "6807306d", "metadata": {}, "outputs": [], "source": ["experiment_results_workflow = client.evaluate(\n", " # Run agent \n", " target_email_assistant,\n", " # Dataset name \n", " data=dataset_name,\n", " # Evaluator\n", " evaluators=[classification_evaluator],\n", " # Name of the experiment\n", " experiment_prefix=\"E-mail assistant workflow\", \n", " # Number of concurrent evaluations\n", " max_concurrency=2, \n", ")"]}, {"cell_type": "markdown", "id": "76baff88", "metadata": {}, "source": ["We can view the results from both experiments in the LangSmith UI.\n", "\n", "![Test Results](img/eval.png)"]}, {"cell_type": "markdown", "id": "c5146b52", "metadata": {}, "source": ["## LLM-as-Judge Evaluation\n", "\n", "We've shown unit tests for the triage step (using evaluate()) 
and tool calling (using Pytest). \n", "\n", "We'll showcase how you could use an LLM as a judge to evaluate our agent's execution against a set of success criteria. \n", "\n", "![types](img/eval_types.png)\n", "\n", "First, we define a structured output schema for our LLM grader that contains a grade and justification for the grade."]}, {"cell_type": "code", "execution_count": null, "id": "e1d342b8", "metadata": {}, "outputs": [], "source": ["from pydantic import BaseModel, Field\n", "from langchain.chat_models import init_chat_model\n", "\n", "class CriteriaGrade(BaseModel):\n", " \"\"\"Score the response against specific criteria.\"\"\"\n", " grade: bool = Field(description=\"Does the response meet the provided criteria?\")\n", " justification: str = Field(description=\"The justification for the grade and score, including specific examples from the response.\")\n", "\n", "# Create a global LLM for evaluation to avoid recreating it for each test\n", "criteria_eval_llm = init_chat_model(\"openai:gpt-4o\")\n", "criteria_eval_structured_llm = criteria_eval_llm.with_structured_output(CriteriaGrade)"]}, {"cell_type": "code", "execution_count": null, "id": "bec02b18", "metadata": {}, "outputs": [], "source": ["email_input = email_inputs[0]\n", "print(\"Email Input:\", email_input)\n", "success_criteria = response_criteria_list[0]\n", "print(\"Success Criteria:\", success_criteria)"]}, {"cell_type": "markdown", "id": "38390ccd", "metadata": {}, "source": ["Our Email Assistant is invoked with the email input and the response is formatted into a string. 
These are all then passed to the LLM grader to receive a grade and justification for the grade."]}, {"cell_type": "code", "execution_count": null, "id": "cbff28fc", "metadata": {}, "outputs": [], "source": ["response = email_assistant.invoke({\"email_input\": email_input})"]}, {"cell_type": "code", "execution_count": null, "id": "d64619fb", "metadata": {}, "outputs": [], "source": ["from eval.prompts import RESPONSE_CRITERIA_SYSTEM_PROMPT\n", "\n", "all_messages_str = format_messages_string(response['messages'])\n", "eval_result = criteria_eval_structured_llm.invoke([\n", " {\"role\": \"system\",\n", " \"content\": RESPONSE_CRITERIA_SYSTEM_PROMPT},\n", " {\"role\": \"user\",\n", " \"content\": f\"\"\"\\n\\n Response criteria: {success_criteria} \\n\\n Assistant's response: \\n\\n {all_messages_str} \\n\\n Evaluate whether the assistant's response meets the criteria and provide justification for your evaluation.\"\"\"}\n", " ])\n", "eval_result"]}, {"cell_type": "code", "execution_count": null, "id": "64275647-6fdb-4bf3-806b-4dbc770cbd6f", "metadata": {}, "outputs": [], "source": ["RESPONSE_CRITERIA_SYSTEM_PROMPT"]}, {"cell_type": "markdown", "id": "7994952c", "metadata": {}, "source": ["We can see that the LLM grader returns an eval result with a schema matching our `CriteriaGrade` base model."]}, {"cell_type": "markdown", "id": "0b44111d", "metadata": {}, "source": ["## Running against a Larger Test Suite\n", "Now that we've seen how to evaluate our agent using Pytest and evaluate(), and seen an example of using an LLM as a judge, we can use evaluations over a bigger test suite to get a better sense of how our agent performs over a wider variety of examples."]}, {"cell_type": "markdown", "id": "9280d5ae-3070-4131-8763-454073176081", "metadata": {}, "source": ["Let's run our email_assistant against a larger test suite.\n", "```\n", "! 
LANGSMITH_TEST_SUITE='Email assistant: Test Full Response Interrupt' LANGSMITH_EXPERIMENT='email_assistant' pytest tests/test_response.py --agent-module email_assistant\n", "```\n", "\n", "In `test_response.py`, you can see a few things. \n", "\n", "We pass our dataset examples into functions that will run pytest and log to our `LANGSMITH_TEST_SUITE`:\n", "\n", "```\n", "# Reference output key\n", "@pytest.mark.langsmith(output_keys=[\"criteria\"])\n", "# Variable names and a list of tuples with the test cases\n", "# Each test case is (email_input, email_name, criteria, expected_calls)\n", "@pytest.mark.parametrize(\"email_input,email_name,criteria,expected_calls\",create_response_test_cases())\n", "def test_response_criteria_evaluation(email_input, email_name, criteria, expected_calls):\n", "```\n", "\n", "We use LLM-as-judge with a grading schema:\n", "```\n", "class CriteriaGrade(BaseModel):\n", " \"\"\"Score the response against specific criteria.\"\"\"\n", " grade: bool = Field(description=\"Does the response meet the provided criteria?\")\n", " justification: str = Field(description=\"The justification for the grade and score, including specific examples from the response.\")\n", "```\n", "\n", "\n", "We evaluate the agent response relative to the criteria:\n", "```\n", " # Evaluate against criteria\n", " eval_result = criteria_eval_structured_llm.invoke([\n", " {\"role\": \"system\",\n", " \"content\": RESPONSE_CRITERIA_SYSTEM_PROMPT},\n", " {\"role\": \"user\",\n", " \"content\": f\"\"\"\\n\\n Response criteria: {criteria} \\n\\n Assistant's response: \\n\\n {all_messages_str} \\n\\n Evaluate whether the assistant's response meets the criteria and provide justification for your evaluation.\"\"\"}\n", " ])\n", "```"]}, {"cell_type": "markdown", "id": "ca836fbf", "metadata": {}, "source": ["Now let's take a look at this experiment in the LangSmith UI and look into what our agent did well, and what it could improve on.\n", "\n", "#### Getting Results\n", "\n", 
"We can also get the results of the evaluation by reading the tracing project associated with our experiment. This is great for creating custom visualizations of our agent's performance."]}, {"cell_type": "code", "execution_count": null, "id": "70b655f8", "metadata": {"lines_to_next_cell": 0}, "outputs": [], "source": ["# TODO: Copy your experiment name here\n", "experiment_name = \"email_assistant:8286b3b8\"\n", "email_assistant_experiment_results = client.read_project(project_name=experiment_name, include_stats=True)\n", "\n", "print(\"Latency p50:\", email_assistant_experiment_results.latency_p50)\n", "print(\"Latency p99:\", email_assistant_experiment_results.latency_p99)\n", "print(\"Token Usage:\", email_assistant_experiment_results.total_tokens)\n", "print(\"Feedback Stats:\", email_assistant_experiment_results.feedback_stats)"]}, {"cell_type": "code", "execution_count": null, "id": "0ccdfaa6", "metadata": {}, "outputs": [], "source": []}], "metadata": {"jupytext": {"cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all"}, "kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6"}}, "nbformat": 4, "nbformat_minor": 5} diff --git a/notebooks/hitl.ipynb b/notebooks/hitl.ipynb index 17024a6..5374aec 100644 --- a/notebooks/hitl.ipynb +++ b/notebooks/hitl.ipynb @@ -1,1374 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "07c57479", - "metadata": {}, - "source": [ - "# Agents with Human-in-the-Loop\n", - "\n", - "We have an email assistant that uses a router to triage emails and then passes the email to the agent for response generation. We've also evaluated it. But do we fully *trust* it to manage our inbox autonomously? 
For such a sensitive task, human-in-the-loop (HITL) is important! Here we'll show how to add a human-in-the-loop to our email assistant so that we can review specific tool calls. \n", - "\n", - "![overview-img](img/overview_hitl.png)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "c8f73f12", - "metadata": {}, - "source": [ - "We're going to show how to make the graph *pause* at specific points and await human input.\n", - "\n", - "![overview-img](img/hitl_schematic.png)" - ] - }, - { - "cell_type": "markdown", - "id": "52e3532e", - "metadata": {}, - "source": [ - "#### Load Environment Variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a57594a7", - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "load_dotenv(\"../.env\")" - ] - }, - { - "cell_type": "markdown", - "id": "2566464d", - "metadata": {}, - "source": [ - "## Adding HITL to our email assistant\n", - "\n", - "Let's add HITL to our email assistant. \n", - "\n", - "We can start with tools, just as we did before. \n", - "\n", - "But now, we'll add a new tool Question that allows the assistant to ask the user a question." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d4dfb07", - "metadata": {}, - "outputs": [], - "source": [ - "%cd ..\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from typing import Literal\n", - "from datetime import datetime\n", - "from pydantic import BaseModel\n", - "\n", - "from langchain.chat_models import init_chat_model\n", - "from langchain_core.tools import tool\n", - "\n", - "from langgraph.graph import StateGraph, START, END\n", - "from langgraph.types import interrupt, Command\n", - "\n", - "from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl, default_background, default_triage_instructions, default_response_preferences, default_cal_preferences\n", - "from src.email_assistant.tools.default.prompt_templates import HITL_TOOLS_PROMPT\n", - "from email_assistant.schemas import State, RouterSchema, StateInput\n", - "from email_assistant.utils import parse_email, format_for_display, format_email_markdown\n", - "\n", - "# Agent tools \n", - "@tool\n", - "def write_email(to: str, subject: str, content: str) -> str:\n", - " \"\"\"Write and send an email.\"\"\"\n", - " # Placeholder response - in real app would send email\n", - " return f\"Email sent to {to} with subject '{subject}' and content: {content}\"\n", - "\n", - "@tool\n", - "def schedule_meeting(\n", - " attendees: list[str], subject: str, duration_minutes: int, preferred_day: datetime, start_time: int\n", - ") -> str:\n", - " \"\"\"Schedule a calendar meeting.\"\"\"\n", - " # Placeholder response - in real app would check calendar and schedule\n", - " date_str = preferred_day.strftime(\"%A, %B %d, %Y\")\n", - " return f\"Meeting '{subject}' scheduled on {date_str} at {start_time} for {duration_minutes} minutes with {len(attendees)} attendees\"\n", - "\n", - "@tool\n", - "def check_calendar_availability(day: str) -> str:\n", - " \"\"\"Check calendar availability for a given day.\"\"\"\n", - " # Placeholder 
response - in real app would check actual calendar\n", - " return f\"Available times on {day}: 9:00 AM, 2:00 PM, 4:00 PM\"\n", - "\n", - "@tool\n", - "# This is new! \n", - "class Question(BaseModel):\n", - " \"\"\"Question to ask user.\"\"\"\n", - " content: str\n", - " \n", - "@tool\n", - "class Done(BaseModel):\n", - " \"\"\"E-mail has been sent.\"\"\"\n", - " done: bool\n", - "\n", - "# All tools available to the agent\n", - "tools = [\n", - " write_email, \n", - " schedule_meeting, \n", - " check_calendar_availability, \n", - " Question, \n", - " Done,\n", - "]\n", - "\n", - "tools_by_name = {tool.name: tool for tool in tools}\n", - "\n", - "# Initialize the LLM for use with router / structured output\n", - "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", - "llm_router = llm.with_structured_output(RouterSchema) \n", - "\n", - "# Initialize the LLM, enforcing tool use (of any available tools) for agent\n", - "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", - "llm_with_tools = llm.bind_tools(tools, tool_choice=\"required\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf05b260-9809-4f32-807b-abe1632e4181", - "metadata": {}, - "outputs": [], - "source": [ - "HITL_TOOLS_PROMPT" - ] - }, - { - "cell_type": "markdown", - "id": "b9f8f334", - "metadata": {}, - "source": [ - "#### Triage node\n", - "\n", - "We define a python function with our triage routing logic, just as we did before.\n", - "\n", - "But, if the classification is `notify`, we want to interrupt the graph to allow the user to review the email! \n", - "\n", - "So we go to a new node, `triage_interrupt_handler`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "65efb689", - "metadata": {}, - "outputs": [], - "source": [ - "def triage_router(state: State) -> Command[Literal[\"triage_interrupt_handler\", \"response_agent\", \"__end__\"]]:\n", - " \"\"\"Analyze email content to decide if we should respond, notify, or ignore.\"\"\"\n", - "\n", - " # Parse the email input\n", - " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", - " user_prompt = triage_user_prompt.format(\n", - " author=author, to=to, subject=subject, email_thread=email_thread\n", - " )\n", - "\n", - " # Create email markdown for Agent Inbox in case of notification \n", - " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", - "\n", - " # Format system prompt with background and triage instructions\n", - " system_prompt = triage_system_prompt.format(\n", - " background=default_background,\n", - " triage_instructions=default_triage_instructions\n", - " )\n", - "\n", - " # Run the router LLM\n", - " result = llm_router.invoke(\n", - " [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": user_prompt},\n", - " ]\n", - " )\n", - "\n", - " # Decision\n", - " classification = result.classification\n", - "\n", - " # Process the classification decision\n", - " if classification == \"respond\":\n", - " print(\"📧 Classification: RESPOND - This email requires a response\")\n", - " # Next node\n", - " goto = \"response_agent\"\n", - " # Update the state\n", - " update = {\n", - " \"classification_decision\": result.classification,\n", - " \"messages\": [{\"role\": \"user\",\n", - " \"content\": f\"Respond to the email: {email_markdown}\"\n", - " }],\n", - " }\n", - " elif classification == \"ignore\":\n", - " print(\"🚫 Classification: IGNORE - This email can be safely ignored\")\n", - " # Next node\n", - " goto = END\n", - " # Update the state\n", - " update = {\n", - " \"classification_decision\": 
classification,\n", - " }\n", - "\n", - " elif classification == \"notify\":\n", - " print(\"🔔 Classification: NOTIFY - This email contains important information\") \n", - " # This is new! \n", - " goto = \"triage_interrupt_handler\"\n", - " # Update the state\n", - " update = {\n", - " \"classification_decision\": classification,\n", - " }\n", - "\n", - " else:\n", - " raise ValueError(f\"Invalid classification: {classification}\")\n", - " return Command(goto=goto, update=update)" - ] - }, - { - "cell_type": "markdown", - "id": "4a1f564a", - "metadata": {}, - "source": [ - "#### Triage Interrupt Handler\n", - "\n", - "If the decision is to `notify` the user, we interrupt the graph! \n", - "\n", - "![overview-img](img/HITL_flow_triage.png)\n", - "\n", - "For this, we add a new node, `triage_interrupt_handler`, that will: \n", - "\n", - "1. Show the classification to the user if it is `notify`: We'll pass a `dict` to the interrupt that contains our classification. \n", - "2. Allow the user to respond to the decision: We'll design the code to handle what we will get back from Agent Inbox. \n", - "\n", - "As you can see [here](https://github.com/langchain-ai/agent-inbox?tab=readme-ov-file#what-do-the-fields-mean), we format our interrupt with specific fields so that it can be viewed in Agent Inbox:\n", - "\n", - "* `action_request`: The action and arguments for the interrupt with `action` (the action name) and `args` (the tool call arguments). This is rendered in the Agent Inbox as the main header for the interrupt event.\n", - "* `config`: Configures which interaction types are allowed, and specific UI elements for each. \n", - "* `description`: Should be detailed, and may be markdown. 
This will be rendered in the Agent Inbox as the description\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "203346bb", - "metadata": {}, - "outputs": [], - "source": [ - "def triage_interrupt_handler(state: State) -> Command[Literal[\"response_agent\", \"__end__\"]]:\n", - " \"\"\"Handles interrupts from the triage step.\"\"\"\n", - " \n", - " # Parse the email input\n", - " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", - "\n", - " # Create email markdown for Agent Inbox in case of notification \n", - " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", - "\n", - " # Create messages\n", - " messages = [{\"role\": \"user\",\n", - " \"content\": f\"Email to notify user about: {email_markdown}\"\n", - " }]\n", - "\n", - " # Create interrupt that is shown to the user\n", - " request = {\n", - " \"action_request\": {\n", - " \"action\": f\"Email Assistant: {state['classification_decision']}\",\n", - " \"args\": {}\n", - " },\n", - " \"config\": {\n", - " \"allow_ignore\": True, \n", - " \"allow_respond\": True, \n", - " \"allow_edit\": False, \n", - " \"allow_accept\": False, \n", - " },\n", - " # Email to show in Agent Inbox\n", - " \"description\": email_markdown,\n", - " }\n", - "\n", - " # Agent Inbox responds with a list of dicts with a single key `type` that can be `accept`, `edit`, `ignore`, or `response`. \n", - " response = interrupt([request])[0]\n", - "\n", - " # If user provides feedback, go to response agent and use feedback to respond to email \n", - " if response[\"type\"] == \"response\":\n", - " # Add feedback to messages \n", - " user_input = response[\"args\"]\n", - " # Used by the response agent\n", - " messages.append({\"role\": \"user\",\n", - " \"content\": f\"User wants to reply to the email. 
Use this feedback to respond: {user_input}\"\n", - " })\n", - " # Go to response agent\n", - " goto = \"response_agent\"\n", - "\n", - " # If user ignores email, go to END\n", - " elif response[\"type\"] == \"ignore\":\n", - " goto = END\n", - "\n", - " # Catch all other responses\n", - " else:\n", - " raise ValueError(f\"Invalid response: {response}\")\n", - "\n", - " # Update the state \n", - " update = {\n", - " \"messages\": messages,\n", - " }\n", - "\n", - " return Command(goto=goto, update=update)" - ] - }, - { - "cell_type": "markdown", - "id": "8613e4c4", - "metadata": {}, - "source": [ - "#### LLM call\n", - "\n", - "The `llm_call` node is the same as before:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "036aba96", - "metadata": {}, - "outputs": [], - "source": [ - "def llm_call(state: State):\n", - " \"\"\"LLM decides whether to call a tool or not.\"\"\"\n", - "\n", - " return {\n", - " \"messages\": [\n", - " llm_with_tools.invoke(\n", - " [\n", - " {\"role\": \"system\", \"content\": agent_system_prompt_hitl.format(tools_prompt=HITL_TOOLS_PROMPT, \n", - " background=default_background,\n", - " response_preferences=default_response_preferences, \n", - " cal_preferences=default_cal_preferences)}\n", - " ]\n", - " + state[\"messages\"]\n", - " )\n", - " ]\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "397516ee", - "metadata": {}, - "source": [ - "#### Interrupt Handler\n", - "\n", - "The `interrupt_handler` is the core HITL component of our response agent. \n", - "\n", - "Its job is to examine the tool calls that the LLM wants to make and determine which ones need human review before execution. Here's how it works:\n", - "\n", - "1. 
**Tool Selection**: The handler maintains a list of \"HITL tools\" that require human approval:\n", - " - `write_email`: Since sending emails has significant external impact\n", - " - `schedule_meeting`: Since scheduling meetings affects calendars\n", - " - `Question`: Since asking users questions requires direct interaction\n", - "\n", - "2. **Direct Execution**: Tools not in the HITL list (like `check_calendar_availability`) are executed immediately without interruption. This allows low-risk operations to proceed automatically.\n", - "\n", - "3. **Context Preparation**: For tools requiring review, the handler:\n", - " - Retrieves the original email for context\n", - " - Formats the tool call details for clear display\n", - " - Configures which interaction types are allowed for each tool type\n", - "\n", - "4. **Interrupt Creation**: The handler creates a structured interrupt request with:\n", - " - The action name and arguments\n", - " - Configuration for allowed interaction types\n", - " - A description that includes both the original email and the proposed action\n", - "\n", - "5. **Response Processing**: After the interrupt, the handler processes the human response:\n", - " - **Accept**: Executes the tool with original arguments\n", - " - **Edit**: Updates the tool call with edited arguments and then executes\n", - " - **Ignore**: Cancels the tool execution\n", - " - **Response**: Records feedback without execution\n", - "\n", - "This handler ensures humans have oversight of all significant actions while allowing routine operations to proceed automatically. 
\n", - "\n", - "The ability to edit tool arguments (like email content or meeting details) gives users precise control over the assistant's actions.\n", - "\n", - "We can visualize the overall flow: \n", - "\n", - "![overview-img](img/HITL_flow.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f41929d5", - "metadata": {}, - "outputs": [], - "source": [ - "def interrupt_handler(state: State) -> Command[Literal[\"llm_call\", \"__end__\"]]:\n", - " \"\"\"Creates an interrupt for human review of tool calls\"\"\"\n", - " \n", - " # Store messages\n", - " result = []\n", - "\n", - " # Go to the LLM call node next\n", - " goto = \"llm_call\"\n", - "\n", - " # Iterate over the tool calls in the last message\n", - " for tool_call in state[\"messages\"][-1].tool_calls:\n", - " \n", - " # Allowed tools for HITL\n", - " hitl_tools = [\"write_email\", \"schedule_meeting\", \"Question\"]\n", - " \n", - " # If tool is not in our HITL list, execute it directly without interruption\n", - " if tool_call[\"name\"] not in hitl_tools:\n", - "\n", - " # Execute search_memory and other tools without interruption\n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " observation = tool.invoke(tool_call[\"args\"])\n", - " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": tool_call[\"id\"]})\n", - " continue\n", - " \n", - " # Get original email from email_input in state\n", - " email_input = state[\"email_input\"]\n", - " author, to, subject, email_thread = parse_email(email_input)\n", - " original_email_markdown = format_email_markdown(subject, author, to, email_thread)\n", - " \n", - " # Format tool call for display and prepend the original email\n", - " tool_display = format_for_display(state, tool_call)\n", - " description = original_email_markdown + tool_display\n", - "\n", - " # Configure what actions are allowed in Agent Inbox\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " config = {\n", - " \"allow_ignore\": 
True,\n", - " \"allow_respond\": True,\n", - " \"allow_edit\": True,\n", - " \"allow_accept\": True,\n", - " }\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " config = {\n", - " \"allow_ignore\": True,\n", - " \"allow_respond\": True,\n", - " \"allow_edit\": True,\n", - " \"allow_accept\": True,\n", - " }\n", - " elif tool_call[\"name\"] == \"Question\":\n", - " config = {\n", - " \"allow_ignore\": True,\n", - " \"allow_respond\": True,\n", - " \"allow_edit\": False,\n", - " \"allow_accept\": False,\n", - " }\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " # Create the interrupt request\n", - " request = {\n", - " \"action_request\": {\n", - " \"action\": tool_call[\"name\"],\n", - " \"args\": tool_call[\"args\"]\n", - " },\n", - " \"config\": config,\n", - " \"description\": description,\n", - " }\n", - "\n", - " # Send to Agent Inbox and wait for response\n", - " response = interrupt([request])[0]\n", - "\n", - " # Handle the responses \n", - " if response[\"type\"] == \"accept\":\n", - "\n", - " # Execute the tool with original args\n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " observation = tool.invoke(tool_call[\"args\"])\n", - " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": tool_call[\"id\"]})\n", - " \n", - " elif response[\"type\"] == \"edit\":\n", - "\n", - " # Tool selection \n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " \n", - " # Get edited args from Agent Inbox\n", - " edited_args = response[\"args\"][\"args\"]\n", - "\n", - " # Update the AI message's tool call with edited content (reference to the message in the state)\n", - " ai_message = state[\"messages\"][-1] # Get the most recent message from the state\n", - " current_id = tool_call[\"id\"] # Store the ID of the tool call being edited\n", - " \n", - " # Create a new list of tool calls by filtering out the one being edited and adding the updated version\n", - " # This 
avoids modifying the original list directly (immutable approach)\n", - " updated_tool_calls = [tc for tc in ai_message.tool_calls if tc[\"id\"] != current_id] + [\n", - " {\"type\": \"tool_call\", \"name\": tool_call[\"name\"], \"args\": edited_args, \"id\": current_id}\n", - " ]\n", - "\n", - " # Create a new copy of the message with updated tool calls rather than modifying the original\n", - " # This ensures state immutability and prevents side effects in other parts of the code\n", - " result.append(ai_message.model_copy(update={\"tool_calls\": updated_tool_calls}))\n", - "\n", - " # Update the write_email tool call with the edited content from Agent Inbox\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " \n", - " # Execute the tool with edited args\n", - " observation = tool.invoke(edited_args)\n", - " \n", - " # Add only the tool response message\n", - " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", - " \n", - " # Update the schedule_meeting tool call with the edited content from Agent Inbox\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " \n", - " \n", - " # Execute the tool with edited args\n", - " observation = tool.invoke(edited_args)\n", - " \n", - " # Add only the tool response message\n", - " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", - " \n", - " # Catch all other tool calls\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " elif response[\"type\"] == \"ignore\":\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " # Don't execute the tool, and tell the agent how to proceed\n", - " result.append({\"role\": \"tool\", \"content\": \"User ignored this email draft. 
Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # Go to END\n", - " goto = END\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " # Don't execute the tool, and tell the agent how to proceed\n", - " result.append({\"role\": \"tool\", \"content\": \"User ignored this calendar meeting draft. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # Go to END\n", - " goto = END\n", - " elif tool_call[\"name\"] == \"Question\":\n", - " # Don't execute the tool, and tell the agent how to proceed\n", - " result.append({\"role\": \"tool\", \"content\": \"User ignored this question. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # Go to END\n", - " goto = END\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - " \n", - " elif response[\"type\"] == \"response\":\n", - " # User provided feedback\n", - " user_feedback = response[\"args\"]\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the email. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the meeting request. 
Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - " elif tool_call[\"name\"] == \"Question\": \n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User answered the question, which can we can use for any follow up actions. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " # Catch all other responses\n", - " else:\n", - " raise ValueError(f\"Invalid response: {response}\")\n", - " \n", - " # Update the state \n", - " update = {\n", - " \"messages\": result,\n", - " }\n", - "\n", - " return Command(goto=goto, update=update)" - ] - }, - { - "cell_type": "markdown", - "id": "164b0897", - "metadata": {}, - "source": [ - "Now, let's compile the graph. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b6d1013", - "metadata": {}, - "outputs": [], - "source": [ - "from email_assistant.utils import show_graph\n", - "\n", - "# Conditional edge function\n", - "def should_continue(state: State) -> Literal[\"interrupt_handler\", END]:\n", - " \"\"\"Route to tool handler, or end if Done tool called\"\"\"\n", - " messages = state[\"messages\"]\n", - " last_message = messages[-1]\n", - " if last_message.tool_calls:\n", - " for tool_call in last_message.tool_calls: \n", - " if tool_call[\"name\"] == \"Done\":\n", - " return END\n", - " else:\n", - " return \"interrupt_handler\"\n", - "\n", - "# Build workflow\n", - "agent_builder = StateGraph(State)\n", - "\n", - "# Add nodes\n", - "agent_builder.add_node(\"llm_call\", llm_call)\n", - "agent_builder.add_node(\"interrupt_handler\", interrupt_handler)\n", - "\n", - "# Add edges\n", - "agent_builder.add_edge(START, \"llm_call\")\n", - "agent_builder.add_conditional_edges(\n", - " \"llm_call\",\n", - " should_continue,\n", - " {\n", - " \"interrupt_handler\": 
\"interrupt_handler\",\n", - " END: END,\n", - " },\n", - ")\n", - "\n", - "# Compile the agent\n", - "response_agent = agent_builder.compile()\n", - "\n", - "# Build overall workflow\n", - "overall_workflow = (\n", - " StateGraph(State, input=StateInput)\n", - " .add_node(triage_router)\n", - " .add_node(triage_interrupt_handler)\n", - " .add_node(\"response_agent\", response_agent)\n", - " .add_edge(START, \"triage_router\")\n", - " \n", - ")\n", - "\n", - "email_assistant = overall_workflow.compile()\n", - "show_graph(email_assistant, xray=True)" - ] - }, - { - "cell_type": "markdown", - "id": "d747dcda", - "metadata": {}, - "source": [ - "#### Review of HITL Patterns\n", - "\n", - "**Triage Interruption** When an email is classified as \"notify\", the system interrupts to show the email to the human user\n", - "- *User Decision*: User can choose to ignore the notification or provide feedback to respond to the email\n", - "- *Flow Control*: If ignored, workflow ends; if user provides feedback, it flows to the Response Agent\n", - "\n", - "**Write Email**: System shows proposed email draft for human review\n", - "- *User Decision and Flow Control*: ignore (end workflow), respond with feedback, accept draft as-is, or edit draft\n", - "\n", - "**Schedule Meeting**: System shows proposed meeting details for human review\n", - "- *User Decision and Flow Control*: ignore (end workflow), respond with feedback, accept meeting details as-is, or edit details\n", - "\n", - "**Question**: System asks user a question to clarify information\n", - "- *User Decision and Flow Control*: ignore (end workflow) or respond with an answer\n", - "\n", - "### Interrupts Allow Us to Review and Accept Tool Calls" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c12b2097", - "metadata": {}, - "outputs": [], - "source": [ - "import uuid\n", - "from langgraph.checkpoint.memory import InMemorySaver\n", - "\n", - "# Email to respond to\n", - "email_input_respond = {\n", 
- " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph with checkpointer\n", - "checkpointer = InMemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_1 = uuid.uuid4()\n", - "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", - "\n", - "# Run the graph until a tool call that we choose to interrupt\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "5546ad46", - "metadata": {}, - "source": [ - "What happened? We hit the [interrupt](https://langchain-ai.github.io/langgraph/concepts/interrupts/), which paused execution at the tool call. You can see the `action` (tool call name) and `args` (tool call arguments) that we interrupted displayed to the user.\n", - "\n", - "Now, how do we handle the interrupt? This is where the `Command` interface comes in. [The `Command` object has several powerful capabilities](https://langchain-ai.github.io/langgraph/how-tos/command/). 
We used it to direct the flow of the graph in prior notebooks: \n", - "- `goto`: Specifies which node to route to next\n", - "- `update`: Modifies the state before continuing execution\n", - "\n", - "Here, we'll use it to resume the graph from the interrupted state:\n", - "- `resume`: Provides the value to return from the interrupt call\n", - "\n", - "We can return whatever value our graph is designed to handle. In our case, the graph is designed to handle a list of dicts with a single key `type` that can be `accept`, `edit`, `ignore`, or `response`. So, we can simply pass `{\"type\": \"accept\"}` to the `resume` argument in order to tell the graph that we accept the tool call." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50b1f772", - "metadata": {}, - "outputs": [], - "source": [ - "from langgraph.types import Command\n", - "\n", - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd321c51", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"id": "c77baa35", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_1)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "86b1ba30", - "metadata": {}, - "source": [ - "\n", - "\n", - "### Interrupts Allow Us to Edit Tool Calls\n", - "\n", - "This test demonstrates how human modification works in the HITL flow:\n", - "1. We start with the same tax planning email as before\n", - "2. The agent proposes a meeting with the same parameters\n", - "3. This time, the user EDITS the meeting proposal to change:\n", - " - Duration from 45 to 30 minutes\n", - " - Meeting subject is made more concise\n", - "4. The agent adapts to these changes when drafting the email\n", - "5. The user further EDITS the email to be shorter and less formal\n", - "6. The workflow completes with both modifications incorporated\n", - "\n", - "This scenario showcases one of the most powerful aspects of HITL: \n", - "\n", - "* Users can make precise modifications to agent actions before they are executed, ensuring the final outcome matches their preferences without having to handle all the details themselves." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3bfca1b4", - "metadata": {}, - "outputs": [], - "source": [ - "# Same email as before\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = InMemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_2 = uuid.uuid4()\n", - "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", - "\n", - "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "706ac0a6", - "metadata": {}, - "source": [ - "Edit the `schedule_meeting` tool call\n", - "\n", - "When the agent proposes the initial meeting schedule, we now simulate the user making modifications through the edit functionality. This demonstrates how the `edit` response type works:\n", - "\n", - "1. The user receives the same meeting proposal as in the previous test\n", - "2. Instead of accepting, they modify the parameters:\n", - " - Reducing duration from 45 to 30 minutes\n", - " - Keeping the same day and time\n", - "3. The `edit` response includes the complete set of modified arguments\n", - "4. The interrupt handler replaces the original tool arguments with these edited ones\n", - "5. The tool is executed with the user's modifications\n", - "\n", - "This shows how edit capability gives users precise control over agent actions while still letting the agent handle the execution details." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7175fedb", - "metadata": {}, - "outputs": [], - "source": [ - "# Now simulate user editing the schedule_meeting tool call\n", - "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", - "edited_schedule_args = {\n", - " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", - " \"subject\": \"Tax Planning Discussion\",\n", - " \"duration_minutes\": 30, # Changed from 45 to 30\n", - " \"preferred_day\": \"2025-05-06\",\n", - " \"start_time\": 14 \n", - "}\n", - "\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "9757706b", - "metadata": {}, - "source": [ - "Edit the `write_email` tool call\n", - "\n", - "After accepting the modified meeting schedule, the agent drafts an email reflecting the 30-minute duration. Now we demonstrate how editing works with email content:\n", - "\n", - "1. The agent has adapted its email to mention the shorter 30-minute duration\n", - "2. We simulate the user wanting an even more significant change to the email:\n", - " - Completely rewriting the content to be shorter and less formal\n", - " - Changing the meeting day mentioned in the email (showing how users can correct agent mistakes)\n", - " - Requesting confirmation rather than stating the meeting as definite\n", - "3. The `edit` response contains the complete new email content\n", - "4. 
The tool arguments are updated with this edited content\n", - "5. The email is sent with the user's preferred wording\n", - "\n", - "This example shows the power of HITL for complex communication tasks - the agent handles the structure and initial content, while humans can refine tone, style, and substance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0604d53", - "metadata": {}, - "outputs": [], - "source": [ - "# Now simulate user editing the write_email tool call\n", - "print(\"\\nSimulating user editing the write_email tool call...\")\n", - "edited_email_args = {\n", - " \"to\": \"pm@client.com\",\n", - " \"subject\": \"Re: Tax season let's schedule call\",\n", - " \"content\": \"Hello Project Manager,\\n\\nThank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", - "}\n", - "\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ac279101", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace, to view the edited tool calls:\n", - "\n", - "https://smith.langchain.com/public/21769510-d57a-41e4-b5c7-0ddb23c237d8/r" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d3e9be7", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_2)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, 
- { - "cell_type": "markdown", - "id": "da4c39e9", - "metadata": {}, - "source": [ - "### Interrupts Allow Us to Provide Feedback on Tool Calls\n", - "\n", - "This test set demonstrates the \"response\" capability - providing feedback without editing or accepting:\n", - "\n", - "1. First, we test feedback for meeting scheduling:\n", - " - The user provides specific preferences (30 minutes instead of 45, and afternoon meetings)\n", - " - The agent incorporates this feedback into a revised proposal\n", - " - The user then accepts the revised meeting schedule\n", - "\n", - "2. Second, we test feedback for email drafting:\n", - " - The user requests a shorter, less formal email with a specific closing statement\n", - " - The agent completely rewrites the email according to this guidance\n", - " - The user accepts the new draft\n", - "\n", - "3. Lastly, we test feedback for questions:\n", - " - For the brunch invitation, the user answers the question with additional context\n", - " - The agent uses this information to draft an appropriate email response\n", - " - The workflow proceeds with the user's input integrated\n", - "\n", - "The \"response\" capability bridges the gap between acceptance and editing - users can guide the agent without having to write the full content themselves. 
This is especially powerful for:\n", - "- Adjusting tone and style\n", - "- Adding context the agent missed\n", - "- Redirecting the agent's approach\n", - "- Answering questions in a way that shapes the next steps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c4b3517", - "metadata": {}, - "outputs": [], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = InMemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_5 = uuid.uuid4()\n", - "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ae2bea0a", - "metadata": {}, - "source": [ - "Provide feedback for the `schedule_meeting` tool call\n", - "\n", - "Now we explore the feedback capability for meeting 
scheduling:\n", - "\n", - "1. The agent proposes the standard 45-minute meeting on Tuesday at 2:00 PM\n", - "2. Instead of accepting or editing, we provide feedback in natural language\n", - "3. Our feedback specifies two preferences:\n", - " - Shorter meeting (30 minutes instead of 45)\n", - " - Preference for afternoon meetings (after 2pm)\n", - "4. The agent receives this feedback through the `response` type\n", - "5. The interrupt handler adds this feedback as a message to the state\n", - "6. The agent processes this feedback and generates a new tool call incorporating these preferences\n", - "\n", - "Unlike direct editing, which requires specifying the entire set of parameters, feedback allows users to express their preferences conversationally. The agent must then interpret this feedback and apply it appropriately to create a revised proposal." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a916e10", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cf35f1a2", - "metadata": {}, - "source": [ - "Accept the `schedule_meeting` tool call after providing feedback" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2727fb0e", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool 
call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "3ca470c5", - "metadata": {}, - "source": [ - "Now provide feedback for the `write_email` tool call\n", - "\n", - "After accepting the revised meeting schedule, the agent drafts an email. We now test feedback for email content:\n", - "\n", - "1. The agent's email is relatively formal and detailed\n", - "2. We provide stylistic feedback requesting:\n", - " - A shorter, more concise email\n", - " - A less formal tone\n", - " - A specific closing statement about looking forward to the meeting\n", - "3. The agent processes this feedback to completely rewrite the email\n", - "4. The new draft is much shorter, more casual, and includes the requested closing\n", - "\n", - "This demonstrates the power of natural language feedback for content creation:\n", - "- Users don't need to rewrite the entire email themselves\n", - "- They can provide high-level guidance on style, tone, and content\n", - "- The agent handles the actual writing based on this guidance\n", - "- The result better matches user preferences while preserving the essential information\n", - "\n", - "The message history shows both the original and revised emails, clearly showing how the feedback was incorporated." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5221d87", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "1266ec72", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call after providing feedback" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b4698c0", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "c270f52a", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace:\n", - "\n", - "https://smith.langchain.com/public/57006770-6bb3-4e40-b990-143c373ebe60/r\n", - "\n", - "We can see that user feedback is incorporated into the tool calls. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1daf10d6", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_5)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "1d964e36", - "metadata": {}, - "source": [ - "### Interrupts Enable New Tools\n", - "\n", - "Now let's try an email that calls the `Question` tool to provide feedback\n", - "\n", - "Finally, we test how feedback works with the `Question` tool:\n", - "\n", - "1. For the brunch invitation email, the agent asks about preferred day and time\n", - "2. Instead of ignoring, we provide a substantive response with additional context:\n", - " - Confirming we want to invite the people mentioned\n", - " - Noting we need to check which weekend works best\n", - " - Adding information about needing a reservation\n", - "3. The agent uses this information to:\n", - " - Draft a comprehensive email response incorporating all our feedback\n", - " - Notice we didn't provide a specific day/time, so it suggests checking the calendar\n", - " - Include the detail about making a reservation\n", - "4. 
The complete email reflects both the original request and our additional guidance\n", - "\n", - "This demonstrates how question responses can shape the entire workflow:\n", - "- Questions let the agent gather missing information\n", - "- User responses can include both direct answers and additional context\n", - "- The agent integrates all this information into its next actions\n", - "- The final outcome reflects the collaborative intelligence of both human and AI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8827632a", - "metadata": {}, - "outputs": [], - "source": [ - "# Respond\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Partner \",\n", - " \"subject\": \"Dinner?\",\n", - " \"email_thread\": \"Hey, do you want italian or indian tonight?\"}\n", - "\n", - "# Compile the graph\n", - "checkpointer = InMemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_6 = uuid.uuid4()\n", - "thread_config_6 = {\"configurable\": {\"thread_id\": thread_id_6}}\n", - "\n", - "# Run the graph until the first interrupt\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_6):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "1d9f7f1b", - "metadata": {}, - "source": [ - "Provide feedback for the `Question` tool call" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4979effd", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in 
graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Let's do indian.\"}]), config=thread_config_6):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "76b4ba9b", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bfd34ec2", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_6):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "e214fe9e", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace:\n", - "\n", - "https://smith.langchain.com/public/f4c727c3-b1d9-47a5-b3d0-3451619db8a2/r\n", - "\n", - "We can see that user feedback in incorporated into the email response." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "070393eb", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_6)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "0cbec016-f08a-4984-abb9-07f428f5e69f", - "metadata": {}, - "source": [ - "### Deployment\n", - "\n", - "Let's create a local deployment of our email assistant with HITL from `src/email_assistant/email_assistant_hitl.py`. \n", - " \n", - "As before, run `langgraph dev`, select `email_assistant_hitl` in Studio, and submit the e-mail:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2609b7e4-2065-4641-a1e6-5960f399a5f5", - "metadata": {}, - "outputs": [], - "source": [ - "{\n", - " \"author\": \"Alice Smith \",\n", - " \"to\": \"John Doe \",\n", - " \"subject\": \"Quick question about API documentation\",\n", - " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "e0fe48f4", - "metadata": {}, - "source": [ - "Our server it stateless. Threads with a local deployment are simply saved to the local filesystem (`.langgraph_api` in the project folder).\n", - "\n", - "With a [hosted](https://langchain-ai.github.io/langgraph/tutorials/deployment/#other-deployment-options) deployment, threads stored in Postgres.\n", - "\n", - "Interrupted threads are threads with status 'interrupted', and we can see the interrupt in Studio: \n", - "\n", - "![studio-img](img/studio-interrupt.png)\n", - "\n", - "We'll use a custom interface to view these interrupted threads, [Agent Inbox](https://dev.agentinbox.ai/). 
\n", - "\n", - "This interface is a nice way to edit, approve, ignore, or provide feedback on specific actions taken by LangGraph agents. \n", - "\n", - "If you go to [dev.agentinbox.ai](https://dev.agentinbox.ai/), you can easily connect to the graph:\n", - " * Graph name: the name from the `langgraph.json` file (`email_assistant_hitl`)\n", - " * Graph URL: `http://127.0.0.1:2024/`\n", - "\n", - "All interrupted threads run will then be visible: \n", - "\n", - "![agent-inbox-img](img/agent-inbox.png)\n", - "\n", - "Agent Inbox simply uses a `Command` with `resume`, as [shown with the SDK](https://langchain-ai.github.io/langgraph/how-tos/human_in_the_loop/wait-user-input/#interacting-with-the-agent) above, the resume the graph." - ] - }, - { - "cell_type": "markdown", - "id": "4dd416e5", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{"cells": [{"cell_type": "markdown", "id": "07c57479", "metadata": {}, "source": ["# Agents with Human-in-the-Loop\n", "\n", "We have an email assistant that uses a router to triage emails and then passes the email to the agent for response generation. We've also evaluated it. But do we fully *trust* it to manage our inbox autonomously? For such a sensitive task, human-in-the-loop (HITL) is important! Here we'll show how to add a human-in-the-loop to our email assistant so that we can review specific tool calls. 
\n", "\n", "![overview-img](img/overview_hitl.png)\n", "\n"]}, {"cell_type": "markdown", "id": "c8f73f12", "metadata": {}, "source": ["We're going to show how to make the graph *pause* at specific points and await human input.\n", "\n", "![overview-img](img/hitl_schematic.png)"]}, {"cell_type": "markdown", "id": "52e3532e", "metadata": {}, "source": ["#### Load Environment Variables"]}, {"cell_type": "code", "execution_count": null, "id": "a57594a7", "metadata": {}, "outputs": [], "source": ["from dotenv import load_dotenv\n", "load_dotenv(\"../.env\")"]}, {"cell_type": "markdown", "id": "2566464d", "metadata": {}, "source": ["## Adding HITL to our email assistant\n", "\n", "Let's add HITL to our email assistant. \n", "\n", "We can start with tools, just as we did before. \n", "\n", "But now, we'll add a new tool Question that allows the assistant to ask the user a question."]}, {"cell_type": "code", "execution_count": null, "id": "6d4dfb07", "metadata": {}, "outputs": [], "source": ["%load_ext autoreload\n", "%autoreload 2\n", "\n", "from typing import Literal\n", "from datetime import datetime\n", "from pydantic import BaseModel\n", "\n", "from langchain.chat_models import init_chat_model\n", "from langchain_core.tools import tool\n", "\n", "from langgraph.graph import StateGraph, START, END\n", "from langgraph.types import interrupt, Command\n", "\n", "from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl, default_background, default_triage_instructions, default_response_preferences, default_cal_preferences\n", "from email_assistant.tools.default.prompt_templates import HITL_TOOLS_PROMPT\n", "from email_assistant.schemas import State, RouterSchema, StateInput\n", "from email_assistant.utils import parse_email, format_for_display, format_email_markdown\n", "\n", "# Agent tools \n", "@tool\n", "def write_email(to: str, subject: str, content: str) -> str:\n", " \"\"\"Write and send an email.\"\"\"\n", " # Placeholder 
response - in real app would send email\n", " return f\"Email sent to {to} with subject '{subject}' and content: {content}\"\n", "\n", "@tool\n", "def schedule_meeting(\n", " attendees: list[str], subject: str, duration_minutes: int, preferred_day: datetime, start_time: int\n", ") -> str:\n", " \"\"\"Schedule a calendar meeting.\"\"\"\n", " # Placeholder response - in real app would check calendar and schedule\n", " date_str = preferred_day.strftime(\"%A, %B %d, %Y\")\n", " return f\"Meeting '{subject}' scheduled on {date_str} at {start_time} for {duration_minutes} minutes with {len(attendees)} attendees\"\n", "\n", "@tool\n", "def check_calendar_availability(day: str) -> str:\n", " \"\"\"Check calendar availability for a given day.\"\"\"\n", " # Placeholder response - in real app would check actual calendar\n", " return f\"Available times on {day}: 9:00 AM, 2:00 PM, 4:00 PM\"\n", "\n", "@tool\n", "# This is new! \n", "class Question(BaseModel):\n", " \"\"\"Question to ask user.\"\"\"\n", " content: str\n", " \n", "@tool\n", "class Done(BaseModel):\n", " \"\"\"E-mail has been sent.\"\"\"\n", " done: bool\n", "\n", "# All tools available to the agent\n", "tools = [\n", " write_email, \n", " schedule_meeting, \n", " check_calendar_availability, \n", " Question, \n", " Done,\n", "]\n", "\n", "tools_by_name = {tool.name: tool for tool in tools}\n", "\n", "# Initialize the LLM for use with router / structured output\n", "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", "llm_router = llm.with_structured_output(RouterSchema) \n", "\n", "# Initialize the LLM, enforcing tool use (of any available tools) for agent\n", "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", "llm_with_tools = llm.bind_tools(tools, tool_choice=\"required\")"]}, {"cell_type": "code", "execution_count": null, "id": "bf05b260-9809-4f32-807b-abe1632e4181", "metadata": {}, "outputs": [], "source": ["HITL_TOOLS_PROMPT"]}, {"cell_type": "markdown", "id": "b9f8f334", "metadata": 
{}, "source": ["#### Triage node\n", "\n", "We define a python function with our triage routing logic, just as we did before.\n", "\n", "But, if the classification is `notify`, we want to interrupt the graph to allow the user to review the email! \n", "\n", "So we go to a new node, `triage_interrupt_handler`."]}, {"cell_type": "code", "execution_count": null, "id": "65efb689", "metadata": {}, "outputs": [], "source": ["def triage_router(state: State) -> Command[Literal[\"triage_interrupt_handler\", \"response_agent\", \"__end__\"]]:\n", " \"\"\"Analyze email content to decide if we should respond, notify, or ignore.\"\"\"\n", "\n", " # Parse the email input\n", " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", " user_prompt = triage_user_prompt.format(\n", " author=author, to=to, subject=subject, email_thread=email_thread\n", " )\n", "\n", " # Create email markdown for Agent Inbox in case of notification \n", " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", "\n", " # Format system prompt with background and triage instructions\n", " system_prompt = triage_system_prompt.format(\n", " background=default_background,\n", " triage_instructions=default_triage_instructions\n", " )\n", "\n", " # Run the router LLM\n", " result = llm_router.invoke(\n", " [\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt},\n", " ]\n", " )\n", "\n", " # Decision\n", " classification = result.classification\n", "\n", " # Process the classification decision\n", " if classification == \"respond\":\n", " print(\"\ud83d\udce7 Classification: RESPOND - This email requires a response\")\n", " # Next node\n", " goto = \"response_agent\"\n", " # Update the state\n", " update = {\n", " \"classification_decision\": result.classification,\n", " \"messages\": [{\"role\": \"user\",\n", " \"content\": f\"Respond to the email: {email_markdown}\"\n", " }],\n", " }\n", " elif classification 
== \"ignore\":\n", " print(\"\ud83d\udeab Classification: IGNORE - This email can be safely ignored\")\n", " # Next node\n", " goto = END\n", " # Update the state\n", " update = {\n", " \"classification_decision\": classification,\n", " }\n", "\n", " elif classification == \"notify\":\n", " print(\"\ud83d\udd14 Classification: NOTIFY - This email contains important information\") \n", " # This is new! \n", " goto = \"triage_interrupt_handler\"\n", " # Update the state\n", " update = {\n", " \"classification_decision\": classification,\n", " }\n", "\n", " else:\n", " raise ValueError(f\"Invalid classification: {classification}\")\n", " return Command(goto=goto, update=update)"]}, {"cell_type": "markdown", "id": "4a1f564a", "metadata": {}, "source": ["#### Triage Interrupt Handler\n", "\n", "If the decision is to `notify` the user, we interrupt the graph! \n", "\n", "![overview-img](img/HITL_flow_triage.png)\n", "\n", "For this, we add a new node, `triage_interrupt_handler`, that will: \n", "\n", "1. Show the classification to the user if it is `notify`: We'll pass a `dict` to the interrupt that contains our classification. \n", "2. Allow the user to respond to the decision: We'll design the code to handle what we will get back from Agent Inbox. \n", "\n", "As you can see [here](https://github.com/langchain-ai/agent-inbox?tab=readme-ov-file#what-do-the-fields-mean), we format our interrupt with specific fields so that it can be viewed in Agent Inbox:\n", "\n", "* `action_request`: The action and arguments for the interrupt with `action` (the action name) and `args` (the tool call arguments). This is rendered in the Agent Inbox as the main header for the interrupt event.\n", "* `config`: Configures which interaction types are allowed, and specific UI elements for each. \n", "* `description`: Should be detailed, and may be markdown. 
This will be rendered in the Agent Inbox as the description\n"]}, {"cell_type": "code", "execution_count": null, "id": "203346bb", "metadata": {}, "outputs": [], "source": ["def triage_interrupt_handler(state: State) -> Command[Literal[\"response_agent\", \"__end__\"]]:\n", " \"\"\"Handles interrupts from the triage step.\"\"\"\n", " \n", " # Parse the email input\n", " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", "\n", " # Create email markdown for Agent Inbox in case of notification \n", " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", "\n", " # Create messages\n", " messages = [{\"role\": \"user\",\n", " \"content\": f\"Email to notify user about: {email_markdown}\"\n", " }]\n", "\n", " # Create interrupt that is shown to the user\n", " request = {\n", " \"action_request\": {\n", " \"action\": f\"Email Assistant: {state['classification_decision']}\",\n", " \"args\": {}\n", " },\n", " \"config\": {\n", " \"allow_ignore\": True, \n", " \"allow_respond\": True, \n", " \"allow_edit\": False, \n", " \"allow_accept\": False, \n", " },\n", " # Email to show in Agent Inbox\n", " \"description\": email_markdown,\n", " }\n", "\n", " # Agent Inbox responds with a list of dicts with a single key `type` that can be `accept`, `edit`, `ignore`, or `response`. \n", " response = interrupt([request])[0]\n", "\n", " # If user provides feedback, go to response agent and use feedback to respond to email \n", " if response[\"type\"] == \"response\":\n", " # Add feedback to messages \n", " user_input = response[\"args\"]\n", " # Used by the response agent\n", " messages.append({\"role\": \"user\",\n", " \"content\": f\"User wants to reply to the email. 
Use this feedback to respond: {user_input}\"\n", " })\n", " # Go to response agent\n", " goto = \"response_agent\"\n", "\n", " # If user ignores email, go to END\n", " elif response[\"type\"] == \"ignore\":\n", " goto = END\n", "\n", " # Catch all other responses\n", " else:\n", " raise ValueError(f\"Invalid response: {response}\")\n", "\n", " # Update the state \n", " update = {\n", " \"messages\": messages,\n", " }\n", "\n", " return Command(goto=goto, update=update)"]}, {"cell_type": "markdown", "id": "8613e4c4", "metadata": {}, "source": ["#### LLM call\n", "\n", "The `llm_call` node is the same as before:"]}, {"cell_type": "code", "execution_count": null, "id": "036aba96", "metadata": {}, "outputs": [], "source": ["def llm_call(state: State):\n", " \"\"\"LLM decides whether to call a tool or not.\"\"\"\n", "\n", " return {\n", " \"messages\": [\n", " llm_with_tools.invoke(\n", " [\n", " {\"role\": \"system\", \"content\": agent_system_prompt_hitl.format(tools_prompt=HITL_TOOLS_PROMPT, \n", " background=default_background,\n", " response_preferences=default_response_preferences, \n", " cal_preferences=default_cal_preferences)}\n", " ]\n", " + state[\"messages\"]\n", " )\n", " ]\n", " }"]}, {"cell_type": "markdown", "id": "397516ee", "metadata": {}, "source": ["#### Interrupt Handler\n", "\n", "The `interrupt_handler` is the core HITL component of our response agent. \n", "\n", "Its job is to examine the tool calls that the LLM wants to make and determine which ones need human review before execution. Here's how it works:\n", "\n", "1. **Tool Selection**: The handler maintains a list of \"HITL tools\" that require human approval:\n", " - `write_email`: Since sending emails has significant external impact\n", " - `schedule_meeting`: Since scheduling meetings affects calendars\n", " - `Question`: Since asking users questions requires direct interaction\n", "\n", "2. 
**Direct Execution**: Tools not in the HITL list (like `check_calendar_availability`) are executed immediately without interruption. This allows low-risk operations to proceed automatically.\n", "\n", "3. **Context Preparation**: For tools requiring review, the handler:\n", " - Retrieves the original email for context\n", " - Formats the tool call details for clear display\n", " - Configures which interaction types are allowed for each tool type\n", "\n", "4. **Interrupt Creation**: The handler creates a structured interrupt request with:\n", " - The action name and arguments\n", " - Configuration for allowed interaction types\n", " - A description that includes both the original email and the proposed action\n", "\n", "5. **Response Processing**: After the interrupt, the handler processes the human response:\n", " - **Accept**: Executes the tool with original arguments\n", " - **Edit**: Updates the tool call with edited arguments and then executes\n", " - **Ignore**: Cancels the tool execution and ends the workflow\n", " - **Response**: Passes the user's feedback back to the LLM without executing the tool\n", "\n", "This handler ensures humans have oversight of all significant actions while allowing routine operations to proceed automatically. 
\n", "\n", "The ability to edit tool arguments (like email content or meeting details) gives users precise control over the assistant's actions.\n", "\n", "We can visualize the overall flow: \n", "\n", "![overview-img](img/HITL_flow.png)"]}, {"cell_type": "code", "execution_count": null, "id": "f41929d5", "metadata": {}, "outputs": [], "source": ["def interrupt_handler(state: State) -> Command[Literal[\"llm_call\", \"__end__\"]]:\n", " \"\"\"Creates an interrupt for human review of tool calls\"\"\"\n", " \n", " # Store messages\n", " result = []\n", "\n", " # Go to the LLM call node next\n", " goto = \"llm_call\"\n", "\n", " # Iterate over the tool calls in the last message\n", " for tool_call in state[\"messages\"][-1].tool_calls:\n", " \n", " # Allowed tools for HITL\n", " hitl_tools = [\"write_email\", \"schedule_meeting\", \"Question\"]\n", " \n", " # If tool is not in our HITL list, execute it directly without interruption\n", " if tool_call[\"name\"] not in hitl_tools:\n", "\n", " # Execute search_memory and other tools without interruption\n", " tool = tools_by_name[tool_call[\"name\"]]\n", " observation = tool.invoke(tool_call[\"args\"])\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": tool_call[\"id\"]})\n", " continue\n", " \n", " # Get original email from email_input in state\n", " email_input = state[\"email_input\"]\n", " author, to, subject, email_thread = parse_email(email_input)\n", " original_email_markdown = format_email_markdown(subject, author, to, email_thread)\n", " \n", " # Format tool call for display and prepend the original email\n", " tool_display = format_for_display(state, tool_call)\n", " description = original_email_markdown + tool_display\n", "\n", " # Configure what actions are allowed in Agent Inbox\n", " if tool_call[\"name\"] == \"write_email\":\n", " config = {\n", " \"allow_ignore\": True,\n", " \"allow_respond\": True,\n", " \"allow_edit\": True,\n", " \"allow_accept\": True,\n", " }\n", " 
elif tool_call[\"name\"] == \"schedule_meeting\":\n", " config = {\n", " \"allow_ignore\": True,\n", " \"allow_respond\": True,\n", " \"allow_edit\": True,\n", " \"allow_accept\": True,\n", " }\n", " elif tool_call[\"name\"] == \"Question\":\n", " config = {\n", " \"allow_ignore\": True,\n", " \"allow_respond\": True,\n", " \"allow_edit\": False,\n", " \"allow_accept\": False,\n", " }\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " # Create the interrupt request\n", " request = {\n", " \"action_request\": {\n", " \"action\": tool_call[\"name\"],\n", " \"args\": tool_call[\"args\"]\n", " },\n", " \"config\": config,\n", " \"description\": description,\n", " }\n", "\n", " # Send to Agent Inbox and wait for response\n", " response = interrupt([request])[0]\n", "\n", " # Handle the responses \n", " if response[\"type\"] == \"accept\":\n", "\n", " # Execute the tool with original args\n", " tool = tools_by_name[tool_call[\"name\"]]\n", " observation = tool.invoke(tool_call[\"args\"])\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": tool_call[\"id\"]})\n", " \n", " elif response[\"type\"] == \"edit\":\n", "\n", " # Tool selection \n", " tool = tools_by_name[tool_call[\"name\"]]\n", " \n", " # Get edited args from Agent Inbox\n", " edited_args = response[\"args\"][\"args\"]\n", "\n", " # Update the AI message's tool call with edited content (reference to the message in the state)\n", " ai_message = state[\"messages\"][-1] # Get the most recent message from the state\n", " current_id = tool_call[\"id\"] # Store the ID of the tool call being edited\n", " \n", " # Create a new list of tool calls by filtering out the one being edited and adding the updated version\n", " # This avoids modifying the original list directly (immutable approach)\n", " updated_tool_calls = [tc for tc in ai_message.tool_calls if tc[\"id\"] != current_id] + [\n", " {\"type\": \"tool_call\", \"name\": tool_call[\"name\"], 
\"args\": edited_args, \"id\": current_id}\n", " ]\n", "\n", " # Create a new copy of the message with updated tool calls rather than modifying the original\n", " # This ensures state immutability and prevents side effects in other parts of the code\n", " result.append(ai_message.model_copy(update={\"tool_calls\": updated_tool_calls}))\n", "\n", " # Update the write_email tool call with the edited content from Agent Inbox\n", " if tool_call[\"name\"] == \"write_email\":\n", " \n", " # Execute the tool with edited args\n", " observation = tool.invoke(edited_args)\n", " \n", " # Add only the tool response message\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", " \n", " # Update the schedule_meeting tool call with the edited content from Agent Inbox\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " \n", " \n", " # Execute the tool with edited args\n", " observation = tool.invoke(edited_args)\n", " \n", " # Add only the tool response message\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", " \n", " # Catch all other tool calls\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " elif response[\"type\"] == \"ignore\":\n", " if tool_call[\"name\"] == \"write_email\":\n", " # Don't execute the tool, and tell the agent how to proceed\n", " result.append({\"role\": \"tool\", \"content\": \"User ignored this email draft. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", " # Go to END\n", " goto = END\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " # Don't execute the tool, and tell the agent how to proceed\n", " result.append({\"role\": \"tool\", \"content\": \"User ignored this calendar meeting draft. 
Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", " # Go to END\n", " goto = END\n", " elif tool_call[\"name\"] == \"Question\":\n", " # Don't execute the tool, and tell the agent how to proceed\n", " result.append({\"role\": \"tool\", \"content\": \"User ignored this question. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", " # Go to END\n", " goto = END\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", " \n", " elif response[\"type\"] == \"response\":\n", " # User provided feedback\n", " user_feedback = response[\"args\"]\n", " if tool_call[\"name\"] == \"write_email\":\n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the email. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the meeting request. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", " elif tool_call[\"name\"] == \"Question\": \n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User answered the question, which can we can use for any follow up actions. 
Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " # Catch all other responses\n", " else:\n", " raise ValueError(f\"Invalid response: {response}\")\n", " \n", " # Update the state \n", " update = {\n", " \"messages\": result,\n", " }\n", "\n", " return Command(goto=goto, update=update)"]}, {"cell_type": "markdown", "id": "164b0897", "metadata": {}, "source": ["Now, let's compile the graph. "]}, {"cell_type": "code", "execution_count": null, "id": "3b6d1013", "metadata": {}, "outputs": [], "source": ["from email_assistant.utils import show_graph\n", "\n", "# Conditional edge function\n", "def should_continue(state: State) -> Literal[\"interrupt_handler\", END]:\n", " \"\"\"Route to tool handler, or end if Done tool called\"\"\"\n", " messages = state[\"messages\"]\n", " last_message = messages[-1]\n", " if last_message.tool_calls:\n", " for tool_call in last_message.tool_calls: \n", " if tool_call[\"name\"] == \"Done\":\n", " return END\n", " else:\n", " return \"interrupt_handler\"\n", "\n", "# Build workflow\n", "agent_builder = StateGraph(State)\n", "\n", "# Add nodes\n", "agent_builder.add_node(\"llm_call\", llm_call)\n", "agent_builder.add_node(\"interrupt_handler\", interrupt_handler)\n", "\n", "# Add edges\n", "agent_builder.add_edge(START, \"llm_call\")\n", "agent_builder.add_conditional_edges(\n", " \"llm_call\",\n", " should_continue,\n", " {\n", " \"interrupt_handler\": \"interrupt_handler\",\n", " END: END,\n", " },\n", ")\n", "\n", "# Compile the agent\n", "response_agent = agent_builder.compile()\n", "\n", "# Build overall workflow\n", "overall_workflow = (\n", " StateGraph(State, input=StateInput)\n", " .add_node(triage_router)\n", " .add_node(triage_interrupt_handler)\n", " .add_node(\"response_agent\", response_agent)\n", " .add_edge(START, \"triage_router\")\n", " \n", ")\n", "\n", "email_assistant = overall_workflow.compile()\n", 
"show_graph(email_assistant, xray=True)"]}, {"cell_type": "markdown", "id": "d747dcda", "metadata": {}, "source": ["#### Review of HITL Patterns\n", "\n", "**Triage Interruption** When an email is classified as \"notify\", the system interrupts to show the email to the human user\n", "- *User Decision*: User can choose to ignore the notification or provide feedback to respond to the email\n", "- *Flow Control*: If ignored, workflow ends; if user provides feedback, it flows to the Response Agent\n", "\n", "**Write Email**: System shows proposed email draft for human review\n", "- *User Decision and Flow Control*: ignore (end workflow), respond with feedback, accept draft as-is, or edit draft\n", "\n", "**Schedule Meeting**: System shows proposed meeting details for human review\n", "- *User Decision and Flow Control*: ignore (end workflow), respond with feedback, accept meeting details as-is, or edit details\n", "\n", "**Question**: System asks user a question to clarify information\n", "- *User Decision and Flow Control*: ignore (end workflow) or respond with an answer\n", "\n", "### Interrupts Allow Us to Review and Accept Tool Calls"]}, {"cell_type": "code", "execution_count": null, "id": "c12b2097", "metadata": {}, "outputs": [], "source": ["import uuid\n", "from langgraph.checkpoint.memory import InMemorySaver\n", "\n", "# Email to respond to\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph with checkpointer\n", "checkpointer = InMemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_1 = uuid.uuid4()\n", "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", "\n", "# Run the graph until a tool call that we choose to interrupt\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "5546ad46", "metadata": {}, "source": ["What happened? We hit the [interrupt](https://langchain-ai.github.io/langgraph/concepts/interrupts/), which paused execution at the tool call. You can see the `action` (tool call name) and `args` (tool call arguments) that we interrupted displayed to the user.\n", "\n", "Now, how do we handle the interrupt? This is where the `Command` interface comes in. [The `Command` object has several powerful capabilities](https://langchain-ai.github.io/langgraph/how-tos/command/). We used it to direct the flow of the graph in prior notebooks: \n", "- `goto`: Specifies which node to route to next\n", "- `update`: Modifies the state before continuing execution\n", "\n", "Here, we'll use it to resume the graph from the interrupted state:\n", "- `resume`: Provides the value to return from the interrupt call\n", "\n", "We can return whatever value our graph is designed to handle. In our case, the graph is designed to handle a list of dicts with a single key `type` that can be `accept`, `edit`, `ignore`, or `response`. 
So, we can simply pass `{\"type\": \"accept\"}` to the `resume` argument in order to tell the graph that we accept the tool call."]}, {"cell_type": "code", "execution_count": null, "id": "50b1f772", "metadata": {}, "outputs": [], "source": ["from langgraph.types import Command\n", "\n", "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "code", "execution_count": null, "id": "fd321c51", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "code", "execution_count": null, "id": "c77baa35", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_1)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "86b1ba30", "metadata": {}, "source": ["\n", "\n", "### Interrupts Allow Us to Edit Tool Calls\n", "\n", "This test demonstrates how human modification works in the HITL flow:\n", "1. We start with the same tax planning email as before\n", "2. The agent proposes a meeting with the same parameters\n", "3. 
This time, the user EDITS the meeting proposal to change:\n", " - Duration from 45 to 30 minutes\n", " - Meeting subject is made more concise\n", "4. The agent adapts to these changes when drafting the email\n", "5. The user further EDITS the email to be shorter and less formal\n", "6. The workflow completes with both modifications incorporated\n", "\n", "This scenario showcases one of the most powerful aspects of HITL: \n", "\n", "* Users can make precise modifications to agent actions before they are executed, ensuring the final outcome matches their preferences without having to handle all the details themselves."]}, {"cell_type": "code", "execution_count": null, "id": "3bfca1b4", "metadata": {}, "outputs": [], "source": ["# Same email as before\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = InMemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_2 = uuid.uuid4()\n", "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", "\n", "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "706ac0a6", "metadata": {}, "source": ["Edit the `schedule_meeting` tool call\n", "\n", "When the agent proposes the initial meeting schedule, we now simulate the user making modifications through the edit functionality. This demonstrates how the `edit` response type works:\n", "\n", "1. The user receives the same meeting proposal as in the previous test\n", "2. Instead of accepting, they modify the parameters:\n", " - Reducing duration from 45 to 30 minutes\n", " - Keeping the same day and time\n", "3. The `edit` response includes the complete set of modified arguments\n", "4. The interrupt handler replaces the original tool arguments with these edited ones\n", "5. 
The tool is executed with the user's modifications\n", "\n", "This shows how edit capability gives users precise control over agent actions while still letting the agent handle the execution details."]}, {"cell_type": "code", "execution_count": null, "id": "7175fedb", "metadata": {}, "outputs": [], "source": ["# Now simulate user editing the schedule_meeting tool call\n", "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", "edited_schedule_args = {\n", " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", " \"subject\": \"Tax Planning Discussion\",\n", " \"duration_minutes\": 30, # Changed from 45 to 30\n", " \"preferred_day\": \"2025-05-06\",\n", " \"start_time\": 14 \n", "}\n", "\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "9757706b", "metadata": {}, "source": ["Edit the `write_email` tool call\n", "\n", "After accepting the modified meeting schedule, the agent drafts an email reflecting the 30-minute duration. Now we demonstrate how editing works with email content:\n", "\n", "1. The agent has adapted its email to mention the shorter 30-minute duration\n", "2. We simulate the user wanting an even more significant change to the email:\n", " - Completely rewriting the content to be shorter and less formal\n", " - Changing the meeting day mentioned in the email (showing how users can correct agent mistakes)\n", " - Requesting confirmation rather than stating the meeting as definite\n", "3. 
The `edit` response contains the complete new email content\n", "4. The tool arguments are updated with this edited content\n", "5. The email is sent with the user's preferred wording\n", "\n", "This example shows the power of HITL for complex communication tasks - the agent handles the structure and initial content, while humans can refine tone, style, and substance."]}, {"cell_type": "code", "execution_count": null, "id": "a0604d53", "metadata": {}, "outputs": [], "source": ["# Now simulate user editing the write_email tool call\n", "print(\"\\nSimulating user editing the write_email tool call...\")\n", "edited_email_args = {\n", " \"to\": \"pm@client.com\",\n", " \"subject\": \"Re: Tax season let's schedule call\",\n", " \"content\": \"Hello Project Manager,\\n\\nThank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", "}\n", "\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "ac279101", "metadata": {}, "source": ["Look at the full message history, and see trace, to view the edited tool calls:\n", "\n", "https://smith.langchain.com/public/21769510-d57a-41e4-b5c7-0ddb23c237d8/r"]}, {"cell_type": "code", "execution_count": null, "id": "6d3e9be7", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_2)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "da4c39e9", 
"metadata": {}, "source": ["### Interrupts Allow Us to Provide Feedback on Tool Calls\n", "\n", "This test set demonstrates the \"response\" capability - providing feedback without editing or accepting:\n", "\n", "1. First, we test feedback for meeting scheduling:\n", " - The user provides specific preferences (30 minutes instead of 45, and afternoon meetings)\n", " - The agent incorporates this feedback into a revised proposal\n", " - The user then accepts the revised meeting schedule\n", "\n", "2. Second, we test feedback for email drafting:\n", " - The user requests a shorter, less formal email with a specific closing statement\n", " - The agent completely rewrites the email according to this guidance\n", " - The user accepts the new draft\n", "\n", "3. Lastly, we test feedback for questions:\n", " - For the brunch invitation, the user answers the question with additional context\n", " - The agent uses this information to draft an appropriate email response\n", " - The workflow proceeds with the user's input integrated\n", "\n", "The \"response\" capability bridges the gap between acceptance and editing - users can guide the agent without having to write the full content themselves. This is especially powerful for:\n", "- Adjusting tone and style\n", "- Adding context the agent missed\n", "- Redirecting the agent's approach\n", "- Answering questions in a way that shapes the next steps"]}, {"cell_type": "code", "execution_count": null, "id": "3c4b3517", "metadata": {}, "outputs": [], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = InMemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_5 = uuid.uuid4()\n", "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "ae2bea0a", "metadata": {}, "source": ["Provide feedback for the `schedule_meeting` tool call\n", "\n", "Now we explore the feedback capability for meeting scheduling:\n", "\n", "1. The agent proposes the standard 45-minute meeting on Tuesday at 2:00 PM\n", "2. Instead of accepting or editing, we provide feedback in natural language\n", "3. Our feedback specifies two preferences:\n", " - Shorter meeting (30 minutes instead of 45)\n", " - Preference for afternoon meetings (after 2pm)\n", "4. The agent receives this feedback through the `response` type\n", "5. The interrupt handler adds this feedback as a message to the state\n", "6. The agent processes this feedback and generates a new tool call incorporating these preferences\n", "\n", "Unlike direct editing, which requires specifying the entire set of parameters, feedback allows users to express their preferences conversationally. 
The agent must then interpret this feedback and apply it appropriately to create a revised proposal."]}, {"cell_type": "code", "execution_count": null, "id": "9a916e10", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "cf35f1a2", "metadata": {}, "source": ["Accept the `schedule_meeting` tool call after providing feedback"]}, {"cell_type": "code", "execution_count": null, "id": "2727fb0e", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "3ca470c5", "metadata": {}, "source": ["Now provide feedback for the `write_email` tool call\n", "\n", "After accepting the revised meeting schedule, the agent drafts an email. We now test feedback for email content:\n", "\n", "1. The agent's email is relatively formal and detailed\n", "2. 
We provide stylistic feedback requesting:\n", " - A shorter, more concise email\n", " - A less formal tone\n", " - A specific closing statement about looking forward to the meeting\n", "3. The agent processes this feedback to completely rewrite the email\n", "4. The new draft is much shorter, more casual, and includes the requested closing\n", "\n", "This demonstrates the power of natural language feedback for content creation:\n", "- Users don't need to rewrite the entire email themselves\n", "- They can provide high-level guidance on style, tone, and content\n", "- The agent handles the actual writing based on this guidance\n", "- The result better matches user preferences while preserving the essential information\n", "\n", "The message history shows both the original and revised emails, clearly showing how the feedback was incorporated."]}, {"cell_type": "code", "execution_count": null, "id": "f5221d87", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "1266ec72", "metadata": {}, "source": ["Accept the `write_email` tool call after providing feedback"]}, {"cell_type": "code", "execution_count": null, "id": "0b4698c0", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "c270f52a", "metadata": {}, "source": ["Look at the full message history, and see the trace:\n", "\n", "https://smith.langchain.com/public/57006770-6bb3-4e40-b990-143c373ebe60/r\n", "\n", "We can see that user feedback is incorporated into the tool calls. "]}, {"cell_type": "code", "execution_count": null, "id": "1daf10d6", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_5)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "1d964e36", "metadata": {}, "source": ["### Interrupts Enable New Tools\n", "\n", "Now let's try an email that calls the `Question` tool to provide feedback\n", "\n", "Finally, we test how feedback works with the `Question` tool:\n", "\n", "1. 
For the brunch invitation email, the agent asks about preferred day and time\n", "2. Instead of ignoring, we provide a substantive response with additional context:\n", " - Confirming we want to invite the people mentioned\n", " - Noting we need to check which weekend works best\n", " - Adding information about needing a reservation\n", "3. The agent uses this information to:\n", " - Draft a comprehensive email response incorporating all our feedback\n", " - Notice we didn't provide a specific day/time, so it suggests checking the calendar\n", " - Include the detail about making a reservation\n", "4. The complete email reflects both the original request and our additional guidance\n", "\n", "This demonstrates how question responses can shape the entire workflow:\n", "- Questions let the agent gather missing information\n", "- User responses can include both direct answers and additional context\n", "- The agent integrates all this information into its next actions\n", "- The final outcome reflects the collaborative intelligence of both human and AI"]}, {"cell_type": "code", "execution_count": null, "id": "8827632a", "metadata": {}, "outputs": [], "source": ["# Respond\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Partner \",\n", " \"subject\": \"Dinner?\",\n", " \"email_thread\": \"Hey, do you want italian or indian tonight?\"}\n", "\n", "# Compile the graph\n", "checkpointer = InMemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_6 = uuid.uuid4()\n", "thread_config_6 = {\"configurable\": {\"thread_id\": thread_id_6}}\n", "\n", "# Run the graph until the first interrupt\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_6):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT 
OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "1d9f7f1b", "metadata": {}, "source": ["Provide feedback for the `Question` tool call"]}, {"cell_type": "code", "execution_count": null, "id": "4979effd", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Let's do indian.\"}]), config=thread_config_6):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "76b4ba9b", "metadata": {}, "source": ["Accept the `write_email` tool call"]}, {"cell_type": "code", "execution_count": null, "id": "bfd34ec2", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_6):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "e214fe9e", "metadata": {}, "source": ["Look at the full message history, and see the trace:\n", "\n", "https://smith.langchain.com/public/f4c727c3-b1d9-47a5-b3d0-3451619db8a2/r\n", "\n", "We can see that user feedback is incorporated into the email response."]}, {"cell_type": "code", 
"execution_count": null, "id": "070393eb", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_6)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "0cbec016-f08a-4984-abb9-07f428f5e69f", "metadata": {}, "source": ["### Deployment\n", "\n", "Let's create a local deployment of our email assistant with HITL from `src/email_assistant/email_assistant_hitl.py`. \n", " \n", "As before, run `langgraph dev`, select `email_assistant_hitl` in Studio, and submit the e-mail:"]}, {"cell_type": "code", "execution_count": null, "id": "2609b7e4-2065-4641-a1e6-5960f399a5f5", "metadata": {}, "outputs": [], "source": ["{\n", " \"author\": \"Alice Smith \",\n", " \"to\": \"John Doe \",\n", " \"subject\": \"Quick question about API documentation\",\n", " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", "}"]}, {"cell_type": "markdown", "id": "e0fe48f4", "metadata": {}, "source": ["Our server is stateless. Threads with a local deployment are simply saved to the local filesystem (`.langgraph_api` in the project folder).\n", "\n", "With a [hosted](https://langchain-ai.github.io/langgraph/tutorials/deployment/#other-deployment-options) deployment, threads are stored in Postgres.\n", "\n", "Interrupted threads are threads with status 'interrupted', and we can see the interrupt in Studio: \n", "\n", "![studio-img](img/studio-interrupt.png)\n", "\n", "We'll use a custom interface to view these interrupted threads, [Agent Inbox](https://dev.agentinbox.ai/). \n", "\n", "This interface is a nice way to edit, approve, ignore, or provide feedback on specific actions taken by LangGraph agents. 
\n", "\n", "If you go to [dev.agentinbox.ai](https://dev.agentinbox.ai/), you can easily connect to the graph:\n", " * Graph name: the name from the `langgraph.json` file (`email_assistant_hitl`)\n", " * Graph URL: `http://127.0.0.1:2024/`\n", "\n", "All interrupted threads will then be visible: \n", "\n", "![agent-inbox-img](img/agent-inbox.png)\n", "\n", "Agent Inbox simply uses a `Command` with `resume`, as [shown with the SDK](https://langchain-ai.github.io/langgraph/how-tos/human_in_the_loop/wait-user-input/#interacting-with-the-agent) above, to resume the graph."]}, {"cell_type": "markdown", "id": "4dd416e5", "metadata": {}, "source": []}], "metadata": {"jupytext": {"cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all"}, "kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6"}}, "nbformat": 4, "nbformat_minor": 5} diff --git a/notebooks/memory.ipynb b/notebooks/memory.ipynb index 8a248c0..be18189 100644 --- a/notebooks/memory.ipynb +++ b/notebooks/memory.ipynb @@ -1,1769 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d047044f", - "metadata": {}, - "source": [ - "# Agents with Memory\n", - "\n", - "We have an email assistant that uses a router to triage emails and then passes the email to the agent for response generation. We've also evaluated it and added human-in-the-loop (HITL) to review specific tool calls. 
Now, we add memory, giving our assistant the ability to remember our HITL feedback!\n", - "\n", - "![overview-img](img/overview_memory.png)" - ] - }, - { - "cell_type": "markdown", - "id": "143094b4", - "metadata": {}, - "source": [ - "#### Load Environment Variables" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "085c21ad", - "metadata": {}, - "outputs": [], - "source": [ - "from dotenv import load_dotenv\n", - "load_dotenv(\"../.env\")" - ] - }, - { - "cell_type": "markdown", - "id": "b99855af", - "metadata": {}, - "source": [ - "## Memory in LangGraph\n", - "\n", - "### Thread-Scoped and Across-Thread Memory\n", - "\n", - "First, it's worth explaining how [memory works in LangGraph](https://langchain-ai.github.io/langgraph/concepts/memory/). LangGraph offers two distinct types of memory that serve complementary purposes:\n", - "\n", - "**Thread-Scoped Memory (Short-term)** operates within the boundaries of a single conversation thread. It's automatically managed as part of the graph's state and persisted through thread-scoped checkpoints. This memory type retains conversation history, uploaded files, retrieved documents, and other artifacts generated during the interaction. Think of it as the working memory that maintains context within one specific conversation, allowing the agent to reference earlier messages or actions without starting from scratch each time.\n", - "\n", - "**Across-Thread Memory (Long-term)** extends beyond individual conversations, creating a persistent knowledge base that spans multiple sessions. This memory is stored as JSON documents in a memory store, organized by namespaces (like folders) and distinct keys (like filenames). Unlike thread-scoped memory, this information persists even after conversations end, enabling the system to recall user preferences, past decisions, and accumulated knowledge. 
This is what allows an agent to truly learn and adapt over time, rather than treating each interaction as isolated.\n", - "\n", - "![short-vs-long-term-memory](img/short-vs-long.png)\n", - "\n", - "The [Store](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.BaseStore) is the foundation of this architecture, providing a flexible database where memories can be organized, retrieved, and updated. What makes this approach powerful is that regardless of which memory type you're working with, the same Store interface provides consistent access patterns. This allows your agent's code to remain unchanged whether you're using a simple in-memory implementation during development or a production-grade database in deployment. \n", - "\n", - "### LangGraph Store\n", - "\n", - "LangGraph offers different [Store implementations](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.BaseStore) depending on your [deployment](https://langchain-ai.github.io/langgraph/tutorials/deployment/#other-deployment-options):\n", - "\n", - "1. **In-Memory (e.g., notebooks)**:\n", - " - Uses `from langgraph.store.memory import InMemoryStore`\n", - " - Purely a Python dictionary in memory with no persistence\n", - " - Data is lost when the process terminates\n", - " - Useful for quick experiments and testing\n", - " - Semantic search can be configured as shown [here](https://langchain-ai.github.io/langgraph/how-tos/memory/semantic-search/)\n", - "\n", - "2. **Local Development with `langgraph dev`**:\n", - " - Similar to InMemoryStore but with pseudo-persistence\n", - " - Data is pickled to the local filesystem between restarts\n", - " - Lightweight and fast, no need for external databases\n", - " - Semantic search can be configured as shown [here](https://langchain-ai.github.io/langgraph/cloud/deployment/semantic_search/)\n", - " - Great for development but not designed for production use\n", - "\n", - "3. 
**LangGraph Platform or Production Deployments**:\n", - " - Uses PostgreSQL with pgvector for production-grade persistence\n", - " - Fully persistent data storage with reliable backups\n", - " - Scalable for larger datasets\n", - " - Semantic search can be configured as shown [here](https://langchain-ai.github.io/langgraph/cloud/deployment/semantic_search/)\n", - " - Default distance metric is cosine similarity (customizable)\n", - "\n", - "Let's use the `InMemoryStore` here in the notebook! " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7fa1dda7", - "metadata": {}, - "outputs": [], - "source": [ - "from langgraph.store.memory import InMemoryStore\n", - "in_memory_store = InMemoryStore()" - ] - }, - { - "cell_type": "markdown", - "id": "aceb204c", - "metadata": {}, - "source": [ - "Memories are namespaced by a tuple, which in this specific example will be (``, \"memories\"). The namespace can be any length and represent anything, it does not have to be user specific." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0488a5f", - "metadata": {}, - "outputs": [], - "source": [ - "user_id = \"1\"\n", - "namespace_for_memory = (user_id, \"memories\")" - ] - }, - { - "cell_type": "markdown", - "id": "3da8b303", - "metadata": {}, - "source": [ - "We use the `store.put` method to save memories to our namespace in the store. When we do this, we specify the namespace, as defined above, and a key-value pair for the memory: the key is simply a unique identifier for the memory (memory_id) and the value (a dictionary) is the memory itself." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4af95b32", - "metadata": {}, - "outputs": [], - "source": [ - "import uuid\n", - "memory_id = str(uuid.uuid4())\n", - "memory = {\"food_preference\" : \"I like pizza\"}\n", - "in_memory_store.put(namespace_for_memory, memory_id, memory)" - ] - }, - { - "cell_type": "markdown", - "id": "60408492", - "metadata": {}, - "source": [ - "We can read out memories in our namespace using the `store.search` method, which will return all memories for a given user as a list. The most recent memory is the last in the list. Each memory type is a Python class (`Item`) with certain attributes. We can access it as a dictionary by converting via `.dict`. The attributes it has are shown below, but the most important one is typically `value`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4c25f5ef", - "metadata": {}, - "outputs": [], - "source": [ - "memories = in_memory_store.search(namespace_for_memory)\n", - "memories[-1].dict()" - ] - }, - { - "cell_type": "markdown", - "id": "44f3e781", - "metadata": {}, - "source": [ - "To use this in a graph, all we need to do is compile the graph with the store:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6476b361", - "metadata": {}, - "outputs": [], - "source": [ - "# We need this because we want to enable threads (conversations)\n", - "from langgraph.checkpoint.memory import InMemorySaver\n", - "checkpointer = InMemorySaver()\n", - "# We need this because we want to enable across-thread memory\n", - "from langgraph.store.memory import InMemoryStore\n", - "in_memory_store = InMemoryStore()\n", - "# Compile the graph with the checkpointer and store\n", - "# graph = graph.compile(checkpointer=checkpointer, store=in_memory_store)" - ] - }, - { - "cell_type": "markdown", - "id": "9c982928", - "metadata": {}, - "source": [ - "The store is then accessible in any node of the graph, as we'll see below!\n", - "\n", - "## Adding 
Memory to our Assistant\n", - "\n", - "Let's take our graph with HITL and add memory to it. This will be very similar to what we had previously. We'll simply update memory in the store when we get feedback from the user.\n", - "\n", - "![overview-img](img/HITL_flow_memory.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "38308fc3", - "metadata": {}, - "outputs": [], - "source": [ - "%cd ..\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "from dotenv import load_dotenv\n", - "load_dotenv(\".env\", override=True)\n", - "\n", - "from typing import Literal\n", - "from datetime import datetime\n", - "from pydantic import BaseModel\n", - "\n", - "from langchain.chat_models import init_chat_model\n", - "from langchain_core.tools import tool\n", - "\n", - "from langgraph.graph import StateGraph, START, END\n", - "from langgraph.store.base import BaseStore\n", - "from langgraph.types import interrupt, Command\n", - "\n", - "from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl_memory, default_triage_instructions, default_background, default_response_preferences, default_cal_preferences\n", - "from email_assistant.tools.default.prompt_templates import HITL_MEMORY_TOOLS_PROMPT\n", - "from email_assistant.schemas import State, RouterSchema, StateInput\n", - "from email_assistant.utils import parse_email, format_for_display, format_email_markdown\n", - "\n", - "# Agent tools \n", - "@tool\n", - "def write_email(to: str, subject: str, content: str) -> str:\n", - " \"\"\"Write and send an email.\"\"\"\n", - " # Placeholder response - in real app would send email\n", - " return f\"Email sent to {to} with subject '{subject}' and content: {content}\"\n", - "\n", - "@tool\n", - "def schedule_meeting(\n", - " attendees: list[str], subject: str, duration_minutes: int, preferred_day: datetime, start_time: int\n", - ") -> str:\n", - " \"\"\"Schedule a calendar meeting.\"\"\"\n", - " # Placeholder 
response - in real app would check calendar and schedule\n", - " date_str = preferred_day.strftime(\"%A, %B %d, %Y\")\n", - " return f\"Meeting '{subject}' scheduled on {date_str} at {start_time} for {duration_minutes} minutes with {len(attendees)} attendees\"\n", - "\n", - "@tool\n", - "def check_calendar_availability(day: str) -> str:\n", - " \"\"\"Check calendar availability for a given day.\"\"\"\n", - " # Placeholder response - in real app would check actual calendar\n", - " return f\"Available times on {day}: 9:00 AM, 2:00 PM, 4:00 PM\"\n", - "\n", - "@tool\n", - "class Question(BaseModel):\n", - " \"\"\"Question to ask user.\"\"\"\n", - " content: str\n", - "\n", - "@tool\n", - "class Done(BaseModel):\n", - " \"\"\"E-mail has been sent.\"\"\"\n", - " done: bool\n", - " \n", - "# All tools available to the agent\n", - "tools = [\n", - " write_email, \n", - " schedule_meeting, \n", - " check_calendar_availability, \n", - " Question, \n", - " Done\n", - "]\n", - "\n", - "tools_by_name = {tool.name: tool for tool in tools}\n", - "\n", - "# Initialize the LLM for use with router / structured output\n", - "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", - "llm_router = llm.with_structured_output(RouterSchema) \n", - "\n", - "# Initialize the LLM, enforcing tool use (of any available tools) for agent\n", - "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", - "llm_with_tools = llm.bind_tools(tools, tool_choice=\"required\")" - ] - }, - { - "cell_type": "markdown", - "id": "03538f56", - "metadata": {}, - "source": [ - "Now, this is the critical part! We currently don't capture any feedback from the user in our graph. \n", - "\n", - "### Memory Management \n", - "\n", - "What we *want* to do is fairly straightforward: we want to add the feedback to the memory `Store`. If we compile our graph with the `Store`, we can access it in any node. So that is not a problem! 
\n", - "\n", - "But we have to answer two questions: \n", - "\n", - "1) how do we want the memory to be structured?\n", - "2) how do we want to update the memory?\n", - "\n", - "For 1) we'll just store memories as string to keep things simple. In the below function, we'll just fetch memories from the store as string and initialize with default if it doesn't exist." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d2715152-2d19-4449-be4b-fdc602eee52d", - "metadata": {}, - "outputs": [], - "source": [ - "default_triage_instructions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ca9ab99d-bc21-4cf7-a58a-261e82920566", - "metadata": {}, - "outputs": [], - "source": [ - "default_cal_preferences" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6cd98f1-15a7-4fbb-8cce-cbbb0503d22b", - "metadata": {}, - "outputs": [], - "source": [ - "default_response_preferences" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d195aa00", - "metadata": {}, - "outputs": [], - "source": [ - "def get_memory(store, namespace, default_content=None):\n", - " \"\"\"Get memory from the store or initialize with default if it doesn't exist.\n", - " \n", - " Args:\n", - " store: LangGraph BaseStore instance to search for existing memory\n", - " namespace: Tuple defining the memory namespace, e.g. 
(\"email_assistant\", \"triage_preferences\")\n", - " default_content: Default content to use if memory doesn't exist\n", - " \n", - " Returns:\n", - " str: The content of the memory profile, either from existing memory or the default\n", - " \"\"\"\n", - " # Search for existing memory with namespace and key\n", - " user_preferences = store.get(namespace, \"user_preferences\")\n", - " \n", - " # If memory exists, return its content (the value)\n", - " if user_preferences:\n", - " return user_preferences.value\n", - " \n", - " # If memory doesn't exist, add it to the store and return the default content\n", - " else:\n", - " # Namespace, key, value\n", - " store.put(namespace, \"user_preferences\", default_content)\n", - " user_preferences = default_content\n", - " \n", - " # Return the default content\n", - " return user_preferences " - ] - }, - { - "cell_type": "markdown", - "id": "cc5181e6", - "metadata": {}, - "source": [ - "For 2) updating memory, we can use a few tricks from the [GPT-4.1 prompting guide]((https://cookbook.openai.com/examples/gpt4-1_prompting_guide)) to help us update the memory: \n", - "\n", - "* For optimal performance, repeat the key instructions at the start and end of the prompt\n", - "* Create clear, explicit instructions \n", - "* Use XML delimiters for structure\n", - "* Provide examples " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f8aa70e", - "metadata": {}, - "outputs": [], - "source": [ - "MEMORY_UPDATE_INSTRUCTIONS = \"\"\"\n", - "# Role and Objective\n", - "You are a memory profile manager for an email assistant agent that selectively updates user preferences based on feedback messages from human-in-the-loop interactions with the email assistant.\n", - "\n", - "# Instructions\n", - "- NEVER overwrite the entire memory profile\n", - "- ONLY make targeted additions of new information\n", - "- ONLY update specific facts that are directly contradicted by feedback messages\n", - "- PRESERVE all other 
existing information in the profile\n", - "- Format the profile consistently with the original style\n", - "- Generate the profile as a string\n", - "\n", - "# Reasoning Steps\n", - "1. Analyze the current memory profile structure and content\n", - "2. Review feedback messages from human-in-the-loop interactions\n", - "3. Extract relevant user preferences from these feedback messages (such as edits to emails/calendar invites, explicit feedback on assistant performance, user decisions to ignore certain emails)\n", - "4. Compare new information against existing profile\n", - "5. Identify only specific facts to add or update\n", - "6. Preserve all other existing information\n", - "7. Output the complete updated profile\n", - "\n", - "# Example\n", - "\n", - "RESPOND:\n", - "- wife\n", - "- specific questions\n", - "- system admin notifications\n", - "NOTIFY: \n", - "- meeting invites\n", - "IGNORE:\n", - "- marketing emails\n", - "- company-wide announcements\n", - "- messages meant for other teams\n", - "\n", - "\n", - "\n", - "\"The assistant shouldn't have responded to that system admin notification.\"\n", - "\n", - "\n", - "\n", - "RESPOND:\n", - "- wife\n", - "- specific questions\n", - "NOTIFY: \n", - "- meeting invites\n", - "- system admin notifications\n", - "IGNORE:\n", - "- marketing emails\n", - "- company-wide announcements\n", - "- messages meant for other teams\n", - "\n", - "\n", - "# Process current profile for {namespace}\n", - "\n", - "{current_profile}\n", - "\n", - "\n", - "Think step by step about what specific feedback is being provided and what specific information should be added or updated in the profile while preserving everything else.\"\"\"\n", - "\n", - "MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT = \"\"\"\n", - "Remember:\n", - "- NEVER overwrite the entire profile\n", - "- ONLY make targeted additions or changes based on explicit feedback\n", - "- PRESERVE all existing information not directly contradicted\n", - "- Output the complete 
updated profile as a string\n", - "\"\"\"\n", - "\n", - "class UserPreferences(BaseModel):\n", - " \"\"\"User preferences.\"\"\"\n", - " preferences: str\n", - " justification: str\n", - "\n", - "def update_memory(store, namespace, messages):\n", - " \"\"\"Update memory profile in the store.\n", - " \n", - " Args:\n", - " store: LangGraph BaseStore instance to update memory\n", - " namespace: Tuple defining the memory namespace, e.g. (\"email_assistant\", \"triage_preferences\")\n", - " messages: List of messages to update the memory with\n", - " \"\"\"\n", - "\n", - " # Get the existing memory\n", - " user_preferences = store.get(namespace, \"user_preferences\")\n", - "\n", - " # Update the memory\n", - " llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0).with_structured_output(UserPreferences)\n", - " result = llm.invoke(\n", - " [\n", - " {\"role\": \"system\", \"content\": MEMORY_UPDATE_INSTRUCTIONS.format(current_profile=user_preferences.value, namespace=namespace)},\n", - " {\"role\": \"user\", \"content\": f\"Think carefully and update the memory profile based upon these user messages:\"}\n", - " ] + messages\n", - " )\n", - " \n", - " # Save the updated memory to the store\n", - " store.put(namespace, \"user_preferences\", result.preferences)" - ] - }, - { - "cell_type": "markdown", - "id": "8af20960", - "metadata": {}, - "source": [ - "We set up the triage router as we had before, with one small change" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1a789ab", - "metadata": {}, - "outputs": [], - "source": [ - "def triage_router(state: State, store: BaseStore) -> Command[Literal[\"triage_interrupt_handler\", \"response_agent\", \"__end__\"]]:\n", - " \"\"\"Analyze email content to decide if we should respond, notify, or ignore.\n", - "\n", - " The triage step prevents the assistant from wasting time on:\n", - " - Marketing emails and spam\n", - " - Company-wide announcements\n", - " - Messages meant for other teams\n", - " 
\"\"\"\n", - " # Parse the email input\n", - " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", - " user_prompt = triage_user_prompt.format(\n", - " author=author, to=to, subject=subject, email_thread=email_thread\n", - " )\n", - "\n", - " # Create email markdown for Agent Inbox in case of notification \n", - " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", - "\n", - " # Search for existing triage_preferences memory\n", - " triage_instructions = get_memory(store, (\"email_assistant\", \"triage_preferences\"), default_triage_instructions)\n", - "\n", - " # Format system prompt with background and triage instructions\n", - " system_prompt = triage_system_prompt.format(\n", - " background=default_background,\n", - " triage_instructions=triage_instructions,\n", - " )\n", - "\n", - " # Run the router LLM\n", - " result = llm_router.invoke(\n", - " [\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": user_prompt},\n", - " ]\n", - " )\n", - "\n", - " # Decision\n", - " classification = result.classification\n", - "\n", - " # Process the classification decision\n", - " if classification == \"respond\":\n", - " print(\"📧 Classification: RESPOND - This email requires a response\")\n", - " # Next node\n", - " goto = \"response_agent\"\n", - " # Update the state\n", - " update = {\n", - " \"classification_decision\": result.classification,\n", - " \"messages\": [{\"role\": \"user\",\n", - " \"content\": f\"Respond to the email: {email_markdown}\"\n", - " }],\n", - " }\n", - " \n", - " elif classification == \"ignore\":\n", - " print(\"🚫 Classification: IGNORE - This email can be safely ignored\")\n", - "\n", - " # Next node\n", - " goto = END\n", - " # Update the state\n", - " update = {\n", - " \"classification_decision\": classification,\n", - " }\n", - "\n", - " elif classification == \"notify\":\n", - " print(\"🔔 Classification: NOTIFY - This email contains 
important information\") \n", - "\n", - " # Next node\n", - " goto = \"triage_interrupt_handler\"\n", - " # Update the state\n", - " update = {\n", - " \"classification_decision\": classification,\n", - " }\n", - "\n", - " else:\n", - " raise ValueError(f\"Invalid classification: {classification}\")\n", - " \n", - " return Command(goto=goto, update=update)\n" - ] - }, - { - "cell_type": "markdown", - "id": "a6be4d63", - "metadata": {}, - "source": [ - "We only need to make a small change to the interrupt handler to update the memory when the user provides feedback. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f76ef46d", - "metadata": {}, - "outputs": [], - "source": [ - "def triage_interrupt_handler(state: State, store: BaseStore) -> Command[Literal[\"response_agent\", \"__end__\"]]:\n", - " \"\"\"Handles interrupts from the triage step\"\"\"\n", - " \n", - " # Parse the email input\n", - " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", - "\n", - " # Create email markdown for Agent Inbox in case of notification \n", - " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", - "\n", - " # Create messages\n", - " messages = [{\"role\": \"user\",\n", - " \"content\": f\"Email to notify user about: {email_markdown}\"\n", - " }]\n", - "\n", - " # Create interrupt for Agent Inbox\n", - " request = {\n", - " \"action_request\": {\n", - " \"action\": f\"Email Assistant: {state['classification_decision']}\",\n", - " \"args\": {}\n", - " },\n", - " \"config\": {\n", - " \"allow_ignore\": True, \n", - " \"allow_respond\": True,\n", - " \"allow_edit\": False, \n", - " \"allow_accept\": False, \n", - " },\n", - " # Email to show in Agent Inbox\n", - " \"description\": email_markdown,\n", - " }\n", - "\n", - " # Send to Agent Inbox and wait for response\n", - " response = interrupt([request])[0]\n", - "\n", - " # If user provides feedback, go to response agent and use feedback to respond to email 
\n", - " if response[\"type\"] == \"response\":\n", - " # Add feedback to messages \n", - " user_input = response[\"args\"]\n", - " messages.append({\"role\": \"user\",\n", - " \"content\": f\"User wants to reply to the email. Use this feedback to respond: {user_input}\"\n", - " })\n", - " # This is new: update triage_preferences with feedback\n", - " update_memory(store, (\"email_assistant\", \"triage_preferences\"), [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"The user decided to respond to the email, so update the triage preferences to capture this.\"\n", - " }] + messages)\n", - "\n", - " goto = \"response_agent\"\n", - "\n", - " # If user ignores email, go to END\n", - " elif response[\"type\"] == \"ignore\":\n", - " # Make note of the user's decision to ignore the email\n", - " messages.append({\"role\": \"user\",\n", - " \"content\": f\"The user decided to ignore the email even though it was classified as notify. Update triage preferences to capture this.\"\n", - " })\n", - " # This is new: triage_preferences with feedback\n", - " update_memory(store, (\"email_assistant\", \"triage_preferences\"), messages)\n", - " goto = END\n", - "\n", - " # Catch all other responses\n", - " else:\n", - " raise ValueError(f\"Invalid response: {response}\")\n", - "\n", - " # Update the state \n", - " update = {\n", - " \"messages\": messages,\n", - " }\n", - "\n", - " return Command(goto=goto, update=update)" - ] - }, - { - "cell_type": "markdown", - "id": "9cd428f5", - "metadata": {}, - "source": [ - "### Incorporating Memory into LLM Responses\n", - "\n", - "Now that we have memory managers set up, we can use the stored preferences when generating responses" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a82b17a6", - "metadata": {}, - "outputs": [], - "source": [ - "def llm_call(state: State, store: BaseStore):\n", - " \"\"\"LLM decides whether to call a tool or not\"\"\"\n", - "\n", - " # Search for existing cal_preferences memory\n", - 
" cal_preferences = get_memory(store, (\"email_assistant\", \"cal_preferences\"), default_cal_preferences)\n", - " \n", - " # Search for existing response_preferences memory\n", - " response_preferences = get_memory(store, (\"email_assistant\", \"response_preferences\"), default_response_preferences)\n", - "\n", - " return {\n", - " \"messages\": [\n", - " llm_with_tools.invoke(\n", - " [\n", - " {\"role\": \"system\", \"content\": agent_system_prompt_hitl_memory.format(tools_prompt=HITL_MEMORY_TOOLS_PROMPT,\n", - " background=default_background,\n", - " response_preferences=response_preferences, \n", - " cal_preferences=cal_preferences)}\n", - " ]\n", - " + state[\"messages\"]\n", - " )\n", - " ]\n", - " }" - ] - }, - { - "cell_type": "markdown", - "id": "e60aff5d", - "metadata": {}, - "source": [ - "### Memory Integration in the Interrupt Handler\n", - "\n", - "Similarly, we'll add memory to the interrupt handler! " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "126d3680", - "metadata": {}, - "outputs": [], - "source": [ - "def interrupt_handler(state: State, store: BaseStore) -> Command[Literal[\"llm_call\", \"__end__\"]]:\n", - " \"\"\"Creates an interrupt for human review of tool calls\"\"\"\n", - " \n", - " # Store messages\n", - " result = []\n", - "\n", - " # Go to the LLM call node next\n", - " goto = \"llm_call\"\n", - "\n", - " # Iterate over the tool calls in the last message\n", - " for tool_call in state[\"messages\"][-1].tool_calls:\n", - " \n", - " # Allowed tools for HITL\n", - " hitl_tools = [\"write_email\", \"schedule_meeting\", \"Question\"]\n", - " \n", - " # If tool is not in our HITL list, execute it directly without interruption\n", - " if tool_call[\"name\"] not in hitl_tools:\n", - "\n", - " # Execute search_memory and other tools without interruption\n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " observation = tool.invoke(tool_call[\"args\"])\n", - " result.append({\"role\": \"tool\", \"content\": 
observation, \"tool_call_id\": tool_call[\"id\"]})\n", - " continue\n", - " \n", - " # Get original email from email_input in state\n", - " email_input = state[\"email_input\"]\n", - " author, to, subject, email_thread = parse_email(email_input)\n", - " original_email_markdown = format_email_markdown(subject, author, to, email_thread)\n", - " \n", - " # Format tool call for display and prepend the original email\n", - " tool_display = format_for_display(state, tool_call)\n", - " description = original_email_markdown + tool_display\n", - "\n", - " # Configure what actions are allowed in Agent Inbox\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " config = {\n", - " \"allow_ignore\": True,\n", - " \"allow_respond\": True,\n", - " \"allow_edit\": True,\n", - " \"allow_accept\": True,\n", - " }\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " config = {\n", - " \"allow_ignore\": True,\n", - " \"allow_respond\": True,\n", - " \"allow_edit\": True,\n", - " \"allow_accept\": True,\n", - " }\n", - " elif tool_call[\"name\"] == \"Question\":\n", - " config = {\n", - " \"allow_ignore\": True,\n", - " \"allow_respond\": True,\n", - " \"allow_edit\": False,\n", - " \"allow_accept\": False,\n", - " }\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " # Create the interrupt request\n", - " request = {\n", - " \"action_request\": {\n", - " \"action\": tool_call[\"name\"],\n", - " \"args\": tool_call[\"args\"]\n", - " },\n", - " \"config\": config,\n", - " \"description\": description,\n", - " }\n", - "\n", - " # Send to Agent Inbox and wait for response\n", - " response = interrupt([request])[0]\n", - "\n", - " # Handle the responses \n", - " if response[\"type\"] == \"accept\":\n", - "\n", - " # Execute the tool with original args\n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " observation = tool.invoke(tool_call[\"args\"])\n", - " result.append({\"role\": \"tool\", \"content\": observation, 
\"tool_call_id\": tool_call[\"id\"]})\n", - " \n", - " elif response[\"type\"] == \"edit\":\n", - "\n", - " # Tool selection \n", - " tool = tools_by_name[tool_call[\"name\"]]\n", - " initial_tool_call = tool_call[\"args\"]\n", - " \n", - " # Get edited args from Agent Inbox\n", - " edited_args = response[\"args\"][\"args\"]\n", - "\n", - " # Update the AI message's tool call with edited content (reference to the message in the state)\n", - " ai_message = state[\"messages\"][-1] # Get the most recent message from the state\n", - " current_id = tool_call[\"id\"] # Store the ID of the tool call being edited\n", - " \n", - " # Create a new list of tool calls by filtering out the one being edited and adding the updated version\n", - " # This avoids modifying the original list directly (immutable approach)\n", - " updated_tool_calls = [tc for tc in ai_message.tool_calls if tc[\"id\"] != current_id] + [\n", - " {\"type\": \"tool_call\", \"name\": tool_call[\"name\"], \"args\": edited_args, \"id\": current_id}\n", - " ]\n", - "\n", - " # Create a new copy of the message with updated tool calls rather than modifying the original\n", - " # This ensures state immutability and prevents side effects in other parts of the code\n", - " result.append(ai_message.model_copy(update={\"tool_calls\": updated_tool_calls}))\n", - "\n", - " # Save feedback in memory and update the write_email tool call with the edited content from Agent Inbox\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " \n", - " # Execute the tool with edited args\n", - " observation = tool.invoke(edited_args)\n", - " \n", - " # Add only the tool response message\n", - " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", - "\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"response_preferences\"), [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"User edited the email response. 
Here is the initial email generated by the assistant: {initial_tool_call}. Here is the edited email: {edited_args}. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - " \n", - " # Save feedback in memory and update the schedule_meeting tool call with the edited content from Agent Inbox\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " \n", - " # Execute the tool with edited args\n", - " observation = tool.invoke(edited_args)\n", - " \n", - " # Add only the tool response message\n", - " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", - "\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"cal_preferences\"), [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"User edited the calendar invitation. Here is the initial calendar invitation generated by the assistant: {initial_tool_call}. Here is the edited calendar invitation: {edited_args}. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - " \n", - " # Catch all other tool calls\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " elif response[\"type\"] == \"ignore\":\n", - "\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " # Don't execute the tool, and tell the agent how to proceed\n", - " result.append({\"role\": \"tool\", \"content\": \"User ignored this email draft. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # Go to END\n", - " goto = END\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"triage_preferences\"), state[\"messages\"] + result + [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"The user ignored the email draft. That means they did not want to respond to the email. 
Update the triage preferences to ensure emails of this type are not classified as respond. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - "\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " # Don't execute the tool, and tell the agent how to proceed\n", - " result.append({\"role\": \"tool\", \"content\": \"User ignored this calendar meeting draft. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # Go to END\n", - " goto = END\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"triage_preferences\"), state[\"messages\"] + result + [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"The user ignored the calendar meeting draft. That means they did not want to schedule a meeting for this email. Update the triage preferences to ensure emails of this type are not classified as respond. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - "\n", - " elif tool_call[\"name\"] == \"Question\":\n", - " # Don't execute the tool, and tell the agent how to proceed\n", - " result.append({\"role\": \"tool\", \"content\": \"User ignored this question. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # Go to END\n", - " goto = END\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"triage_preferences\"), state[\"messages\"] + result + [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"The user ignored the Question. That means they did not want to answer the question or deal with this email. Update the triage preferences to ensure emails of this type are not classified as respond. 
Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - "\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " elif response[\"type\"] == \"response\":\n", - " # User provided feedback\n", - " user_feedback = response[\"args\"]\n", - " if tool_call[\"name\"] == \"write_email\":\n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the email. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"response_preferences\"), state[\"messages\"] + result + [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"User gave feedback, which we can use to update the response preferences. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - "\n", - " elif tool_call[\"name\"] == \"schedule_meeting\":\n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the meeting request. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # This is new: update the memory\n", - " update_memory(store, (\"email_assistant\", \"cal_preferences\"), state[\"messages\"] + result + [{\n", - " \"role\": \"user\",\n", - " \"content\": f\"User gave feedback, which we can use to update the calendar preferences. 
Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", - " }])\n", - "\n", - " elif tool_call[\"name\"] == \"Question\":\n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User answered the question, which can we can use for any follow up actions. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - "\n", - " else:\n", - " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", - "\n", - " # Update the state \n", - " update = {\n", - " \"messages\": result,\n", - " }\n", - "\n", - " return Command(goto=goto, update=update)" - ] - }, - { - "cell_type": "markdown", - "id": "ecedcaec", - "metadata": {}, - "source": [ - "The rest is the same as before!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7041f50d", - "metadata": {}, - "outputs": [], - "source": [ - "from email_assistant.utils import show_graph\n", - "\n", - "# Conditional edge function\n", - "def should_continue(state: State, store: BaseStore) -> Literal[\"interrupt_handler\", END]:\n", - " \"\"\"Route to tool handler, or end if Done tool called\"\"\"\n", - " messages = state[\"messages\"]\n", - " last_message = messages[-1]\n", - " if last_message.tool_calls:\n", - " for tool_call in last_message.tool_calls: \n", - " if tool_call[\"name\"] == \"Done\":\n", - " return END\n", - " else:\n", - " return \"interrupt_handler\"\n", - "\n", - "# Build workflow\n", - "agent_builder = StateGraph(State)\n", - "\n", - "# Add nodes - with store parameter\n", - "agent_builder.add_node(\"llm_call\", llm_call)\n", - "agent_builder.add_node(\"interrupt_handler\", interrupt_handler)\n", - "\n", - "# Add edges\n", - "agent_builder.add_edge(START, \"llm_call\")\n", - "agent_builder.add_conditional_edges(\n", - " \"llm_call\",\n", - " should_continue,\n", - " {\n", - " \"interrupt_handler\": \"interrupt_handler\",\n", - " 
END: END,\n", - " },\n", - ")\n", - "\n", - "# Compile the agent\n", - "response_agent = agent_builder.compile()\n", - "\n", - "# Build overall workflow with store and checkpointer\n", - "overall_workflow = (\n", - " StateGraph(State, input=StateInput)\n", - " .add_node(triage_router)\n", - " .add_node(triage_interrupt_handler)\n", - " .add_node(\"response_agent\", response_agent)\n", - " .add_edge(START, \"triage_router\")\n", - ")\n", - "\n", - "email_assistant = overall_workflow.compile()\n", - "show_graph(email_assistant)" - ] - }, - { - "cell_type": "markdown", - "id": "43747219", - "metadata": {}, - "source": [ - "## Testing the agent with memory\n", - "\n", - "Now that we've implemented memory into our email assistant, let's test how the system learns from user feedback and adapts over time. This testing section explores how different types of user interactions create distinct memory updates that improve the assistant's future performance.\n", - "\n", - "The key questions we're answering through these tests:\n", - "1. How does the system capture and store user preferences?\n", - "2. How do these stored preferences affect future decisions?\n", - "3. 
What patterns of interaction lead to which types of memory updates?\n", - "\n", - "First, let's build a helper function to display memory content so we can track how it evolves throughout our tests:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "59079929", - "metadata": {}, - "outputs": [], - "source": [ - "import uuid \n", - "from langgraph.checkpoint.memory import MemorySaver\n", - "from langgraph.types import Command\n", - "from langgraph.store.memory import InMemoryStore\n", - "\n", - "# Helper function to display memory content\n", - "def display_memory_content(store, namespace=None):\n", - " # Display current memory content for all namespaces\n", - " print(\"\\n======= CURRENT MEMORY CONTENT =======\")\n", - " if namespace:\n", - " memory = store.get(namespace, \"user_preferences\")\n", - " if memory:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print({\"preferences\": memory.value})\n", - " else:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print(\"No memory found\")\n", - " else:\n", - " for namespace in [\n", - " (\"email_assistant\", \"triage_preferences\"),\n", - " (\"email_assistant\", \"response_preferences\"),\n", - " (\"email_assistant\", \"cal_preferences\"),\n", - " (\"email_assistant\", \"background\")\n", - " ]:\n", - " memory = store.get(namespace, \"user_preferences\")\n", - " if memory:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print({\"preferences\": memory.value})\n", - " else:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print(\"No memory found\")\n", - " print(\"=======================================\\n\")" - ] - }, - { - "cell_type": "markdown", - "id": "397114bf", - "metadata": {}, - "source": [ - "### Accept `write_email` and `schedule_meeting`\n", - "\n", - "Our first test examines what happens when a user accepts the agent's actions without modification. This baseline case helps us understand how the system behaves when no feedback is provided:\n", - "\n", - "1. 
We'll use the same tax planning email from our previous tests\n", - "2. The system will classify it as \"RESPOND\" and propose scheduling a meeting\n", - "3. We'll accept the meeting schedule without changes\n", - "4. The agent will generate an email confirming the meeting\n", - "5. We'll accept the email without changes\n", - "\n", - "This test demonstrates the default behavior of our memory-enabled system. When a user simply accepts proposed actions, we expect minimal or no memory updates since there's no explicit feedback to learn from. However, the system will still leverage existing memory (if any) when generating its responses." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "649cee4f", - "metadata": {}, - "outputs": [], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_1 = uuid.uuid4()\n", - "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "878e199e", - "metadata": {}, - "source": [ - "Accept the `schedule_meeting` tool call\n", - "\n", - "As we examine the initial `schedule_meeting` proposal, note how the system uses existing memory to inform its decisions:\n", - "\n", - "1. The default calendar preferences show a preference for 30-minute meetings, though the email requests 45 minutes\n", - "2. The agent still proposes a 45-minute meeting, respecting the sender's specific request\n", - "3. We accept this proposal without modification to see if simple acceptance triggers any memory updates\n", - "\n", - "After running this step, we'll check the memory contents to confirm whether acceptance alone leads to memory updates. Simple acceptance represents the baseline user experience - the system works as intended without requiring adjustments." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9589423b", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "e6b80f99", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call\n", - "\n", - "Now we'll accept the email draft that confirms the meeting scheduling:\n", - "\n", - "1. The email draft is generated with knowledge of our calendar preferences\n", - "2. It includes details about the meeting time, duration, and purpose\n", - "3. We'll accept it without changes to complete the baseline test case\n", - "\n", - "After accepting, we'll check all memory stores to see if any updates occurred. As expected, simply accepting the agent's proposals doesn't provide strong learning signals - there's no clear feedback about what the user likes or dislikes about the agent's approach.\n", - "\n", - "The trace link shows the complete workflow execution, where we can see that the memory is used in the LLM call for response generation, but no memory updates occur, which is the expected behavior for simple acceptances." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12035cf6", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting the write_email tool call\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "fcbc178d", - "metadata": {}, - "source": [ - "We can look at the full messages, and the trace: \n", - "\n", - "https://smith.langchain.com/public/86ff6474-29fe-452e-8829-b05a91b458eb/r\n", - "\n", - "You'll notice that memory is used in the LLM call to respond. \n", - "\n", - "But the memory store is *not* updated, because we haven't added any feedback via HITL." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10ce8197", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_1)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "58201a21", - "metadata": {}, - "source": [ - "### Edit `write_email` and `schedule_meeting`\n", - "\n", - "This test explores how the system learns from direct edits to its proposed actions. When users modify the agent's suggestions, it creates clear, specific learning signals about their preferences:\n", - "\n", - "1. We'll use the same tax planning email as before\n", - "2. 
When the agent proposes a 45-minute meeting, we'll edit it to:\n", - " - Change the duration to 30 minutes (matching our stored preference)\n", - " - Make the subject line more concise\n", - "3. When the agent drafts an email, we'll edit it to be:\n", - " - Shorter and less formal\n", - " - Structured differently\n", - "\n", - "Edits provide the most explicit feedback about user preferences, letting the system learn exactly what changes are desired. We expect to see specific, targeted updates to our memory stores that reflect these edits." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ac260423", - "metadata": {}, - "outputs": [], - "source": [ - "# Same email as before\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_2 = uuid.uuid4()\n", - "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", - "\n", - "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt\n", - "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "5d73ba71", - "metadata": {}, - "source": [ - "Edit the `schedule_meeting` tool call\n", - "\n", - "When we edit the meeting proposal, we're providing direct, explicit feedback about our preferences. This creates a significant learning opportunity for the system:\n", - "\n", - "1. The agent initially proposes a 45-minute meeting (the duration requested in the email)\n", - "2. We edit it to 30 minutes and simplify the subject from \"Tax Planning Strategies Discussion\" to \"Tax Planning Discussion\"\n", - "3. This creates clear, specific feedback about our time preferences and naming conventions\n", - "\n", - "After the edit, we'll check the calendar preferences memory store to see how it's updated. 
The memory update should capture both:\n", - "- Our preference for shorter 30-minute meetings\n", - "- Our preference for more concise meeting subjects\n", - "\n", - "The trace reveals the precise memory update logic, showing how the system analyzes the difference between its proposal and our edits to extract meaningful patterns and preferences. We can see the detailed justification for each memory update, ensuring transparency in the learning process." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af760977", - "metadata": {}, - "outputs": [], - "source": [ - "# Now simulate user editing the schedule_meeting tool call\n", - "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", - "edited_schedule_args = {\n", - " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", - " \"subject\": \"Tax Planning Discussion\",\n", - " \"duration_minutes\": 30, # Changed from 45 to 30\n", - " \"preferred_day\": \"2025-04-22\",\n", - " \"start_time\": 14 \n", - "}\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after editing schedule_meeting\n", - "print(\"\\nChecking memory after editing schedule_meeting:\")\n", - "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "bbbb324f", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "```\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute 
meetings are also acceptable.\\n'}\n", - "```\n", - "\n", - "```\n", - "{'preferences': \"30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n\\nUser prefers 30 minute meetings over longer durations such as 45 minutes. When scheduling, default to 30 minutes unless otherwise specified. Subject lines should be concise (e.g., 'Tax Planning Discussion' instead of 'Tax Planning Strategies Discussion').\"}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "0dfc585a", - "metadata": {}, - "source": [ - "Looking at the memory after editing the calendar invitation, we can see that it's been updated:\n", - "\n", - "1. The system has identified that we prefer 30-minute meetings over longer durations\n", - "2. It's also captured our preference for concise meeting subjects\n", - "\n", - "What's particularly impressive about this memory update is:\n", - "- It doesn't just record our specific edit, but generalizes to a broader preference pattern\n", - "- It preserves all existing memory content while adding the new information\n", - "- It extracts multiple preference signals from a single edit interaction\n", - "\n", - "Now, let's edit the email draft to see how the system captures different types of communication preferences:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81a1fa37", - "metadata": {}, - "outputs": [], - "source": [ - "display_memory_content(store,(\"email_assistant\", \"response_preferences\"))\n", - "# Now simulate user editing the write_email tool call\n", - "print(\"\\nSimulating user editing the write_email tool call...\")\n", - "edited_email_args = {\n", - " \"to\": \"pm@client.com\",\n", - " \"subject\": \"Re: Tax season let's schedule call\",\n", - " \"content\": \"Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", - "}\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after editing write_email\n", - "print(\"\\nChecking memory after editing write_email:\")\n", - "display_memory_content(store,(\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "3ffbd5f9", - "metadata": {}, - "source": [ - "Our email edit reveals even more sophisticated learning capabilities:\n", - "\n", - "1. We've dramatically shortened and simplified the email content\n", - "2. We've changed the tone to be more casual\n", - "3. We've added a question asking for confirmation rather than assuming the time works\n", - "4. 
We've slightly altered the meeting details (day and time)\n", - "\n", - "Looking at the updated memory, we can see that the system has extracted a key insight about our communication style:\n", - "\n", - "```\n", - "When scheduling a meeting, ask the recipient to confirm if the proposed time works for them, rather than assuming and stating the meeting is already scheduled.\n", - "```\n", - "\n", - "This demonstrates the system's ability to:\n", - "- Analyze our edit not just at a superficial level, but to understand intent\n", - "- Extract generalizable principles from specific examples\n", - "- Preserve all existing guidance while adding new insights\n", - "- Maintain the organization and structure of the memory\n", - "\n", - "These targeted, high-quality memory updates will improve all future interactions without requiring repeated corrections." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ad818d6", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_2)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "6d92a42b", - "metadata": {}, - "source": [ - "### Respond (with feedback) `write_email`, `schedule_meeting`, and `question`\n", - "\n", - "Our final test set explores the \"response\" feedback pattern - providing guidance without directly editing or accepting. This conversational feedback mechanism offers a middle ground between acceptance and editing:\n", - "\n", - "1. First, we'll test feedback for meeting scheduling by requesting:\n", - " - Shorter duration (30 minutes instead of 45)\n", - " - Afternoon meeting times (after 2pm)\n", - " \n", - "2. Next, we'll test feedback for email drafting by requesting:\n", - " - Shorter, less formal language\n", - " - A specific closing statement about looking forward to the meeting\n", - " \n", - "3. 
Finally, we'll test feedback for questions by providing:\n", - " - A direct answer with additional context\n", - " - Specific preferences (brunch location, time)\n", - "\n", - "This natural language feedback approach lets users guide the assistant without having to do the work themselves. We expect to see detailed memory updates that extract the general principles from our specific feedback." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07676231", - "metadata": {}, - "outputs": [], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_5 = uuid.uuid4()\n", - "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", 
- "\n", - "# Check memory after first interrupt \n", - "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "b85fc45d", - "metadata": {}, - "source": [ - "Provide feedback for the `schedule_meeting` tool call\n", - "\n", - "Instead of directly editing the meeting proposal or simply accepting it, we'll provide natural language feedback:\n", - "\n", - "1. We request a 30-minute meeting instead of 45 minutes\n", - "2. We express a preference for afternoon meetings after 2pm\n", - "3. The system must interpret this feedback and generate a new proposal\n", - "\n", - "This conversational approach is often more natural and efficient than direct editing, especially for mobile users or those who prefer to give high-level direction rather than detailed edits.\n", - "\n", - "After providing feedback, we'll examine the calendar preferences memory to see how this natural language guidance is captured. We expect to see the system extract both the meeting duration and time-of-day preferences as general principles." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "30a151f1", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after providing feedback for schedule_meeting\n", - "print(\"\\nChecking memory after providing feedback for schedule_meeting:\")\n", - "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "8088757c", - "metadata": {}, - "source": [ - "Our memory check after providing feedback shows an elegantly simple calendar preference update:\n", - "\n", - "```\n", - "30 minute meetings are preferred, but 15 minute meetings are also acceptable.\n", - "Afternoon meetings after 2pm are preferred.\n", - "```\n", - "\n", - "The system has:\n", - "1. Captured both aspects of our feedback (duration and time of day)\n", - "2. Preserved the existing preference about 15-minute meetings\n", - "3. Added our preference for afternoon meetings after 2pm as a new line\n", - "4. Kept the format clean and readable\n", - "\n", - "This natural language feedback mechanism creates the same quality of memory updates as direct editing but requires less effort from the user. 
The system is able to extract structured preferences from unstructured feedback, showing its ability to learn from conversational interactions.\n", - "\n", - "Let's accept this revised meeting proposal and move to the email draft:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "545063be", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting schedule_meeting after feedback\n", - "print(\"\\nChecking memory after accepting schedule_meeting after feedback:\")\n", - "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "e72ede94", - "metadata": {}, - "source": [ - "Now provide feedback for the `write_email` tool call\n", - "\n", - "Similar to our meeting feedback, we'll now provide natural language guidance for the email draft:\n", - "\n", - "1. We request \"shorter and less formal\" language - a style preference\n", - "2. We ask for a specific closing statement about looking forward to the meeting\n", - "3. The system must interpret this guidance and rewrite the email accordingly\n", - "\n", - "After providing this feedback, we'll check the response preferences memory to see how these style and structure preferences are captured. We expect to see generalizable guidelines about email brevity, formality, and closing statements added to our preference profile." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9831ad2d", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after providing feedback for write_email\n", - "print(\"\\nChecking memory after providing feedback for write_email:\")\n", - "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "b5b360a2", - "metadata": {}, - "source": [ - "The memory update after our email feedback shows highly sophisticated learning about both meeting scheduling and email writing preferences:\n", - "\n", - "1. The system has added a complete new section to the response preferences entitled \"When writing email responses\" with two key preferences:\n", - " - \"Favor shorter and less formal language when possible, unless the context requires formality\"\n", - " - \"Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments\"\n", - "\n", - "2. 
It has also added a new bullet point to the \"When responding to meeting scheduling requests\" section:\n", - " - \"When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified\"\n", - "\n", - "This demonstrates the system's ability to:\n", - "- Organize learned preferences into appropriate categories\n", - "- Extract multiple insights from a single feedback instance\n", - "- Apply meeting preferences to both calendar and email contexts\n", - "- Capture nuance with appropriate qualifiers (\"when possible,\" \"unless otherwise specified\")\n", - "- Maintain the hierarchical structure of the memory\n", - "\n", - "The resulting email shows all these preferences applied: it's shorter, less formal, includes a closing statement about looking forward to the chat, and correctly references the 30-minute meeting time." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8c64999e", - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting write_email after feedback\n", - "print(\"\\nChecking memory after accepting write_email after feedback:\")\n", - "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "f85e63cb", - "metadata": {}, - "source": [ - "Look at the full message history." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fa9cf91d", - "metadata": {}, - "outputs": [], - "source": [ - "state = graph.get_state(thread_config_5)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "80ac9df0-cd39-4c32-a073-c2482d9554b6", - "metadata": {}, - "source": [ - "## Local Deployment\n", - "\n", - "You can find this graph with memory integration in the `src/email_assistant` directory:\n", - "\n", - "* `src/email_assistant/email_assistant_hitl_memory.py`" - ] - }, - { - "cell_type": "markdown", - "id": "5a4aa8b0-f8b7-4197-8701-87dda60daa26", - "metadata": {}, - "source": [ - "Email to test: \n", - "```\n", - "{\n", - " \"author\": \"Alice Smith \",\n", - " \"to\": \"John Doe \",\n", - " \"subject\": \"Quick question about API documentation\",\n", - " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", - "}\n", - "```\n", - "\n", - "Testing this locally gives you the full experience of a memory-enabled HITL system:\n", - "\n", - "1. **Start the local server**: Run `langgraph dev` to launch the agent locally\n", - "2. **Connect Agent Inbox**: Use the graph URL from the `langgraph.json` file\n", - "3. **Submit test emails**: Try different email types to see classification in action\n", - "4. **Provide various feedback types**: Try accepting, editing, ignoring, and responding\n", - "5. 
**Observe memory evolution**: Check the Memory tab in LangGraph Studio to see changes\n", - "\n", - "![inbox](img/agent-inbox-edit.png)" - ] - }, - { - "cell_type": "markdown", - "id": "b075a3ea", - "metadata": {}, - "source": [ - "The Memory tab in LangGraph Studio offers a real-time view of how your preferences are being captured and updated with each interaction:\n", - "\n", - "![studio-img](img/memory-studio.png)\n", - "\n", - "Through continued use, the system becomes increasingly personalized:\n", - "- It learns which emails you want to respond to, be notified about, or ignore\n", - "- It adapts to your communication style preferences\n", - "- It remembers your scheduling preferences\n", - "- It refines its understanding with each interaction\n", - "\n", - "This combination of HITL and memory creates a system that balances automation with control - handling routine tasks automatically while learning from your feedback to become more aligned with your preferences over time." - ] - }, - { - "cell_type": "markdown", - "id": "f2ad7580", - "metadata": {}, - "source": [ - "## Hosted Deployment with Gmail Tools\n", - "\n", - "If you want to actually run this on your own email, you can deploy the graph with Gmail tools. 
\n", - "\n", - "Set up your Gmail credentials [following here](https://github.com/langchain-ai/agents-from-scratch/blob/main/src/email_assistant/tools/gmail/README.md).\n", - "\n", - "There is a graph set up with Gmail tools:\n", - "\n", - "```shell\n", - "python src/email_assistant/email_assistant_hitl_memory_gmail.py\n", - "```\n", - "\n", - "[One of the deployment options is `hosted`](https://langchain-ai.github.io/langgraph/tutorials/deployment/#other-deployment-options), and you can simply connect the deployed graph URL to the Agent Inbox as done with the local deployment.\n", - "\n", - "## Improving Memory \n", - "\n", - "Our current memory schema and updating is extremely simple: \n", - "\n", - "* Our schema is a string\n", - "* We always overwrite the existing memory with a new string\n", - " \n", - "The store can be easily [configured for semantic search](https://langchain-ai.github.io/langgraph/cloud/deployment/semantic_search/) over a collection of memories. \n", - "\n", - "Also consider using [LangMem](https://langchain-ai.github.io/langmem/) for more advanced memory management." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "846dbb9b-5c9a-4236-912e-02b3d9f674f6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{"cells": [{"cell_type": "markdown", "id": "d047044f", "metadata": {}, "source": ["# Agents with Memory\n", "\n", "We have an email assistant that uses a router to triage emails and then passes the email to the agent for response generation. We've also evaluated it and added human-in-the-loop (HITL) to review specific tool calls. Now, we add memory, giving our assistant the ability to remember our HITL feedback!\n", "\n", "![overview-img](img/overview_memory.png)"]}, {"cell_type": "markdown", "id": "143094b4", "metadata": {}, "source": ["#### Load Environment Variables"]}, {"cell_type": "code", "execution_count": null, "id": "085c21ad", "metadata": {}, "outputs": [], "source": ["from dotenv import load_dotenv\n", "load_dotenv(\"../.env\")"]}, {"cell_type": "markdown", "id": "b99855af", "metadata": {}, "source": ["## Memory in LangGraph\n", "\n", "### Thread-Scoped and Across-Thread Memory\n", "\n", "First, it's worth explaining how [memory works in LangGraph](https://langchain-ai.github.io/langgraph/concepts/memory/). LangGraph offers two distinct types of memory that serve complementary purposes:\n", "\n", "**Thread-Scoped Memory (Short-term)** operates within the boundaries of a single conversation thread. 
It's automatically managed as part of the graph's state and persisted through thread-scoped checkpoints. This memory type retains conversation history, uploaded files, retrieved documents, and other artifacts generated during the interaction. Think of it as the working memory that maintains context within one specific conversation, allowing the agent to reference earlier messages or actions without starting from scratch each time.\n", "\n", "**Across-Thread Memory (Long-term)** extends beyond individual conversations, creating a persistent knowledge base that spans multiple sessions. This memory is stored as JSON documents in a memory store, organized by namespaces (like folders) and distinct keys (like filenames). Unlike thread-scoped memory, this information persists even after conversations end, enabling the system to recall user preferences, past decisions, and accumulated knowledge. This is what allows an agent to truly learn and adapt over time, rather than treating each interaction as isolated.\n", "\n", "![short-vs-long-term-memory](img/short-vs-long.png)\n", "\n", "The [Store](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.BaseStore) is the foundation of this architecture, providing a flexible database where memories can be organized, retrieved, and updated. What makes this approach powerful is that regardless of which memory type you're working with, the same Store interface provides consistent access patterns. This allows your agent's code to remain unchanged whether you're using a simple in-memory implementation during development or a production-grade database in deployment. \n", "\n", "### LangGraph Store\n", "\n", "LangGraph offers different [Store implementations](https://langchain-ai.github.io/langgraph/reference/store/#langgraph.store.base.BaseStore) depending on your [deployment](https://langchain-ai.github.io/langgraph/tutorials/deployment/#other-deployment-options):\n", "\n", "1. 
**In-Memory (e.g., notebooks)**:\n", " - Uses `from langgraph.store.memory import InMemoryStore`\n", " - Purely a Python dictionary in memory with no persistence\n", " - Data is lost when the process terminates\n", " - Useful for quick experiments and testing\n", " - Semantic search can be configured as shown [here](https://langchain-ai.github.io/langgraph/how-tos/memory/semantic-search/)\n", "\n", "2. **Local Development with `langgraph dev`**:\n", " - Similar to InMemoryStore but with pseudo-persistence\n", " - Data is pickled to the local filesystem between restarts\n", " - Lightweight and fast, no need for external databases\n", " - Semantic search can be configured as shown [here](https://langchain-ai.github.io/langgraph/cloud/deployment/semantic_search/)\n", " - Great for development but not designed for production use\n", "\n", "3. **LangGraph Platform or Production Deployments**:\n", " - Uses PostgreSQL with pgvector for production-grade persistence\n", " - Fully persistent data storage with reliable backups\n", " - Scalable for larger datasets\n", " - Semantic search can be configured as shown [here](https://langchain-ai.github.io/langgraph/cloud/deployment/semantic_search/)\n", " - Default distance metric is cosine similarity (customizable)\n", "\n", "Let's use the `InMemoryStore` here in the notebook! "]}, {"cell_type": "code", "execution_count": null, "id": "7fa1dda7", "metadata": {}, "outputs": [], "source": ["from langgraph.store.memory import InMemoryStore\n", "in_memory_store = InMemoryStore()"]}, {"cell_type": "markdown", "id": "aceb204c", "metadata": {}, "source": ["Memories are namespaced by a tuple, which in this specific example will be (`<user_id>`, \"memories\"). 
The namespace can be any length and represent anything, it does not have to be user specific."]}, {"cell_type": "code", "execution_count": null, "id": "f0488a5f", "metadata": {}, "outputs": [], "source": ["user_id = \"1\"\n", "namespace_for_memory = (user_id, \"memories\")"]}, {"cell_type": "markdown", "id": "3da8b303", "metadata": {}, "source": ["We use the `store.put` method to save memories to our namespace in the store. When we do this, we specify the namespace, as defined above, and a key-value pair for the memory: the key is simply a unique identifier for the memory (memory_id) and the value (a dictionary) is the memory itself."]}, {"cell_type": "code", "execution_count": null, "id": "4af95b32", "metadata": {}, "outputs": [], "source": ["import uuid\n", "memory_id = str(uuid.uuid4())\n", "memory = {\"food_preference\" : \"I like pizza\"}\n", "in_memory_store.put(namespace_for_memory, memory_id, memory)"]}, {"cell_type": "markdown", "id": "60408492", "metadata": {}, "source": ["We can read out memories in our namespace using the `store.search` method, which will return all memories for a given user as a list. The most recent memory is the last in the list. Each memory type is a Python class (`Item`) with certain attributes. We can access it as a dictionary by converting via `.dict`. 
The attributes it has are shown below, but the most important one is typically `value`."]}, {"cell_type": "code", "execution_count": null, "id": "4c25f5ef", "metadata": {}, "outputs": [], "source": ["memories = in_memory_store.search(namespace_for_memory)\n", "memories[-1].dict()"]}, {"cell_type": "markdown", "id": "44f3e781", "metadata": {}, "source": ["To use this in a graph, all we need to do is compile the graph with the store:"]}, {"cell_type": "code", "execution_count": null, "id": "6476b361", "metadata": {}, "outputs": [], "source": ["# We need this because we want to enable threads (conversations)\n", "from langgraph.checkpoint.memory import InMemorySaver\n", "checkpointer = InMemorySaver()\n", "# We need this because we want to enable across-thread memory\n", "from langgraph.store.memory import InMemoryStore\n", "in_memory_store = InMemoryStore()\n", "# Compile the graph with the checkpointer and store\n", "# graph = graph.compile(checkpointer=checkpointer, store=in_memory_store)"]}, {"cell_type": "markdown", "id": "9c982928", "metadata": {}, "source": ["The store is then accessible in any node of the graph, as we'll see below!\n", "\n", "## Adding Memory to our Assistant\n", "\n", "Let's take our graph with HITL and add memory to it. This will be very similar to what we had previously. 
We'll simply update memory in the store when we get feedback from the user.\n", "\n", "![overview-img](img/HITL_flow_memory.png)"]}, {"cell_type": "code", "execution_count": null, "id": "38308fc3", "metadata": {}, "outputs": [], "source": ["%load_ext autoreload\n", "%autoreload 2\n", "\n", "from dotenv import load_dotenv\n", "load_dotenv(\".env\", override=True)\n", "\n", "from typing import Literal\n", "from datetime import datetime\n", "from pydantic import BaseModel\n", "\n", "from langchain.chat_models import init_chat_model\n", "from langchain_core.tools import tool\n", "\n", "from langgraph.graph import StateGraph, START, END\n", "from langgraph.store.base import BaseStore\n", "from langgraph.types import interrupt, Command\n", "\n", "from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl_memory, default_triage_instructions, default_background, default_response_preferences, default_cal_preferences\n", "from email_assistant.tools.default.prompt_templates import HITL_MEMORY_TOOLS_PROMPT\n", "from email_assistant.schemas import State, RouterSchema, StateInput\n", "from email_assistant.utils import parse_email, format_for_display, format_email_markdown\n", "\n", "# Agent tools \n", "@tool\n", "def write_email(to: str, subject: str, content: str) -> str:\n", " \"\"\"Write and send an email.\"\"\"\n", " # Placeholder response - in real app would send email\n", " return f\"Email sent to {to} with subject '{subject}' and content: {content}\"\n", "\n", "@tool\n", "def schedule_meeting(\n", " attendees: list[str], subject: str, duration_minutes: int, preferred_day: datetime, start_time: int\n", ") -> str:\n", " \"\"\"Schedule a calendar meeting.\"\"\"\n", " # Placeholder response - in real app would check calendar and schedule\n", " date_str = preferred_day.strftime(\"%A, %B %d, %Y\")\n", " return f\"Meeting '{subject}' scheduled on {date_str} at {start_time} for {duration_minutes} minutes with {len(attendees)} 
attendees\"\n", "\n", "@tool\n", "def check_calendar_availability(day: str) -> str:\n", " \"\"\"Check calendar availability for a given day.\"\"\"\n", " # Placeholder response - in real app would check actual calendar\n", " return f\"Available times on {day}: 9:00 AM, 2:00 PM, 4:00 PM\"\n", "\n", "@tool\n", "class Question(BaseModel):\n", " \"\"\"Question to ask user.\"\"\"\n", " content: str\n", "\n", "@tool\n", "class Done(BaseModel):\n", " \"\"\"E-mail has been sent.\"\"\"\n", " done: bool\n", " \n", "# All tools available to the agent\n", "tools = [\n", " write_email, \n", " schedule_meeting, \n", " check_calendar_availability, \n", " Question, \n", " Done\n", "]\n", "\n", "tools_by_name = {tool.name: tool for tool in tools}\n", "\n", "# Initialize the LLM for use with router / structured output\n", "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", "llm_router = llm.with_structured_output(RouterSchema) \n", "\n", "# Initialize the LLM, enforcing tool use (of any available tools) for agent\n", "llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0)\n", "llm_with_tools = llm.bind_tools(tools, tool_choice=\"required\")"]}, {"cell_type": "markdown", "id": "03538f56", "metadata": {}, "source": ["Now, this is the critical part! We currently don't capture any feedback from the user in our graph. \n", "\n", "### Memory Management \n", "\n", "What we *want* to do is fairly straightforward: we want to add the feedback to the memory `Store`. If we compile our graph with the `Store`, we can access it in any node. So that is not a problem! \n", "\n", "But we have to answer two questions: \n", "\n", "1) how do we want the memory to be structured?\n", "2) how do we want to update the memory?\n", "\n", "For 1) we'll just store memories as string to keep things simple. 
In the function below, we'll fetch the memory from the store as a string, initializing it with a default value if it doesn't exist yet."]}, {"cell_type": "code", "execution_count": null, "id": "d2715152-2d19-4449-be4b-fdc602eee52d", "metadata": {}, "outputs": [], "source": ["default_triage_instructions"]}, {"cell_type": "code", "execution_count": null, "id": "ca9ab99d-bc21-4cf7-a58a-261e82920566", "metadata": {}, "outputs": [], "source": ["default_cal_preferences"]}, {"cell_type": "code", "execution_count": null, "id": "f6cd98f1-15a7-4fbb-8cce-cbbb0503d22b", "metadata": {}, "outputs": [], "source": ["default_response_preferences"]}, {"cell_type": "code", "execution_count": null, "id": "d195aa00", "metadata": {}, "outputs": [], "source": ["def get_memory(store, namespace, default_content=None):\n", " \"\"\"Get memory from the store or initialize with default if it doesn't exist.\n", " \n", " Args:\n", " store: LangGraph BaseStore instance to search for existing memory\n", " namespace: Tuple defining the memory namespace, e.g. 
(\"email_assistant\", \"triage_preferences\")\n", " default_content: Default content to use if memory doesn't exist\n", " \n", " Returns:\n", " str: The content of the memory profile, either from existing memory or the default\n", " \"\"\"\n", " # Search for existing memory with namespace and key\n", " user_preferences = store.get(namespace, \"user_preferences\")\n", " \n", " # If memory exists, return its content (the value)\n", " if user_preferences:\n", " return user_preferences.value\n", " \n", " # If memory doesn't exist, add it to the store and return the default content\n", " else:\n", " # Namespace, key, value\n", " store.put(namespace, \"user_preferences\", default_content)\n", " user_preferences = default_content\n", " \n", " # Return the default content\n", " return user_preferences "]}, {"cell_type": "markdown", "id": "cc5181e6", "metadata": {}, "source": ["For 2) updating memory, we can use a few tricks from the [GPT-4.1 prompting guide](https://cookbook.openai.com/examples/gpt4-1_prompting_guide) to help us update the memory: \n", "\n", "* For optimal performance, repeat the key instructions at the start and end of the prompt\n", "* Create clear, explicit instructions \n", "* Use XML delimiters for structure\n", "* Provide examples "]}, {"cell_type": "code", "execution_count": null, "id": "1f8aa70e", "metadata": {}, "outputs": [], "source": ["MEMORY_UPDATE_INSTRUCTIONS = \"\"\"\n", "# Role and Objective\n", "You are a memory profile manager for an email assistant agent that selectively updates user preferences based on feedback messages from human-in-the-loop interactions with the email assistant.\n", "\n", "# Instructions\n", "- NEVER overwrite the entire memory profile\n", "- ONLY make targeted additions of new information\n", "- ONLY update specific facts that are directly contradicted by feedback messages\n", "- PRESERVE all other existing information in the profile\n", "- Format the profile consistently with the original style\n", "- Generate 
the profile as a string\n", "\n", "# Reasoning Steps\n", "1. Analyze the current memory profile structure and content\n", "2. Review feedback messages from human-in-the-loop interactions\n", "3. Extract relevant user preferences from these feedback messages (such as edits to emails/calendar invites, explicit feedback on assistant performance, user decisions to ignore certain emails)\n", "4. Compare new information against existing profile\n", "5. Identify only specific facts to add or update\n", "6. Preserve all other existing information\n", "7. Output the complete updated profile\n", "\n", "# Example\n", "\n", "RESPOND:\n", "- wife\n", "- specific questions\n", "- system admin notifications\n", "NOTIFY: \n", "- meeting invites\n", "IGNORE:\n", "- marketing emails\n", "- company-wide announcements\n", "- messages meant for other teams\n", "\n", "\n", "\n", "\"The assistant shouldn't have responded to that system admin notification.\"\n", "\n", "\n", "\n", "RESPOND:\n", "- wife\n", "- specific questions\n", "NOTIFY: \n", "- meeting invites\n", "- system admin notifications\n", "IGNORE:\n", "- marketing emails\n", "- company-wide announcements\n", "- messages meant for other teams\n", "\n", "\n", "# Process current profile for {namespace}\n", "\n", "{current_profile}\n", "\n", "\n", "Think step by step about what specific feedback is being provided and what specific information should be added or updated in the profile while preserving everything else.\"\"\"\n", "\n", "MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT = \"\"\"\n", "Remember:\n", "- NEVER overwrite the entire profile\n", "- ONLY make targeted additions or changes based on explicit feedback\n", "- PRESERVE all existing information not directly contradicted\n", "- Output the complete updated profile as a string\n", "\"\"\"\n", "\n", "class UserPreferences(BaseModel):\n", " \"\"\"User preferences.\"\"\"\n", " preferences: str\n", " justification: str\n", "\n", "def update_memory(store, namespace, messages):\n", 
" \"\"\"Update memory profile in the store.\n", " \n", " Args:\n", " store: LangGraph BaseStore instance to update memory\n", " namespace: Tuple defining the memory namespace, e.g. (\"email_assistant\", \"triage_preferences\")\n", " messages: List of messages to update the memory with\n", " \"\"\"\n", "\n", " # Get the existing memory\n", " user_preferences = store.get(namespace, \"user_preferences\")\n", "\n", " # Update the memory\n", " llm = init_chat_model(\"openai:gpt-4.1\", temperature=0.0).with_structured_output(UserPreferences)\n", " result = llm.invoke(\n", " [\n", " {\"role\": \"system\", \"content\": MEMORY_UPDATE_INSTRUCTIONS.format(current_profile=user_preferences.value, namespace=namespace)},\n", " {\"role\": \"user\", \"content\": f\"Think carefully and update the memory profile based upon these user messages:\"}\n", " ] + messages\n", " )\n", " \n", " # Save the updated memory to the store\n", " store.put(namespace, \"user_preferences\", result.preferences)"]}, {"cell_type": "markdown", "id": "8af20960", "metadata": {}, "source": ["We set up the triage router as we had before, with one small change"]}, {"cell_type": "code", "execution_count": null, "id": "b1a789ab", "metadata": {}, "outputs": [], "source": ["def triage_router(state: State, store: BaseStore) -> Command[Literal[\"triage_interrupt_handler\", \"response_agent\", \"__end__\"]]:\n", " \"\"\"Analyze email content to decide if we should respond, notify, or ignore.\n", "\n", " The triage step prevents the assistant from wasting time on:\n", " - Marketing emails and spam\n", " - Company-wide announcements\n", " - Messages meant for other teams\n", " \"\"\"\n", " # Parse the email input\n", " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", " user_prompt = triage_user_prompt.format(\n", " author=author, to=to, subject=subject, email_thread=email_thread\n", " )\n", "\n", " # Create email markdown for Agent Inbox in case of notification \n", " email_markdown = 
format_email_markdown(subject, author, to, email_thread)\n", "\n", " # Search for existing triage_preferences memory\n", " triage_instructions = get_memory(store, (\"email_assistant\", \"triage_preferences\"), default_triage_instructions)\n", "\n", " # Format system prompt with background and triage instructions\n", " system_prompt = triage_system_prompt.format(\n", " background=default_background,\n", " triage_instructions=triage_instructions,\n", " )\n", "\n", " # Run the router LLM\n", " result = llm_router.invoke(\n", " [\n", " {\"role\": \"system\", \"content\": system_prompt},\n", " {\"role\": \"user\", \"content\": user_prompt},\n", " ]\n", " )\n", "\n", " # Decision\n", " classification = result.classification\n", "\n", " # Process the classification decision\n", " if classification == \"respond\":\n", " print(\"\ud83d\udce7 Classification: RESPOND - This email requires a response\")\n", " # Next node\n", " goto = \"response_agent\"\n", " # Update the state\n", " update = {\n", " \"classification_decision\": result.classification,\n", " \"messages\": [{\"role\": \"user\",\n", " \"content\": f\"Respond to the email: {email_markdown}\"\n", " }],\n", " }\n", " \n", " elif classification == \"ignore\":\n", " print(\"\ud83d\udeab Classification: IGNORE - This email can be safely ignored\")\n", "\n", " # Next node\n", " goto = END\n", " # Update the state\n", " update = {\n", " \"classification_decision\": classification,\n", " }\n", "\n", " elif classification == \"notify\":\n", " print(\"\ud83d\udd14 Classification: NOTIFY - This email contains important information\") \n", "\n", " # Next node\n", " goto = \"triage_interrupt_handler\"\n", " # Update the state\n", " update = {\n", " \"classification_decision\": classification,\n", " }\n", "\n", " else:\n", " raise ValueError(f\"Invalid classification: {classification}\")\n", " \n", " return Command(goto=goto, update=update)\n"]}, {"cell_type": "markdown", "id": "a6be4d63", "metadata": {}, "source": ["We only 
need to make a small change to the interrupt handler to update the memory when the user provides feedback. "]}, {"cell_type": "code", "execution_count": null, "id": "f76ef46d", "metadata": {}, "outputs": [], "source": ["def triage_interrupt_handler(state: State, store: BaseStore) -> Command[Literal[\"response_agent\", \"__end__\"]]:\n", " \"\"\"Handles interrupts from the triage step\"\"\"\n", " \n", " # Parse the email input\n", " author, to, subject, email_thread = parse_email(state[\"email_input\"])\n", "\n", " # Create email markdown for Agent Inbox in case of notification \n", " email_markdown = format_email_markdown(subject, author, to, email_thread)\n", "\n", " # Create messages\n", " messages = [{\"role\": \"user\",\n", " \"content\": f\"Email to notify user about: {email_markdown}\"\n", " }]\n", "\n", " # Create interrupt for Agent Inbox\n", " request = {\n", " \"action_request\": {\n", " \"action\": f\"Email Assistant: {state['classification_decision']}\",\n", " \"args\": {}\n", " },\n", " \"config\": {\n", " \"allow_ignore\": True, \n", " \"allow_respond\": True,\n", " \"allow_edit\": False, \n", " \"allow_accept\": False, \n", " },\n", " # Email to show in Agent Inbox\n", " \"description\": email_markdown,\n", " }\n", "\n", " # Send to Agent Inbox and wait for response\n", " response = interrupt([request])[0]\n", "\n", " # If user provides feedback, go to response agent and use feedback to respond to email \n", " if response[\"type\"] == \"response\":\n", " # Add feedback to messages \n", " user_input = response[\"args\"]\n", " messages.append({\"role\": \"user\",\n", " \"content\": f\"User wants to reply to the email. 
Use this feedback to respond: {user_input}\"\n", " })\n", " # This is new: update triage_preferences with feedback\n", " update_memory(store, (\"email_assistant\", \"triage_preferences\"), [{\n", " \"role\": \"user\",\n", " \"content\": f\"The user decided to respond to the email, so update the triage preferences to capture this.\"\n", " }] + messages)\n", "\n", " goto = \"response_agent\"\n", "\n", " # If user ignores email, go to END\n", " elif response[\"type\"] == \"ignore\":\n", " # Make note of the user's decision to ignore the email\n", " messages.append({\"role\": \"user\",\n", " \"content\": f\"The user decided to ignore the email even though it was classified as notify. Update triage preferences to capture this.\"\n", " })\n", " # This is new: triage_preferences with feedback\n", " update_memory(store, (\"email_assistant\", \"triage_preferences\"), messages)\n", " goto = END\n", "\n", " # Catch all other responses\n", " else:\n", " raise ValueError(f\"Invalid response: {response}\")\n", "\n", " # Update the state \n", " update = {\n", " \"messages\": messages,\n", " }\n", "\n", " return Command(goto=goto, update=update)"]}, {"cell_type": "markdown", "id": "9cd428f5", "metadata": {}, "source": ["### Incorporating Memory into LLM Responses\n", "\n", "Now that we have memory managers set up, we can use the stored preferences when generating responses"]}, {"cell_type": "code", "execution_count": null, "id": "a82b17a6", "metadata": {}, "outputs": [], "source": ["def llm_call(state: State, store: BaseStore):\n", " \"\"\"LLM decides whether to call a tool or not\"\"\"\n", "\n", " # Search for existing cal_preferences memory\n", " cal_preferences = get_memory(store, (\"email_assistant\", \"cal_preferences\"), default_cal_preferences)\n", " \n", " # Search for existing response_preferences memory\n", " response_preferences = get_memory(store, (\"email_assistant\", \"response_preferences\"), default_response_preferences)\n", "\n", " return {\n", " \"messages\": 
[\n", " llm_with_tools.invoke(\n", " [\n", " {\"role\": \"system\", \"content\": agent_system_prompt_hitl_memory.format(tools_prompt=HITL_MEMORY_TOOLS_PROMPT,\n", " background=default_background,\n", " response_preferences=response_preferences, \n", " cal_preferences=cal_preferences)}\n", " ]\n", " + state[\"messages\"]\n", " )\n", " ]\n", " }"]}, {"cell_type": "markdown", "id": "e60aff5d", "metadata": {}, "source": ["### Memory Integration in the Interrupt Handler\n", "\n", "Similarly, we'll add memory to the interrupt handler! "]}, {"cell_type": "code", "execution_count": null, "id": "126d3680", "metadata": {}, "outputs": [], "source": ["def interrupt_handler(state: State, store: BaseStore) -> Command[Literal[\"llm_call\", \"__end__\"]]:\n", " \"\"\"Creates an interrupt for human review of tool calls\"\"\"\n", " \n", " # Store messages\n", " result = []\n", "\n", " # Go to the LLM call node next\n", " goto = \"llm_call\"\n", "\n", " # Iterate over the tool calls in the last message\n", " for tool_call in state[\"messages\"][-1].tool_calls:\n", " \n", " # Allowed tools for HITL\n", " hitl_tools = [\"write_email\", \"schedule_meeting\", \"Question\"]\n", " \n", " # If tool is not in our HITL list, execute it directly without interruption\n", " if tool_call[\"name\"] not in hitl_tools:\n", "\n", " # Execute search_memory and other tools without interruption\n", " tool = tools_by_name[tool_call[\"name\"]]\n", " observation = tool.invoke(tool_call[\"args\"])\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": tool_call[\"id\"]})\n", " continue\n", " \n", " # Get original email from email_input in state\n", " email_input = state[\"email_input\"]\n", " author, to, subject, email_thread = parse_email(email_input)\n", " original_email_markdown = format_email_markdown(subject, author, to, email_thread)\n", " \n", " # Format tool call for display and prepend the original email\n", " tool_display = format_for_display(state, tool_call)\n", " 
description = original_email_markdown + tool_display\n", "\n", " # Configure what actions are allowed in Agent Inbox\n", " if tool_call[\"name\"] == \"write_email\":\n", " config = {\n", " \"allow_ignore\": True,\n", " \"allow_respond\": True,\n", " \"allow_edit\": True,\n", " \"allow_accept\": True,\n", " }\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " config = {\n", " \"allow_ignore\": True,\n", " \"allow_respond\": True,\n", " \"allow_edit\": True,\n", " \"allow_accept\": True,\n", " }\n", " elif tool_call[\"name\"] == \"Question\":\n", " config = {\n", " \"allow_ignore\": True,\n", " \"allow_respond\": True,\n", " \"allow_edit\": False,\n", " \"allow_accept\": False,\n", " }\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " # Create the interrupt request\n", " request = {\n", " \"action_request\": {\n", " \"action\": tool_call[\"name\"],\n", " \"args\": tool_call[\"args\"]\n", " },\n", " \"config\": config,\n", " \"description\": description,\n", " }\n", "\n", " # Send to Agent Inbox and wait for response\n", " response = interrupt([request])[0]\n", "\n", " # Handle the responses \n", " if response[\"type\"] == \"accept\":\n", "\n", " # Execute the tool with original args\n", " tool = tools_by_name[tool_call[\"name\"]]\n", " observation = tool.invoke(tool_call[\"args\"])\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": tool_call[\"id\"]})\n", " \n", " elif response[\"type\"] == \"edit\":\n", "\n", " # Tool selection \n", " tool = tools_by_name[tool_call[\"name\"]]\n", " initial_tool_call = tool_call[\"args\"]\n", " \n", " # Get edited args from Agent Inbox\n", " edited_args = response[\"args\"][\"args\"]\n", "\n", " # Update the AI message's tool call with edited content (reference to the message in the state)\n", " ai_message = state[\"messages\"][-1] # Get the most recent message from the state\n", " current_id = tool_call[\"id\"] # Store the ID of the tool call 
being edited\n", " \n", " # Create a new list of tool calls by filtering out the one being edited and adding the updated version\n", " # This avoids modifying the original list directly (immutable approach)\n", " updated_tool_calls = [tc for tc in ai_message.tool_calls if tc[\"id\"] != current_id] + [\n", " {\"type\": \"tool_call\", \"name\": tool_call[\"name\"], \"args\": edited_args, \"id\": current_id}\n", " ]\n", "\n", " # Create a new copy of the message with updated tool calls rather than modifying the original\n", " # This ensures state immutability and prevents side effects in other parts of the code\n", " result.append(ai_message.model_copy(update={\"tool_calls\": updated_tool_calls}))\n", "\n", " # Save feedback in memory and update the write_email tool call with the edited content from Agent Inbox\n", " if tool_call[\"name\"] == \"write_email\":\n", " \n", " # Execute the tool with edited args\n", " observation = tool.invoke(edited_args)\n", " \n", " # Add only the tool response message\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", "\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"response_preferences\"), [{\n", " \"role\": \"user\",\n", " \"content\": f\"User edited the email response. Here is the initial email generated by the assistant: {initial_tool_call}. Here is the edited email: {edited_args}. 
Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", " \n", " # Save feedback in memory and update the schedule_meeting tool call with the edited content from Agent Inbox\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " \n", " # Execute the tool with edited args\n", " observation = tool.invoke(edited_args)\n", " \n", " # Add only the tool response message\n", " result.append({\"role\": \"tool\", \"content\": observation, \"tool_call_id\": current_id})\n", "\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"cal_preferences\"), [{\n", " \"role\": \"user\",\n", " \"content\": f\"User edited the calendar invitation. Here is the initial calendar invitation generated by the assistant: {initial_tool_call}. Here is the edited calendar invitation: {edited_args}. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", " \n", " # Catch all other tool calls\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " elif response[\"type\"] == \"ignore\":\n", "\n", " if tool_call[\"name\"] == \"write_email\":\n", " # Don't execute the tool, and tell the agent how to proceed\n", " result.append({\"role\": \"tool\", \"content\": \"User ignored this email draft. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", " # Go to END\n", " goto = END\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"triage_preferences\"), state[\"messages\"] + result + [{\n", " \"role\": \"user\",\n", " \"content\": f\"The user ignored the email draft. That means they did not want to respond to the email. Update the triage preferences to ensure emails of this type are not classified as respond. 
Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", "\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " # Don't execute the tool, and tell the agent how to proceed\n", " result.append({\"role\": \"tool\", \"content\": \"User ignored this calendar meeting draft. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", " # Go to END\n", " goto = END\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"triage_preferences\"), state[\"messages\"] + result + [{\n", " \"role\": \"user\",\n", " \"content\": f\"The user ignored the calendar meeting draft. That means they did not want to schedule a meeting for this email. Update the triage preferences to ensure emails of this type are not classified as respond. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", "\n", " elif tool_call[\"name\"] == \"Question\":\n", " # Don't execute the tool, and tell the agent how to proceed\n", " result.append({\"role\": \"tool\", \"content\": \"User ignored this question. Ignore this email and end the workflow.\", \"tool_call_id\": tool_call[\"id\"]})\n", " # Go to END\n", " goto = END\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"triage_preferences\"), state[\"messages\"] + result + [{\n", " \"role\": \"user\",\n", " \"content\": f\"The user ignored the Question. That means they did not want to answer the question or deal with this email. Update the triage preferences to ensure emails of this type are not classified as respond. 
Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", "\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " elif response[\"type\"] == \"response\":\n", " # User provided feedback\n", " user_feedback = response[\"args\"]\n", " if tool_call[\"name\"] == \"write_email\":\n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the email. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"response_preferences\"), state[\"messages\"] + result + [{\n", " \"role\": \"user\",\n", " \"content\": f\"User gave feedback, which we can use to update the response preferences. Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", "\n", " elif tool_call[\"name\"] == \"schedule_meeting\":\n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User gave feedback, which can we incorporate into the meeting request. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", " # This is new: update the memory\n", " update_memory(store, (\"email_assistant\", \"cal_preferences\"), state[\"messages\"] + result + [{\n", " \"role\": \"user\",\n", " \"content\": f\"User gave feedback, which we can use to update the calendar preferences. 
Follow all instructions above, and remember: {MEMORY_UPDATE_INSTRUCTIONS_REINFORCEMENT}.\"\n", " }])\n", "\n", " elif tool_call[\"name\"] == \"Question\":\n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User answered the question, which can we can use for any follow up actions. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", "\n", " else:\n", " raise ValueError(f\"Invalid tool call: {tool_call['name']}\")\n", "\n", " # Update the state \n", " update = {\n", " \"messages\": result,\n", " }\n", "\n", " return Command(goto=goto, update=update)"]}, {"cell_type": "markdown", "id": "ecedcaec", "metadata": {}, "source": ["The rest is the same as before!"]}, {"cell_type": "code", "execution_count": null, "id": "7041f50d", "metadata": {}, "outputs": [], "source": ["from email_assistant.utils import show_graph\n", "\n", "# Conditional edge function\n", "def should_continue(state: State, store: BaseStore) -> Literal[\"interrupt_handler\", END]:\n", " \"\"\"Route to tool handler, or end if Done tool called\"\"\"\n", " messages = state[\"messages\"]\n", " last_message = messages[-1]\n", " if last_message.tool_calls:\n", " for tool_call in last_message.tool_calls: \n", " if tool_call[\"name\"] == \"Done\":\n", " return END\n", " else:\n", " return \"interrupt_handler\"\n", "\n", "# Build workflow\n", "agent_builder = StateGraph(State)\n", "\n", "# Add nodes - with store parameter\n", "agent_builder.add_node(\"llm_call\", llm_call)\n", "agent_builder.add_node(\"interrupt_handler\", interrupt_handler)\n", "\n", "# Add edges\n", "agent_builder.add_edge(START, \"llm_call\")\n", "agent_builder.add_conditional_edges(\n", " \"llm_call\",\n", " should_continue,\n", " {\n", " \"interrupt_handler\": \"interrupt_handler\",\n", " END: END,\n", " },\n", ")\n", "\n", "# Compile the agent\n", "response_agent = agent_builder.compile()\n", "\n", "# Build overall 
workflow with store and checkpointer\n", "overall_workflow = (\n", " StateGraph(State, input=StateInput)\n", " .add_node(triage_router)\n", " .add_node(triage_interrupt_handler)\n", " .add_node(\"response_agent\", response_agent)\n", " .add_edge(START, \"triage_router\")\n", ")\n", "\n", "email_assistant = overall_workflow.compile()\n", "show_graph(email_assistant)"]}, {"cell_type": "markdown", "id": "43747219", "metadata": {}, "source": ["## Testing the agent with memory\n", "\n", "Now that we've implemented memory into our email assistant, let's test how the system learns from user feedback and adapts over time. This testing section explores how different types of user interactions create distinct memory updates that improve the assistant's future performance.\n", "\n", "The key questions we're answering through these tests:\n", "1. How does the system capture and store user preferences?\n", "2. How do these stored preferences affect future decisions?\n", "3. What patterns of interaction lead to which types of memory updates?\n", "\n", "First, let's build a helper function to display memory content so we can track how it evolves throughout our tests:"]}, {"cell_type": "code", "execution_count": null, "id": "59079929", "metadata": {}, "outputs": [], "source": ["import uuid \n", "from langgraph.checkpoint.memory import MemorySaver\n", "from langgraph.types import Command\n", "from langgraph.store.memory import InMemoryStore\n", "\n", "# Helper function to display memory content\n", "def display_memory_content(store, namespace=None):\n", " # Display current memory content for all namespaces\n", " print(\"\\n======= CURRENT MEMORY CONTENT =======\")\n", " if namespace:\n", " memory = store.get(namespace, \"user_preferences\")\n", " if memory:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print({\"preferences\": memory.value})\n", " else:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print(\"No memory found\")\n", " else:\n", " for namespace in [\n", " 
(\"email_assistant\", \"triage_preferences\"),\n", " (\"email_assistant\", \"response_preferences\"),\n", " (\"email_assistant\", \"cal_preferences\"),\n", " (\"email_assistant\", \"background\")\n", " ]:\n", " memory = store.get(namespace, \"user_preferences\")\n", " if memory:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print({\"preferences\": memory.value})\n", " else:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print(\"No memory found\")\n", " print(\"=======================================\\n\")"]}, {"cell_type": "markdown", "id": "397114bf", "metadata": {}, "source": ["### Accept `write_email` and `schedule_meeting`\n", "\n", "Our first test examines what happens when a user accepts the agent's actions without modification. This baseline case helps us understand how the system behaves when no feedback is provided:\n", "\n", "1. We'll use the same tax planning email from our previous tests\n", "2. The system will classify it as \"RESPOND\" and propose scheduling a meeting\n", "3. We'll accept the meeting schedule without changes\n", "4. The agent will generate an email confirming the meeting\n", "5. We'll accept the email without changes\n", "\n", "This test demonstrates the default behavior of our memory-enabled system. When a user simply accepts proposed actions, we expect minimal or no memory updates since there's no explicit feedback to learn from. However, the system will still leverage existing memory (if any) when generating its responses."]}, {"cell_type": "code", "execution_count": null, "id": "649cee4f", "metadata": {}, "outputs": [], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. 
I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_1 = uuid.uuid4()\n", "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "878e199e", "metadata": {}, "source": ["Accept the `schedule_meeting` tool call\n", "\n", "As we examine the initial `schedule_meeting` proposal, note how the system uses existing memory to inform its decisions:\n", "\n", "1. The default calendar preferences show a preference for 30-minute meetings, though the email requests 45 minutes\n", "2. The agent still proposes a 45-minute meeting, respecting the sender's specific request\n", "3. We accept this proposal without modification to see if simple acceptance triggers any memory updates\n", "\n", "After running this step, we'll check the memory contents to confirm whether acceptance alone leads to memory updates. 
Simple acceptance represents the baseline user experience - the system works as intended without requiring adjustments."]}, {"cell_type": "code", "execution_count": null, "id": "9589423b", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "e6b80f99", "metadata": {}, "source": ["Accept the `write_email` tool call\n", "\n", "Now we'll accept the email draft that confirms the meeting scheduling:\n", "\n", "1. The email draft is generated with knowledge of our calendar preferences\n", "2. It includes details about the meeting time, duration, and purpose\n", "3. We'll accept it without changes to complete the baseline test case\n", "\n", "After accepting, we'll check all memory stores to see if any updates occurred. 
As expected, simply accepting the agent's proposals doesn't provide strong learning signals - there's no clear feedback about what the user likes or dislikes about the agent's approach.\n", "\n", "The trace link shows the complete workflow execution, where we can see that the memory is used in the LLM call for response generation, but no memory updates occur, which is the expected behavior for simple acceptances."]}, {"cell_type": "code", "execution_count": null, "id": "12035cf6", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting the write_email tool call\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "fcbc178d", "metadata": {}, "source": ["We can look at the full messages, and the trace: \n", "\n", "https://smith.langchain.com/public/86ff6474-29fe-452e-8829-b05a91b458eb/r\n", "\n", "You'll notice that memory is used in the LLM call to respond. 
\n", "\n", "But the memory store is *not* updated, because we haven't added any feedback via HITL."]}, {"cell_type": "code", "execution_count": null, "id": "10ce8197", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_1)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "58201a21", "metadata": {}, "source": ["### Edit `write_email` and `schedule_meeting`\n", "\n", "This test explores how the system learns from direct edits to its proposed actions. When users modify the agent's suggestions, it creates clear, specific learning signals about their preferences:\n", "\n", "1. We'll use the same tax planning email as before\n", "2. When the agent proposes a 45-minute meeting, we'll edit it to:\n", " - Change the duration to 30 minutes (matching our stored preference)\n", " - Make the subject line more concise\n", "3. When the agent drafts an email, we'll edit it to be:\n", " - Shorter and less formal\n", " - Structured differently\n", "\n", "Edits provide the most explicit feedback about user preferences, letting the system learn exactly what changes are desired. We expect to see specific, targeted updates to our memory stores that reflect these edits."]}, {"cell_type": "code", "execution_count": null, "id": "ac260423", "metadata": {}, "outputs": [], "source": ["# Same email as before\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_2 = uuid.uuid4()\n", "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", "\n", "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a schedule_meeting tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt\n", "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "5d73ba71", "metadata": {}, "source": ["Edit the `schedule_meeting` tool call\n", "\n", "When we edit the meeting proposal, we're providing direct, explicit feedback about our preferences. This creates a significant learning opportunity for the system:\n", "\n", "1. The agent initially proposes a 45-minute meeting (the duration requested in the email)\n", "2. We edit it to 30 minutes and simplify the subject from \"Tax Planning Strategies Discussion\" to \"Tax Planning Discussion\"\n", "3. This creates clear, specific feedback about our time preferences and naming conventions\n", "\n", "After the edit, we'll check the calendar preferences memory store to see how it's updated. 
The memory update should capture both:\n", "- Our preference for shorter 30-minute meetings\n", "- Our preference for more concise meeting subjects\n", "\n", "The trace reveals the precise memory update logic, showing how the system analyzes the difference between its proposal and our edits to extract meaningful patterns and preferences. We can see the detailed justification for each memory update, ensuring transparency in the learning process."]}, {"cell_type": "code", "execution_count": null, "id": "af760977", "metadata": {}, "outputs": [], "source": ["# Now simulate user editing the schedule_meeting tool call\n", "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", "edited_schedule_args = {\n", " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", " \"subject\": \"Tax Planning Discussion\",\n", " \"duration_minutes\": 30, # Changed from 45 to 30\n", " \"preferred_day\": \"2025-04-22\",\n", " \"start_time\": 14 \n", "}\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after editing schedule_meeting\n", "print(\"\\nChecking memory after editing schedule_meeting:\")\n", "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "bbbb324f", "metadata": {"vscode": {"languageId": "raw"}}, "source": ["```\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "```\n", "\n", "```\n", "{'preferences': \"30 minute meetings are 
preferred, but 15 minute meetings are also acceptable.\\n\\nUser prefers 30 minute meetings over longer durations such as 45 minutes. When scheduling, default to 30 minutes unless otherwise specified. Subject lines should be concise (e.g., 'Tax Planning Discussion' instead of 'Tax Planning Strategies Discussion').\"}\n", "```"]}, {"cell_type": "markdown", "id": "0dfc585a", "metadata": {}, "source": ["Looking at the memory after editing the calendar invitation, we can see that it's been updated:\n", "\n", "1. The system has identified that we prefer 30-minute meetings over longer durations\n", "2. It's also captured our preference for concise meeting subjects\n", "\n", "What's particularly impressive about this memory update is:\n", "- It doesn't just record our specific edit, but generalizes to a broader preference pattern\n", "- It preserves all existing memory content while adding the new information\n", "- It extracts multiple preference signals from a single edit interaction\n", "\n", "Now, let's edit the email draft to see how the system captures different types of communication preferences:"]}, {"cell_type": "code", "execution_count": null, "id": "81a1fa37", "metadata": {}, "outputs": [], "source": ["display_memory_content(store,(\"email_assistant\", \"response_preferences\"))\n", "# Now simulate user editing the write_email tool call\n", "print(\"\\nSimulating user editing the write_email tool call...\")\n", "edited_email_args = {\n", " \"to\": \"pm@client.com\",\n", " \"subject\": \"Re: Tax season let's schedule call\",\n", " \"content\": \"Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", "}\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after editing write_email\n", "print(\"\\nChecking memory after editing write_email:\")\n", "display_memory_content(store,(\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "3ffbd5f9", "metadata": {}, "source": ["Our email edit reveals even more sophisticated learning capabilities:\n", "\n", "1. We've dramatically shortened and simplified the email content\n", "2. We've changed the tone to be more casual\n", "3. We've added a question asking for confirmation rather than assuming the time works\n", "4. 
We've slightly altered the meeting details (day and time)\n", "\n", "Looking at the updated memory, we can see that the system has extracted a key insight about our communication style:\n", "\n", "```\n", "When scheduling a meeting, ask the recipient to confirm if the proposed time works for them, rather than assuming and stating the meeting is already scheduled.\n", "```\n", "\n", "This demonstrates the system's ability to:\n", "- Analyze our edit not just at a superficial level, but to understand intent\n", "- Extract generalizable principles from specific examples\n", "- Preserve all existing guidance while adding new insights\n", "- Maintain the organization and structure of the memory\n", "\n", "These targeted, high-quality memory updates will improve all future interactions without requiring repeated corrections."]}, {"cell_type": "code", "execution_count": null, "id": "8ad818d6", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_2)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "6d92a42b", "metadata": {}, "source": ["### Respond (with feedback) `write_email`, `schedule_meeting`, and `question`\n", "\n", "Our final test set explores the \"response\" feedback pattern - providing guidance without directly editing or accepting. This conversational feedback mechanism offers a middle ground between acceptance and editing:\n", "\n", "1. First, we'll test feedback for meeting scheduling by requesting:\n", " - Shorter duration (30 minutes instead of 45)\n", " - Afternoon meeting times (after 2pm)\n", " \n", "2. Next, we'll test feedback for email drafting by requesting:\n", " - Shorter, less formal language\n", " - A specific closing statement about looking forward to the meeting\n", " \n", "3. 
Finally, we'll test feedback for questions by providing:\n", " - A direct answer with additional context\n", " - Specific preferences (brunch location, time)\n", "\n", "This natural language feedback approach lets users guide the assistant without having to do the work themselves. We expect to see detailed memory updates that extract the general principles from our specific feedback."]}, {"cell_type": "code", "execution_count": null, "id": "07676231", "metadata": {}, "outputs": [], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_5 = uuid.uuid4()\n", "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt \n", "display_memory_content(store, 
(\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "b85fc45d", "metadata": {}, "source": ["Provide feedback for the `schedule_meeting` tool call\n", "\n", "Instead of directly editing the meeting proposal or simply accepting it, we'll provide natural language feedback:\n", "\n", "1. We request a 30-minute meeting instead of 45 minutes\n", "2. We express a preference for afternoon meetings after 2pm\n", "3. The system must interpret this feedback and generate a new proposal\n", "\n", "This conversational approach is often more natural and efficient than direct editing, especially for mobile users or those who prefer to give high-level direction rather than detailed edits.\n", "\n", "After providing feedback, we'll examine the calendar preferences memory to see how this natural language guidance is captured. We expect to see the system extract both the meeting duration and time-of-day preferences as general principles."]}, {"cell_type": "code", "execution_count": null, "id": "30a151f1", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after providing feedback for schedule_meeting\n", "print(\"\\nChecking memory after providing feedback for schedule_meeting:\")\n", "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "8088757c", "metadata": {}, "source": ["Our memory check after 
providing feedback shows an elegantly simple calendar preference update:\n", "\n", "```\n", "30 minute meetings are preferred, but 15 minute meetings are also acceptable.\n", "Afternoon meetings after 2pm are preferred.\n", "```\n", "\n", "The system has:\n", "1. Captured both aspects of our feedback (duration and time of day)\n", "2. Preserved the existing preference about 15-minute meetings\n", "3. Added our preference for afternoon meetings after 2pm as a new line\n", "4. Kept the format clean and readable\n", "\n", "This natural language feedback mechanism creates the same quality of memory updates as direct editing but requires less effort from the user. The system is able to extract structured preferences from unstructured feedback, showing its ability to learn from conversational interactions.\n", "\n", "Let's accept this revised meeting proposal and move to the email draft:"]}, {"cell_type": "code", "execution_count": null, "id": "545063be", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting schedule_meeting after feedback\n", "print(\"\\nChecking memory after accepting schedule_meeting after feedback:\")\n", "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "e72ede94", "metadata": {}, "source": ["Now provide feedback for the `write_email` tool call\n", "\n", "Similar to our meeting feedback, we'll now provide natural language guidance for the email draft:\n", "\n", "1. 
We request \"shorter and less formal\" language - a style preference\n", "2. We ask for a specific closing statement about looking forward to the meeting\n", "3. The system must interpret this guidance and rewrite the email accordingly\n", "\n", "After providing this feedback, we'll check the response preferences memory to see how these style and structure preferences are captured. We expect to see generalizable guidelines about email brevity, formality, and closing statements added to our preference profile."]}, {"cell_type": "code", "execution_count": null, "id": "9831ad2d", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after providing feedback for write_email\n", "print(\"\\nChecking memory after providing feedback for write_email:\")\n", "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "b5b360a2", "metadata": {}, "source": ["The memory update after our email feedback shows highly sophisticated learning about both meeting scheduling and email writing preferences:\n", "\n", "1. 
The system has added a complete new section to the response preferences entitled \"When writing email responses\" with two key preferences:\n", " - \"Favor shorter and less formal language when possible, unless the context requires formality\"\n", " - \"Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments\"\n", "\n", "2. It has also added a new bullet point to the \"When responding to meeting scheduling requests\" section:\n", " - \"When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified\"\n", "\n", "This demonstrates the system's ability to:\n", "- Organize learned preferences into appropriate categories\n", "- Extract multiple insights from a single feedback instance\n", "- Apply meeting preferences to both calendar and email contexts\n", "- Capture nuance with appropriate qualifiers (\"when possible,\" \"unless otherwise specified\")\n", "- Maintain the hierarchical structure of the memory\n", "\n", "The resulting email shows all these preferences applied: it's shorter, less formal, includes a closing statement about looking forward to the chat, and correctly references the 30-minute meeting time."]}, {"cell_type": "code", "execution_count": null, "id": "8c64999e", "metadata": {}, "outputs": [], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting write_email after feedback\n", "print(\"\\nChecking memory after accepting write_email after feedback:\")\n", 
"display_memory_content(store, (\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "f85e63cb", "metadata": {}, "source": ["Look at the full message history."]}, {"cell_type": "code", "execution_count": null, "id": "fa9cf91d", "metadata": {}, "outputs": [], "source": ["state = graph.get_state(thread_config_5)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "80ac9df0-cd39-4c32-a073-c2482d9554b6", "metadata": {}, "source": ["## Local Deployment\n", "\n", "You can find this graph with memory integration in the `src/email_assistant` directory:\n", "\n", "* `src/email_assistant/email_assistant_hitl_memory.py`"]}, {"cell_type": "markdown", "id": "5a4aa8b0-f8b7-4197-8701-87dda60daa26", "metadata": {}, "source": ["Email to test: \n", "```\n", "{\n", " \"author\": \"Alice Smith \",\n", " \"to\": \"John Doe \",\n", " \"subject\": \"Quick question about API documentation\",\n", " \"email_thread\": \"Hi John,\\nI was reviewing the API documentation for the new authentication service and noticed a few endpoints seem to be missing from the specs. Could you help clarify if this was intentional or if we should update the docs?\\nSpecifically, I'm looking at:\\n- /auth/refresh\\n- /auth/validate\\nThanks!\\nAlice\"\n", "}\n", "```\n", "\n", "Testing this locally gives you the full experience of a memory-enabled HITL system:\n", "\n", "1. **Start the local server**: Run `langgraph dev` to launch the agent locally\n", "2. **Connect Agent Inbox**: Use the graph URL from the `langgraph.json` file\n", "3. **Submit test emails**: Try different email types to see classification in action\n", "4. **Provide various feedback types**: Try accepting, editing, ignoring, and responding\n", "5. 
**Observe memory evolution**: Check the Memory tab in LangGraph Studio to see changes\n", "\n", "![inbox](img/agent-inbox-edit.png)"]}, {"cell_type": "markdown", "id": "b075a3ea", "metadata": {}, "source": ["The Memory tab in LangGraph Studio offers a real-time view of how your preferences are being captured and updated with each interaction:\n", "\n", "![studio-img](img/memory-studio.png)\n", "\n", "Through continued use, the system becomes increasingly personalized:\n", "- It learns which emails you want to respond to, be notified about, or ignore\n", "- It adapts to your communication style preferences\n", "- It remembers your scheduling preferences\n", "- It refines its understanding with each interaction\n", "\n", "This combination of HITL and memory creates a system that balances automation with control - handling routine tasks automatically while learning from your feedback to become more aligned with your preferences over time."]}, {"cell_type": "markdown", "id": "f2ad7580", "metadata": {}, "source": ["## Hosted Deployment with Gmail Tools\n", "\n", "If you want to actually run this on your own email, you can deploy the graph with Gmail tools. 
\n", "\n", "Set up your Gmail credentials [following here](https://github.com/langchain-ai/agents-from-scratch/blob/main/src/email_assistant/tools/gmail/README.md).\n", "\n", "There is a graph set up with Gmail tools:\n", "\n", "```shell\n", "python src/email_assistant/email_assistant_hitl_memory_gmail.py\n", "```\n", "\n", "[One of the deployment options is `hosted`](https://langchain-ai.github.io/langgraph/tutorials/deployment/#other-deployment-options), and you can simply connect the deployed graph URL to the Agent Inbox as done with the local deployment.\n", "\n", "## Improving Memory \n", "\n", "Our current memory schema and updating is extremely simple: \n", "\n", "* Our schema is a string\n", "* We always overwrite the existing memory with a new string\n", " \n", "The store can be easily [configured for semantic search](https://langchain-ai.github.io/langgraph/cloud/deployment/semantic_search/) over a collection of memories. \n", "\n", "Also consider using [LangMem](https://langchain-ai.github.io/langmem/) for more advanced memory management."]}, {"cell_type": "code", "execution_count": null, "id": "846dbb9b-5c9a-4236-912e-02b3d9f674f6", "metadata": {}, "outputs": [], "source": []}], "metadata": {"jupytext": {"cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all"}, "kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6"}}, "nbformat": 4, "nbformat_minor": 5} diff --git a/src/email_assistant/email_assistant.py b/src/email_assistant/email_assistant.py index 2d56afd..ee06227 100644 --- a/src/email_assistant/email_assistant.py +++ b/src/email_assistant/email_assistant.py @@ -2,11 +2,11 @@ from langchain.chat_models import init_chat_model -from 
src.email_assistant.tools import get_tools, get_tools_by_name -from src.email_assistant.tools.default.prompt_templates import AGENT_TOOLS_PROMPT -from src.email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt, default_background, default_triage_instructions, default_response_preferences, default_cal_preferences -from src.email_assistant.schemas import State, RouterSchema, StateInput -from src.email_assistant.utils import parse_email, format_email_markdown +from email_assistant.tools import get_tools, get_tools_by_name +from email_assistant.tools.default.prompt_templates import AGENT_TOOLS_PROMPT +from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt, default_background, default_triage_instructions, default_response_preferences, default_cal_preferences +from email_assistant.schemas import State, RouterSchema, StateInput +from email_assistant.utils import parse_email, format_email_markdown from langgraph.graph import StateGraph, START, END from langgraph.types import Command diff --git a/src/email_assistant/email_assistant_hitl.py b/src/email_assistant/email_assistant_hitl.py index c459ac1..9e52d40 100644 --- a/src/email_assistant/email_assistant_hitl.py +++ b/src/email_assistant/email_assistant_hitl.py @@ -5,11 +5,11 @@ from langgraph.graph import StateGraph, START, END from langgraph.types import interrupt, Command -from src.email_assistant.tools import get_tools, get_tools_by_name -from src.email_assistant.tools.default.prompt_templates import HITL_TOOLS_PROMPT -from src.email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl, default_background, default_triage_instructions, default_response_preferences, default_cal_preferences -from src.email_assistant.schemas import State, RouterSchema, StateInput -from src.email_assistant.utils import parse_email, format_for_display, format_email_markdown +from email_assistant.tools import get_tools, 
get_tools_by_name +from email_assistant.tools.default.prompt_templates import HITL_TOOLS_PROMPT +from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl, default_background, default_triage_instructions, default_response_preferences, default_cal_preferences +from email_assistant.schemas import State, RouterSchema, StateInput +from email_assistant.utils import parse_email, format_for_display, format_email_markdown from dotenv import load_dotenv load_dotenv(".env") diff --git a/src/email_assistant/email_assistant_hitl_memory.py b/src/email_assistant/email_assistant_hitl_memory.py index e4b489d..06ebd26 100644 --- a/src/email_assistant/email_assistant_hitl_memory.py +++ b/src/email_assistant/email_assistant_hitl_memory.py @@ -8,11 +8,11 @@ from langgraph.store.base import BaseStore from langgraph.types import interrupt, Command -from src.email_assistant.tools import get_tools, get_tools_by_name -from src.email_assistant.tools.default.prompt_templates import HITL_MEMORY_TOOLS_PROMPT -from src.email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl_memory, default_triage_instructions, default_background, default_response_preferences, default_cal_preferences -from src.email_assistant.schemas import State, RouterSchema, StateInput -from src.email_assistant.utils import parse_email, format_for_display, format_email_markdown +from email_assistant.tools import get_tools, get_tools_by_name +from email_assistant.tools.default.prompt_templates import HITL_MEMORY_TOOLS_PROMPT +from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl_memory, default_triage_instructions, default_background, default_response_preferences, default_cal_preferences +from email_assistant.schemas import State, RouterSchema, StateInput +from email_assistant.utils import parse_email, format_for_display, format_email_markdown from dotenv import load_dotenv load_dotenv(".env") 
diff --git a/src/email_assistant/email_assistant_hitl_memory_gmail.py b/src/email_assistant/email_assistant_hitl_memory_gmail.py index 2f85be7..e9ddeaf 100644 --- a/src/email_assistant/email_assistant_hitl_memory_gmail.py +++ b/src/email_assistant/email_assistant_hitl_memory_gmail.py @@ -8,12 +8,12 @@ from langgraph.store.base import BaseStore from langgraph.types import interrupt, Command -from src.email_assistant.tools import get_tools, get_tools_by_name -from src.email_assistant.tools.gmail.prompt_templates import GMAIL_TOOLS_PROMPT -from src.email_assistant.tools.gmail.gmail_tools import mark_as_read -from src.email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl_memory, default_triage_instructions, default_background, default_response_preferences, default_cal_preferences -from src.email_assistant.schemas import State, RouterSchema, StateInput -from src.email_assistant.utils import parse_gmail, format_for_display, format_gmail_markdown +from email_assistant.tools import get_tools, get_tools_by_name +from email_assistant.tools.gmail.prompt_templates import GMAIL_TOOLS_PROMPT +from email_assistant.tools.gmail.gmail_tools import mark_as_read +from email_assistant.prompts import triage_system_prompt, triage_user_prompt, agent_system_prompt_hitl_memory, default_triage_instructions, default_background, default_response_preferences, default_cal_preferences +from email_assistant.schemas import State, RouterSchema, StateInput +from email_assistant.utils import parse_gmail, format_for_display, format_gmail_markdown from dotenv import load_dotenv load_dotenv(".env") diff --git a/src/email_assistant/tools/__init__.py b/src/email_assistant/tools/__init__.py index 0ab591c..bd03b2d 100644 --- a/src/email_assistant/tools/__init__.py +++ b/src/email_assistant/tools/__init__.py @@ -1,6 +1,6 @@ -from src.email_assistant.tools.base import get_tools, get_tools_by_name -from src.email_assistant.tools.default.email_tools import write_email, 
triage_email, Done -from src.email_assistant.tools.default.calendar_tools import schedule_meeting, check_calendar_availability +from email_assistant.tools.base import get_tools, get_tools_by_name +from email_assistant.tools.default.email_tools import write_email, triage_email, Done +from email_assistant.tools.default.calendar_tools import schedule_meeting, check_calendar_availability __all__ = [ "get_tools", diff --git a/src/email_assistant/tools/base.py b/src/email_assistant/tools/base.py index fc4ee5d..7c4f396 100644 --- a/src/email_assistant/tools/base.py +++ b/src/email_assistant/tools/base.py @@ -12,8 +12,8 @@ def get_tools(tool_names: List[str] = None, include_gmail: bool = False) -> List List of tool objects """ # Import default tools - from src.email_assistant.tools.default.email_tools import write_email, Done, Question - from src.email_assistant.tools.default.calendar_tools import schedule_meeting, check_calendar_availability + from email_assistant.tools.default.email_tools import write_email, Done, Question + from email_assistant.tools.default.calendar_tools import schedule_meeting, check_calendar_availability # Base tools dictionary all_tools = { @@ -27,7 +27,7 @@ def get_tools(tool_names: List[str] = None, include_gmail: bool = False) -> List # Add Gmail tools if requested if include_gmail: try: - from src.email_assistant.tools.gmail.gmail_tools import ( + from email_assistant.tools.gmail.gmail_tools import ( fetch_emails_tool, send_email_tool, check_calendar_tool, diff --git a/src/email_assistant/tools/default/__init__.py b/src/email_assistant/tools/default/__init__.py index 7e0a5c1..963c3b6 100644 --- a/src/email_assistant/tools/default/__init__.py +++ b/src/email_assistant/tools/default/__init__.py @@ -1,8 +1,8 @@ """Default tools for email assistant.""" -from src.email_assistant.tools.default.email_tools import write_email, triage_email, Done -from src.email_assistant.tools.default.calendar_tools import schedule_meeting, check_calendar_availability 
-from src.email_assistant.tools.default.prompt_templates import ( +from email_assistant.tools.default.email_tools import write_email, triage_email, Done +from email_assistant.tools.default.calendar_tools import schedule_meeting, check_calendar_availability +from email_assistant.tools.default.prompt_templates import ( STANDARD_TOOLS_PROMPT, AGENT_TOOLS_PROMPT, HITL_TOOLS_PROMPT, diff --git a/src/email_assistant/tools/gmail/__init__.py b/src/email_assistant/tools/gmail/__init__.py index 3b239df..1cae78b 100644 --- a/src/email_assistant/tools/gmail/__init__.py +++ b/src/email_assistant/tools/gmail/__init__.py @@ -1,13 +1,13 @@ """Gmail tools for email assistant.""" -from src.email_assistant.tools.gmail.gmail_tools import ( +from email_assistant.tools.gmail.gmail_tools import ( fetch_emails_tool, send_email_tool, check_calendar_tool, schedule_meeting_tool ) -from src.email_assistant.tools.gmail.prompt_templates import GMAIL_TOOLS_PROMPT +from email_assistant.tools.gmail.prompt_templates import GMAIL_TOOLS_PROMPT __all__ = [ "fetch_emails_tool", diff --git a/tests/hitl_testing.ipynb b/tests/hitl_testing.ipynb index 9b26d04..f9f1dec 100644 --- a/tests/hitl_testing.ipynb +++ b/tests/hitl_testing.ipynb @@ -1,2128 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "07c57479", - "metadata": {}, - "source": [ - "# Test Cases for Human-in-the-Loop\n" - ] - }, - { - "cell_type": "markdown", - "id": "1c77003d", - "metadata": {}, - "source": [ - "Let' see this working in practice by importing the email assistant with HITL from our module, `src/email_assistant/email_assistant_hitl.py`. We'll compile the graph with a [checkpointer](https://langchain-ai.github.io/langgraph/concepts/memory/#short-term-memory), which allows us to persist the state of the graph after interruption. 
We can then resume execution from the same state after the human has responded.\n", - "\n", - "> **Note:** here we change to the parent directory (`%cd ..`) to access our project's module structure, which contains reusable prompts and components. The autoreload extensions ensure any changes to these modules are automatically reflected in the notebook without requiring kernel restarts. This allows us to organize our prompts in a dedicated module rather than defining them inline, making them easier to maintain and reuse across the notebooks! You can see all these files in: `src/email_assistant`" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "2a082b81", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/rlm/Desktop/Code/interrupt_workshop\n" - ] - } - ], - "source": [ - "%cd ..\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "import uuid\n", - "from langgraph.checkpoint.memory import MemorySaver\n", - "from email_assistant.email_assistant_hitl import overall_workflow" - ] - }, - { - "cell_type": "markdown", - "id": "37bac65b", - "metadata": {}, - "source": [ - "## Accept `write_email` and `schedule_meeting`\n", - "\n", - "This test demonstrates the fundamental HITL approval flow when a user accepts all agent actions:\n", - "1. An email about tax planning is received and classified as \"RESPOND\"\n", - "2. The agent checks calendar availability for both suggested dates (Tuesday and Thursday)\n", - "3. The agent proposes scheduling a meeting on Tuesday at 2:00 PM for 45 minutes\n", - "4. The user reviews and ACCEPTS the meeting request without changes\n", - "5. The agent drafts a confirmation email to send to the client\n", - "6. The user reviews and ACCEPTS the email draft without changes\n", - "7. 
The agent marks the workflow as complete\n", - "\n", - "This scenario shows how human-in-the-loop works at its most basic level - humans provide oversight while the agent handles the execution." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "5ac5dde7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-05-06', 'start_time': 14}}\n" - ] - } - ], - "source": [ - "import uuid\n", - "\n", - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_1 = uuid.uuid4()\n", - "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "5454d987", - "metadata": {}, - "source": [ - "Accept the `schedule_meeting` tool call\n", - "\n", - "After the agent proposes scheduling a meeting, it creates an interrupt to seek human approval. In this step, we simulate a user accepting the proposed meeting parameters without changes. This allows the workflow to proceed to the next step (drafting a confirmation email). The interrupt object contains critical information:\n", - "\n", - "1. The action type (`schedule_meeting`)\n", - "2. The proposed meeting parameters (attendees, subject, duration, day, time)\n", - "3. Configuration options for what kinds of responses are allowed\n", - "\n", - "When the user accepts, the tool is executed as proposed without modification." 
- ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "643e7974", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-05-06', 'start_time': 14}} tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I am available on Tuesday, May 6th at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\\n\\nLooking forward to your suggestions.\\n\\nBest regards,\\nLance'}}\n" - ] - } - ], - "source": [ - "from langgraph.types import Command\n", - "\n", - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "00d8c1a5", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call\n", - "\n", - "With the meeting scheduled, the agent now drafts a confirmation email to the client. The interrupt contains:\n", - "\n", - "1. The email tool action with recipient, subject, and proposed content\n", - "2. The formatted email showing what will be sent\n", - "3. The same configuration options for user response types\n", - "\n", - "When the user accepts, the email is sent as written. 
After this step, the agent marks the task as complete with the `Done` tool call, and the workflow ends. The complete message history shows all the steps taken:\n", - "\n", - "1. Initial email processing\n", - "2. Calendar availability checks for both days\n", - "3. Meeting scheduling with confirmation\n", - "4. Email composition and sending\n", - "5. Workflow completion" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "656f4336", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I have scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 45 minutes.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\n\\nLance Martin'}} tool call...\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_h6a55b78txHk7GV7yQa9sQU9)\n", - " Call ID: call_h6a55b78txHk7GV7yQa9sQU9\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "7d228ff8", - "metadata": {}, - "source": [ - "Look at the full 
message history, and see trace:\n", - "\n", - "https://smith.langchain.com/public/82277f96-3abd-48e0-a4db-413f7572240d/r" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "3b168d80", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_NrkaML1VPyeDXFNiQZUvfGIo)\n", - " Call ID: call_NrkaML1VPyeDXFNiQZUvfGIo\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_cjRWdZCRnPsYsagEGZxASZht)\n", - " Call ID: call_cjRWdZCRnPsYsagEGZxASZht\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_lD42qto5T8dxkfzV6yYjWZQL)\n", - " Call ID: call_lD42qto5T8dxkfzV6yYjWZQL\n", - " 
Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - " duration_minutes: 45\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_ElvioA4LUlyWqCTgwQTI4gs6)\n", - " Call ID: call_ElvioA4LUlyWqCTgwQTI4gs6\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hello,\n", - "\n", - "Thank you for reaching out. I have scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 45 minutes.\n", - "\n", - "Looking forward to our discussion.\n", - "\n", - "Best regards,\n", - "\n", - "Lance Martin\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hello,\n", - "\n", - "Thank you for reaching out. I have scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. 
The meeting is set for 45 minutes.\n", - "\n", - "Looking forward to our discussion.\n", - "\n", - "Best regards,\n", - "\n", - "Lance Martin\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_h6a55b78txHk7GV7yQa9sQU9)\n", - " Call ID: call_h6a55b78txHk7GV7yQa9sQU9\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_1)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "86b1ba30", - "metadata": {}, - "source": [ - "## Edit `write_email` and `schedule_meeting`\n", - "\n", - "This test demonstrates how human modification works in the HITL flow:\n", - "1. We start with the same tax planning email as before\n", - "2. The agent proposes a meeting with the same parameters\n", - "3. This time, the user EDITS the meeting proposal to change:\n", - " - Duration from 45 to 30 minutes\n", - " - Meeting subject is made more concise\n", - "4. The agent adapts to these changes when drafting the email\n", - "5. The user further EDITS the email to be shorter and less formal\n", - "6. The workflow completes with both modifications incorporated\n", - "\n", - "This scenario showcases one of the most powerful aspects of HITL: users can make precise modifications to agent actions before they are executed, ensuring the final outcome matches their preferences without having to handle all the details themselves." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "3bfca1b4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-05-06', 'start_time': 14}}\n" - ] - } - ], - "source": [ - "# Same email as before\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_2 = uuid.uuid4()\n", - "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", - "\n", - "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "706ac0a6", - "metadata": {}, - "source": [ - "Edit the `schedule_meeting` tool call\n", - "\n", - "When the agent proposes the initial meeting schedule, we now simulate the user making modifications through the edit functionality. This demonstrates how the `edit` response type works:\n", - "\n", - "1. The user receives the same meeting proposal as in the previous test\n", - "2. Instead of accepting, they modify the parameters:\n", - " - Reducing duration from 45 to 30 minutes\n", - " - Keeping the same day and time\n", - "3. The `edit` response includes the complete set of modified arguments\n", - "4. The interrupt handler replaces the original tool arguments with these edited ones\n", - "5. The tool is executed with the user's modifications\n", - "\n", - "This shows how edit capability gives users precise control over agent actions while still letting the agent handle the execution details." 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "7175fedb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user editing the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out regarding tax planning strategies. I am available on Tuesday, May 6th at 2:00 PM, and have scheduled a 30-minute call for us to discuss your suggestions. If you feel we need additional time, please let me know and I can adjust the meeting duration accordingly.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\nLance'}}\n" - ] - } - ], - "source": [ - "# Now simulate user editing the schedule_meeting tool call\n", - "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", - "edited_schedule_args = {\n", - " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", - " \"subject\": \"Tax Planning Discussion\",\n", - " \"duration_minutes\": 30, # Changed from 45 to 30\n", - " \"preferred_day\": \"2025-05-06\",\n", - " \"start_time\": 14 \n", - "}\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "9757706b", - "metadata": {}, - "source": [ - "Edit the `write_email` tool call\n", - "\n", - "After accepting the modified meeting schedule, the 
agent drafts an email reflecting the 30-minute duration. Now we demonstrate how editing works with email content:\n", - "\n", - "1. The agent has adapted its email to mention the shorter 30-minute duration\n", - "2. We simulate the user wanting an even more significant change to the email:\n", - " - Completely rewriting the content to be shorter and less formal\n", - " - Changing the meeting day mentioned in the email (showing how users can correct agent mistakes)\n", - " - Requesting confirmation rather than stating the meeting as definite\n", - "3. The `edit` response contains the complete new email content\n", - "4. The tool arguments are updated with this edited content\n", - "5. The email is sent with the user's preferred wording\n", - "\n", - "This example shows the power of HITL for complex communication tasks - the agent handles the structure and initial content, while humans can refine tone, style, and substance." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a0604d53", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user editing the write_email tool call...\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_PVtmxQJgG0wdFh299b70ab1z)\n", - " Call ID: call_PVtmxQJgG0wdFh299b70ab1z\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "# Now simulate user editing the write_email tool call\n", - "print(\"\\nSimulating user editing the write_email tool call...\")\n", - "edited_email_args = {\n", - " \"to\": \"pm@client.com\",\n", - " \"subject\": \"Re: Tax season let's schedule call\",\n", - " \"content\": \"Hello Project Manager,\\n\\nThank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", - "}\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ac279101", - "metadata": {}, - "source": [ - "Look at the full message history, and see trace, to view the edited tool calls:\n", - "\n", - "https://smith.langchain.com/public/21769510-d57a-41e4-b5c7-0ddb23c237d8/r" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "6d3e9be7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_hKFyYkfY3fhQqflCAh2dm69E)\n", - " Call ID: call_hKFyYkfY3fhQqflCAh2dm69E\n", - " Args:\n", - " day: 2025-05-06\n", - " check_calendar_availability (call_xdLGVBhQ3ki1vOSzReNxCZYk)\n", - " Call ID: call_xdLGVBhQ3ki1vOSzReNxCZYk\n", - " Args:\n", - " day: 2025-05-08\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-05-06: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-05-08: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_jJoVei2IxgVbQI7cXrEPJe8p)\n", - " Call ID: call_jJoVei2IxgVbQI7cXrEPJe8p\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Discussion\n", - " duration_minutes: 30\n", - " preferred_day: 2025-05-06\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Discussion' scheduled on Tuesday, May 06, 2025 at 14 for 30 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_MTSKFqw6OfmpZU6bMt8nCHwl)\n", - " Call ID: call_MTSKFqw6OfmpZU6bMt8nCHwl\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hello Project Manager,\n", - "\n", - "Thank you for reaching out about 
tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\n", - "\n", - "Best regards,\n", - "Lance Martin\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hello Project Manager,\n", - "\n", - "Thank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\n", - "\n", - "Best regards,\n", - "Lance Martin\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_PVtmxQJgG0wdFh299b70ab1z)\n", - " Call ID: call_PVtmxQJgG0wdFh299b70ab1z\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_2)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "5281cb1f", - "metadata": {}, - "source": [ - "## Ignore `write_email`, `schedule_meeting`, and `question`\n", - "\n", - "This test set demonstrates the \"ignore\" capability of the HITL system, showing how users can reject agent actions entirely:\n", - "\n", - "1. First, we test ignoring a `schedule_meeting` request:\n", - " - When the agent proposes scheduling a meeting, the user rejects it completely\n", - " - The workflow ends immediately without scheduling anything\n", - " \n", - "2. Second, we test accepting a meeting but ignoring the follow-up email:\n", - " - The user accepts the meeting schedule\n", - " - But when the agent drafts a confirmation email, the user ignores it\n", - " - The meeting is still scheduled, but no email is sent\n", - " - The workflow ends after the rejection\n", - "\n", - "3. 
Third, we test ignoring a `question` tool call:\n", - " - For a different email about brunch plans\n", - " - The agent asks a clarifying question\n", - " - The user ignores the question\n", - " - The workflow ends without further action\n", - "\n", - "The \"ignore\" capability is crucial for HITL systems as it allows users to stop potentially unwanted actions before they occur, providing an important safety mechanism and ensuring users maintain full control." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "d60baeed", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_3 = uuid.uuid4()\n", - "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "2442cc81", - "metadata": {}, - "source": [ - "Ignore the `schedule_meeting` tool call\n", - "\n", - "In this test, we demonstrate rejection of the meeting scheduling proposal:\n", - "\n", - "1. The agent suggests a 45-minute meeting on Tuesday at 2:00 PM\n", - "2. We simulate the user selecting \"ignore\" in the Agent Inbox interface\n", - "3. The `interrupt_handler` processes this rejection with special logic:\n", - " - It adds a message explaining the user's choice to ignore\n", - " - It returns a command to end the workflow\n", - " - No meeting is scheduled and no email is sent\n", - "\n", - "This capability is crucial for several reasons:\n", - "- Prevents incorrect actions from being executed\n", - "- Gives users veto power over any agent decision\n", - "- Provides a clear exit path when the agent's suggestion isn't appropriate\n", - "\n", - "The trace shows how the workflow ends immediately after the ignore action." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "f2e86ca6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user ignoring the {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}} tool call...\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "f5ccb8e0", - "metadata": {}, - "source": [ - "As you can see from the trace, we end:\n", - "\n", - "https://smith.langchain.com/public/4e322b99-08ea-4d23-9653-475415ff3e33/r\n", - "\n", - "Now, let's run again, but ignore the `write_email` tool call:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "15e33156", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "Simulating user accepting the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 
'subject': \"Re: Tax season let's schedule call\", 'content': 'Hi,\\n\\nThank you for reaching out. I have scheduled our call to discuss tax planning strategies for Tuesday, April 22, 2025, at 2:00 PM. We will have 45 minutes to go over your suggestions.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\n\\nLance Martin'}}\n", - "\n", - "Simulating user ignoring the write_email tool call...\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User ignored this email draft. Ignore this email and end the workflow.\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_3 = uuid.uuid4()\n", - "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_3):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # 
Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "1acea519", - "metadata": {}, - "source": [ - "Again, we end:\n", - "\n", - "https://smith.langchain.com/public/819be555-4919-4d14-bdd9-eb6f73a3bafe/r" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "eb55b5f1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_JpQevSvNkmodiACKRfONrqan)\n", - " Call ID: call_JpQevSvNkmodiACKRfONrqan\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_3gJfzFvJkgY9lRVTpUF1D9Zt)\n", - " Call ID: call_3gJfzFvJkgY9lRVTpUF1D9Zt\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_FB8EtqpH1UZHfvZzerDzq16R)\n", - " Call ID: call_FB8EtqpH1UZHfvZzerDzq16R\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - " duration_minutes: 45\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_GlntMuOdVnAjNq9R0tXmCkbV)\n", - " Call ID: call_GlntMuOdVnAjNq9R0tXmCkbV\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hi,\n", - "\n", - "Thank you for reaching out. 
I have scheduled our call to discuss tax planning strategies for Tuesday, April 22, 2025, at 2:00 PM. We will have 45 minutes to go over your suggestions.\n", - "\n", - "Looking forward to our discussion.\n", - "\n", - "Best regards,\n", - "\n", - "Lance Martin\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User ignored this email draft. Ignore this email and end the workflow.\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_3)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "7200cdaf", - "metadata": {}, - "source": [ - "Now let's try an email that calls the `Question` tool\n", - "\n", - "The `Question` tool represents another important HITL interaction pattern - the agent asking for additional information rather than taking immediate action. This test shows:\n", - "\n", - "1. A different email scenario about a potential brunch invitation\n", - "2. The agent doesn't have enough information to respond definitively\n", - "3. Instead of guessing, it uses the `Question` tool to ask for clarification\n", - "4. We'll simulate the user ignoring this question\n", - "\n", - "This demonstrates how the HITL system gracefully handles requests for information, and what happens when users choose not to engage with these requests." 
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "bdea633a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Question', 'args': {'content': 'What day and time are you considering for the brunch in 3 weeks?'}}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Partner \",\n", - " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", - " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_4 = uuid.uuid4()\n", - "thread_config_4 = {\"configurable\": {\"thread_id\": thread_id_4}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_4):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "9aaca6d4", - "metadata": {}, - "source": [ - "Ignore the `question` tool call\n", - "\n", - "When the agent asks for clarification about the brunch plans, we simulate a user ignoring the question:\n", - "\n", - "1. 
The agent has asked about preferred day and time for the brunch\n", - "2. We provide an \"ignore\" response to this question\n", - "3. The system processes this as a decision to abandon handling this email:\n", - " - A message is added stating \"User ignored this question. Ignore this email and end the workflow.\"\n", - " - The workflow ends without sending any response\n", - "\n", - "This pattern is important because it allows users to:\n", - "- Decide that some emails don't actually need responses\n", - "- Avoid providing information they're not ready to share\n", - "- Defer decision-making to a later time\n", - "- Take over email handling manually if they prefer\n", - "\n", - "The trace and message history show how cleanly the workflow ends after ignoring the question." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "d4e5d62b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user ignoring the {'action': 'Question', 'args': {'content': 'What day and time are you considering for the brunch in 3 weeks?'}} tool call...\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_4):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "90c7ade4", - "metadata": {}, - "source": [ - "As before, we end:\n", - "\n", - "https://smith.langchain.com/public/276c4016-2b4c-43f5-a677-834a5eaa47c0/r" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "ef334860", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", - "**From**: Partner \n", - "**To**: Lance Martin \n", - "\n", - "Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Question (call_l6LC1srm8qt8CJCihDTz7wAW)\n", - " Call ID: call_l6LC1srm8qt8CJCihDTz7wAW\n", - " Args:\n", - " content: What day and time are you considering for the brunch in 3 weeks?\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User ignored this question. Ignore this email and end the workflow.\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_4)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "da4c39e9", - "metadata": {}, - "source": [ - "## Respond (with feedback) `write_email`, `schedule_meeting`, and `question`\n", - "\n", - "This test set demonstrates the \"response\" capability - providing feedback without editing or accepting:\n", - "\n", - "1. First, we test feedback for meeting scheduling:\n", - " - The user provides specific preferences (30 minutes instead of 45, and afternoon meetings)\n", - " - The agent incorporates this feedback into a revised proposal\n", - " - The user then accepts the revised meeting schedule\n", - "\n", - "2. Second, we test feedback for email drafting:\n", - " - The user requests a shorter, less formal email with a specific closing statement\n", - " - The agent completely rewrites the email according to this guidance\n", - " - The user accepts the new draft\n", - "\n", - "3. 
Lastly, we test feedback for questions:\n", - " - For the brunch invitation, the user answers the question with additional context\n", - " - The agent uses this information to draft an appropriate email response\n", - " - The workflow proceeds with the user's input integrated\n", - "\n", - "The \"response\" capability bridges the gap between acceptance and editing - users can guide the agent without having to write the full content themselves. This is especially powerful for:\n", - "- Adjusting tone and style\n", - "- Adding context the agent missed\n", - "- Redirecting the agent's approach\n", - "- Answering questions in a way that shapes the next steps" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "3c4b3517", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_5 = uuid.uuid4()\n", - "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "ae2bea0a", - "metadata": {}, - "source": [ - "Provide feedback for the `schedule_meeting` tool call\n", - "\n", - "Now we explore the feedback capability for meeting scheduling:\n", - "\n", - "1. The agent proposes the standard 45-minute meeting on Tuesday at 2:00 PM\n", - "2. Instead of accepting or editing, we provide feedback in natural language\n", - "3. Our feedback specifies two preferences:\n", - " - Shorter meeting (30 minutes instead of 45)\n", - " - Preference for afternoon meetings (after 2pm)\n", - "4. The agent receives this feedback through the `response` type\n", - "5. The interrupt handler adds this feedback as a message to the state\n", - "6. The agent processes this feedback and generates a new tool call incorporating these preferences\n", - "\n", - "Unlike direct editing, which requires specifying the entire set of parameters, feedback allows users to express their preferences conversationally. 
The agent must then interpret this feedback and apply it appropriately to create a revised proposal." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "9a916e10", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user providing feedback for the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 30, 'preferred_day': '2025-04-22', 'start_time': 14}}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cf35f1a2", - "metadata": {}, - "source": [ - "Accept the `schedule_meeting` tool call after providing feedback" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "2727fb0e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 30, 'preferred_day': '2025-04-22', 'start_time': 14}} tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 
'subject': \"Re: Tax season let's schedule call\", 'content': \"Hello,\\n\\nThank you for reaching out. I've scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 30 minutes.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\n\\nLance Martin\"}}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "3ca470c5", - "metadata": {}, - "source": [ - "Now provide feedback for the `write_email` tool call\n", - "\n", - "After accepting the revised meeting schedule, the agent drafts an email. We now test feedback for email content:\n", - "\n", - "1. The agent's email is relatively formal and detailed\n", - "2. We provide stylistic feedback requesting:\n", - " - A shorter, more concise email\n", - " - A less formal tone\n", - " - A specific closing statement about looking forward to the meeting\n", - "3. The agent processes this feedback to completely rewrite the email\n", - "4. 
The new draft is much shorter, more casual, and includes the requested closing\n", - "\n", - "This demonstrates the power of natural language feedback for content creation:\n", - "- Users don't need to rewrite the entire email themselves\n", - "- They can provide high-level guidance on style, tone, and content\n", - "- The agent handles the actual writing based on this guidance\n", - "- The result better matches user preferences while preserving the essential information\n", - "\n", - "The message history shows both the original and revised emails, clearly showing how the feedback was incorporated." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "f5221d87", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user providing feedback for the write_email tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': \"Hi,\\n\\nI've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\\n\\nBest,\\nLance\"}}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "1266ec72", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call after providing feedback" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "0b4698c0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': \"Hi,\\n\\nI've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\\n\\nBest,\\nLance\"}} tool call...\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "c270f52a", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace:\n", - "\n", - "https://smith.langchain.com/public/57006770-6bb3-4e40-b990-143c373ebe60/r\n", - "\n", - "We can see that user feedback in incorporated into the tool calls. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "1daf10d6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_5Cs51CxeKicG85x0xeeZgRxg)\n", - " Call ID: call_5Cs51CxeKicG85x0xeeZgRxg\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_c7m8uYJaVH9x6l5zJdUj3HMk)\n", - " Call ID: call_c7m8uYJaVH9x6l5zJdUj3HMk\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_2HHQvYV5EnkOzGfGITEnE772)\n", - " Call ID: call_2HHQvYV5EnkOzGfGITEnE772\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - 
" duration_minutes: 45\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User gave feedback, which can we incorporate into the meeting request. Feedback: Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_YbywvAlWYqWrbvH7zZqu6yUi)\n", - " Call ID: call_YbywvAlWYqWrbvH7zZqu6yUi\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - " duration_minutes: 30\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 30 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_MYaMobBPrGW9ZcC8DXI55rFd)\n", - " Call ID: call_MYaMobBPrGW9ZcC8DXI55rFd\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hello,\n", - "\n", - "Thank you for reaching out. I've scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 30 minutes.\n", - "\n", - "Looking forward to our discussion.\n", - "\n", - "Best regards,\n", - "\n", - "Lance Martin\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User gave feedback, which can we incorporate into the email. Feedback: Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_xVUwWSfechpEhekihDUmw0fE)\n", - " Call ID: call_xVUwWSfechpEhekihDUmw0fE\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hi,\n", - "\n", - "I've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\n", - "\n", - "Best,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hi,\n", - "\n", - "I've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\n", - "\n", - "Best,\n", - "Lance\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_Y1oqvAcB4yYoNNY15Wxnroh7)\n", - " Call ID: call_Y1oqvAcB4yYoNNY15Wxnroh7\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_5)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "1d964e36", - "metadata": {}, - "source": [ - "Now let's try an email that calls the `Question` tool to provide feedback\n", - "\n", - "Finally, we test how feedback works with the `Question` tool:\n", - "\n", - "1. For the brunch invitation email, the agent asks about preferred day and time\n", - "2. Instead of ignoring, we provide a substantive response with additional context:\n", - " - Confirming we want to invite the people mentioned\n", - " - Noting we need to check which weekend works best\n", - " - Adding information about needing a reservation\n", - "3. 
The agent uses this information to:\n", - " - Draft a comprehensive email response incorporating all our feedback\n", - " - Notice we didn't provide a specific day/time, so it suggests checking the calendar\n", - " - Include the detail about making a reservation\n", - "4. The complete email reflects both the original request and our additional guidance\n", - "\n", - "This demonstrates how question responses can shape the entire workflow:\n", - "- Questions let the agent gather missing information\n", - "- User responses can include both direct answers and additional context\n", - "- The agent integrates all this information into its next actions\n", - "- The final outcome reflects the collaborative intelligence of both human and AI" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "8827632a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Question', 'args': {'content': 'What day and time are you considering for the brunch in 3 weeks? I can check my calendar for availability.'}}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Partner \",\n", - " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", - " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_6 = uuid.uuid4()\n", - "thread_config_6 = {\"configurable\": {\"thread_id\": thread_id_6}}\n", - "\n", - "# Run the graph until the first interrupt\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_6):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "1d9f7f1b", - "metadata": {}, - "source": [ - "Provide feedback for the `Question` tool call" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "4979effd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user providing feedback for the Question tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'partner@home.com', 'subject': 'Re: Meet Jim and Lisa for brunch in 3 weeks?', 'content': \"Hi,\\n\\nYes, let's definitely invite Jim and Lisa for brunch. I'll check my calendar to confirm which weekend works best for us. 
Also, since the new place on 17th is quite popular, we'll need to make a reservation in advance.\\n\\nI'll get back to you shortly with the best date.\\n\\nBest,\\nLance\"}}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Yes, let's invite them, but let me confirm which weekend works best. Also mention that we'll need to make a reservation since that place is popular.\"}]), config=thread_config_6):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "76b4ba9b", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "bfd34ec2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the write_email tool call...\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_TWXVmN0lmZpypDo7TZwEEWuw)\n", - " Call ID: call_TWXVmN0lmZpypDo7TZwEEWuw\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_6):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = 
chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "e214fe9e", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace:\n", - "\n", - "https://smith.langchain.com/public/f4c727c3-b1d9-47a5-b3d0-3451619db8a2/r\n", - "\n", - "We can see that user feedback in incorporated into the email response." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "070393eb", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", - "**From**: Partner \n", - "**To**: Lance Martin \n", - "\n", - "Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Question (call_ebOvuGZuyzNxziFBrYtXifeJ)\n", - " Call ID: call_ebOvuGZuyzNxziFBrYtXifeJ\n", - " Args:\n", - " content: What day and time are you considering for the brunch in 3 weeks? I can check my calendar for availability.\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User answered the question, which can we can use for any follow up actions. Feedback: Yes, let's invite them, but let me confirm which weekend works best. 
Also mention that we'll need to make a reservation since that place is popular.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_SWBLQ1lAEioAMc2KdMft7AhX)\n", - " Call ID: call_SWBLQ1lAEioAMc2KdMft7AhX\n", - " Args:\n", - " to: partner@home.com\n", - " subject: Re: Meet Jim and Lisa for brunch in 3 weeks?\n", - " content: Hi,\n", - "\n", - "Yes, let's definitely invite Jim and Lisa for brunch. I'll check my calendar to confirm which weekend works best for us. Also, since the new place on 17th is quite popular, we'll need to make a reservation in advance.\n", - "\n", - "I'll get back to you shortly with the best date.\n", - "\n", - "Best,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to partner@home.com with subject 'Re: Meet Jim and Lisa for brunch in 3 weeks?' and content: Hi,\n", - "\n", - "Yes, let's definitely invite Jim and Lisa for brunch. I'll check my calendar to confirm which weekend works best for us. 
Also, since the new place on 17th is quite popular, we'll need to make a reservation in advance.\n", - "\n", - "I'll get back to you shortly with the best date.\n", - "\n", - "Best,\n", - "Lance\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_TWXVmN0lmZpypDo7TZwEEWuw)\n", - " Call ID: call_TWXVmN0lmZpypDo7TZwEEWuw\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_6)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "8036348c", - "metadata": {}, - "source": [ - "## Test Case for Notify Classification\n", - "\n", - "This test demonstrates how the system handles emails classified as \"NOTIFY\" and how users can respond to notifications:\n", - "\n", - "1. The triage system classifies important informational emails as \"NOTIFY\" when:\n", - " - They contain important information (like security updates)\n", - " - They don't require immediate action\n", - " - They should be brought to the user's attention\n", - " \n", - "2. For notify classifications:\n", - " - The workflow routes to the `triage_interrupt_handler`\n", - " - The user sees the email content with options to ignore or respond\n", - " - No default action is suggested\n", - "\n", - "3. In this test, we:\n", - " - Process an IT security update email that gets classified as \"NOTIFY\"\n", - " - Simulate the user deciding to respond with specific feedback\n", - " - See how the agent drafts an appropriate response based on this feedback\n", - " - Approve the response to be sent\n", - "\n", - "This showcases how the HITL system can transform a passive notification into an active response when the user decides one is warranted, bridging the gap between the initial \"NOTIFY\" classification and a full response." 
- ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "930e86cf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "🔔 Classification: NOTIFY - This email contains important information\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n" - ] - } - ], - "source": [ - "# Notify - Important FYI Email\n", - "email_input_notify = {\n", - " \"to\": \"Team Members \",\n", - " \"author\": \"IT Department \",\n", - " \"subject\": \"Critical Security Update\",\n", - " \"email_thread\": \"Dear Team,\\n\\nThis is an important security notification. We will be updating our authentication system this weekend. During the update window (Saturday 2am-4am), you will not be able to access company resources.\\n\\nPlease ensure you log out of all systems before the maintenance window.\\n\\nRegards,\\nIT Department\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_7 = uuid.uuid4()\n", - "thread_config_7 = {\"configurable\": {\"thread_id\": thread_id_7}}\n", - "\n", - "# Run the graph until the first interrupt - should be classified as \"notify\"\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_7):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "c3d062bb", - "metadata": {}, - "source": [ - "Now simulate user deciding to respond with feedback.\n", - "\n", - "Although the email was classified as \"NOTIFY\" (meaning it normally 
wouldn't require a response), the HITL system gives users the flexibility to override this classification. In this step:\n", - "\n", - "1. We provide feedback indicating we want to acknowledge receipt of the security notice\n", - "2. The `triage_interrupt_handler` processes this feedback:\n", - " - It adds the user's guidance as a message to the state\n", - " - It routes to the `response_agent` node instead of ending\n", - "3. The response agent uses this guidance to draft an appropriate acknowledgment email\n", - "4. An interrupt is created for the user to review this draft before sending\n", - "\n", - "This demonstrates an important capability: the ability for users to override the initial classification when they feel a response is warranted. This ensures that the system remains flexible and adaptable to user preferences, while still providing useful initial triage." - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "6bd027a4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user deciding to respond with feedback...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'it@company.com', 'subject': 'Re: Critical Security Update', 'content': 'Dear IT Department,\\n\\nThank you for the important security update notification. 
I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\\n\\nRegards,\\nLance'}}\n" - ] - } - ], - "source": [ - "print(\"\\nSimulating user deciding to respond with feedback...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\"}]), config=thread_config_7):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "e84de751", - "metadata": {}, - "source": [ - "Accept the `write_email` tool call after feedback." - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "073642cd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the {'action': 'write_email', 'args': {'to': 'it@company.com', 'subject': 'Re: Critical Security Update', 'content': 'Dear IT Department,\\n\\nThank you for the important security update notification. 
I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\\n\\nRegards,\\nLance'}} tool call...\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_7):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "b270fa6c", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace:\n", - "\n", - "https://smith.langchain.com/public/6594f98f-eb83-4560-9c34-28ec22ada3dc/r\n", - "\n", - "We can see that user feedback causes agent to go reply to the email." - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "a1bbc9b7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Email to notify user about: \n", - "\n", - "**Subject**: Critical Security Update\n", - "**From**: IT Department \n", - "**To**: Team Members \n", - "\n", - "Dear Team,\n", - "\n", - "This is an important security notification. We will be updating our authentication system this weekend. 
During the update window (Saturday 2am-4am), you will not be able to access company resources.\n", - "\n", - "Please ensure you log out of all systems before the maintenance window.\n", - "\n", - "Regards,\n", - "IT Department\n", - "\n", - "---\n", - "\n", - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "User wants to reply to the email. Use this feedback to respond: We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_9fi3k78DE98MFtey5aYSaql7)\n", - " Call ID: call_9fi3k78DE98MFtey5aYSaql7\n", - " Args:\n", - " to: it@company.com\n", - " subject: Re: Critical Security Update\n", - " content: Dear IT Department,\n", - "\n", - "Thank you for the important security update notification. I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\n", - "\n", - "Regards,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to it@company.com with subject 'Re: Critical Security Update' and content: Dear IT Department,\n", - "\n", - "Thank you for the important security update notification. 
I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\n", - "\n", - "Regards,\n", - "Lance\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_K8dqQ4AIYjMz7QusZ5eZjrRY)\n", - " Call ID: call_K8dqQ4AIYjMz7QusZ5eZjrRY\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_7)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "318fce72", - "metadata": {}, - "source": [ - "## Test Case for Notify + Ignore\n", - "\n", - "This test demonstrates the other path for notifications - when users choose to simply acknowledge without responding:\n", - "\n", - "1. We process a company picnic announcement email, which gets classified as \"NOTIFY\"\n", - "2. The user decides this notification needs no response and chooses to ignore it\n", - "3. 
The workflow ends immediately with no further action\n", - "\n", - "This scenario highlights several key aspects of the HITL system:\n", - "- The initial triage correctly identifies information that's worth seeing but doesn't require action\n", - "- Users can quickly process such notifications with minimal interaction\n", - "- The system respects the user's decision not to act\n", - "- The workflow efficiently ends without wasting time on unnecessary steps\n", - "\n", - "Together with the previous test, this demonstrates the complete notification handling workflow:\n", - "- Some notifications warrant responses (previous test)\n", - "- Others simply need acknowledgment (this test)\n", - "- The user maintains control over which path to take" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "ff23e6e7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "🔔 Classification: NOTIFY - This email contains important information\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n" - ] - } - ], - "source": [ - "# Notify - Important FYI Email\n", - "email_input_notify = {\n", - " \"to\": \"Team Members \",\n", - " \"author\": \"HR Department \",\n", - " \"subject\": \"Company Picnic Next Month\",\n", - " \"email_thread\": \"Dear Team,\\n\\nWe're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. 
There will be food, games, and activities for families.\\n\\nMore details will follow in the coming weeks.\\n\\nRegards,\\nHR Department\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer)\n", - "thread_id_8 = uuid.uuid4()\n", - "thread_config_8 = {\"configurable\": {\"thread_id\": thread_id_8}}\n", - "\n", - "# Run the graph until the first interrupt - should be classified as \"notify\"\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_8):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "8b44290b", - "metadata": {}, - "source": [ - "Now simulate user deciding to ignore the notification.\n", - "\n", - "In this step, we simulate the simplest path for notification handling - acknowledgment without action:\n", - "\n", - "1. We receive the interrupt with the notification about the company picnic\n", - "2. We respond with the \"ignore\" response type\n", - "3. The workflow immediately ends without creating any responses\n", - "4. 
The message history shows only the notification itself, with no additional processing\n", - "\n", - "This straightforward path is actually critical for workflow efficiency:\n", - "- It allows users to quickly process informational emails\n", - "- It avoids the overhead of unnecessary response generation\n", - "- It recognizes that many notifications simply need to be seen, not answered\n", - "- It respects the user's time by ending the workflow immediately\n", - "\n", - "The complete message history shows how clean this path is - just the notification itself, with no additional messages once the user chooses to ignore it." - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "d17d9526", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user deciding to ignore the notification...\n" - ] - } - ], - "source": [ - "print(\"\\nSimulating user deciding to ignore the notification...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_8):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "0d7269bb", - "metadata": {}, - "source": [ - "Look at the full message history, and see the trace:\n", - "\n", - "https://smith.langchain.com/public/8193f616-244f-471d-8ec6-bd39624a0c88/r\n", - "\n", - "Here, we can see that if we ignore the notification, we just end. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "ae5dbaba", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Email to notify user about: \n", - "\n", - "**Subject**: Company Picnic Next Month\n", - "**From**: HR Department \n", - "**To**: Team Members \n", - "\n", - "Dear Team,\n", - "\n", - "We're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. There will be food, games, and activities for families.\n", - "\n", - "More details will follow in the coming weeks.\n", - "\n", - "Regards,\n", - "HR Department\n", - "\n", - "---\n", - "\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_8)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "afb1be24", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{"cells": [{"cell_type": "markdown", "id": "07c57479", "metadata": {}, "source": ["# Test Cases for Human-in-the-Loop\n"]}, {"cell_type": "markdown", "id": "1c77003d", "metadata": {}, "source": ["Let's see this working in practice by importing the email assistant with HITL from our module, `src/email_assistant/email_assistant_hitl.py`. 
We'll compile the graph with a [checkpointer](https://langchain-ai.github.io/langgraph/concepts/memory/#short-term-memory), which allows us to persist the state of the graph after interruption. We can then resume execution from the same state after the human has responded.\n", "\n", "> **Note:** We import from the installed `email_assistant` package to access our project's module structure, which contains reusable prompts and components. The autoreload extensions ensure any changes to these modules are automatically reflected in the notebook without requiring kernel restarts. This allows us to organize our prompts in a dedicated module rather than defining them inline, making them easier to maintain and reuse across the notebooks! You can see all these files in: `src/email_assistant`"]}, {"cell_type": "code", "execution_count": 1, "id": "2a082b81", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["/Users/rlm/Desktop/Code/interrupt_workshop\n"]}], "source": ["%load_ext autoreload\n", "%autoreload 2\n", "\n", "import uuid\n", "from langgraph.checkpoint.memory import MemorySaver\n", "from email_assistant.email_assistant_hitl import overall_workflow"]}, {"cell_type": "markdown", "id": "37bac65b", "metadata": {}, "source": ["## Accept `write_email` and `schedule_meeting`\n", "\n", "This test demonstrates the fundamental HITL approval flow when a user accepts all agent actions:\n", "1. An email about tax planning is received and classified as \"RESPOND\"\n", "2. The agent checks calendar availability for both suggested dates (Tuesday and Thursday)\n", "3. The agent proposes scheduling a meeting on Tuesday at 2:00 PM for 45 minutes\n", "4. The user reviews and ACCEPTS the meeting request without changes\n", "5. The agent drafts a confirmation email to send to the client\n", "6. The user reviews and ACCEPTS the email draft without changes\n", "7. 
The agent marks the workflow as complete\n", "\n", "This scenario shows how human-in-the-loop works at its most basic level - humans provide oversight while the agent handles the execution."]}, {"cell_type": "code", "execution_count": 36, "id": "5ac5dde7", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-05-06', 'start_time': 14}}\n"]}], "source": ["import uuid\n", "\n", "# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_1 = uuid.uuid4()\n", "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "5454d987", "metadata": {}, "source": ["Accept the `schedule_meeting` tool call\n", "\n", "After the agent proposes scheduling a meeting, it creates an interrupt to seek human approval. In this step, we simulate a user accepting the proposed meeting parameters without changes. This allows the workflow to proceed to the next step (drafting a confirmation email). The interrupt object contains critical information:\n", "\n", "1. The action type (`schedule_meeting`)\n", "2. The proposed meeting parameters (attendees, subject, duration, day, time)\n", "3. 
Configuration options for what kinds of responses are allowed\n", "\n", "When the user accepts, the tool is executed as proposed without modification."]}, {"cell_type": "code", "execution_count": 37, "id": "643e7974", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-05-06', 'start_time': 14}} tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I am available on Tuesday, May 6th at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\\n\\nLooking forward to your suggestions.\\n\\nBest regards,\\nLance'}}\n"]}], "source": ["from langgraph.types import Command\n", "\n", "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "00d8c1a5", "metadata": {}, "source": ["Accept the `write_email` tool call\n", "\n", "With the meeting scheduled, the agent now drafts a confirmation email to the client. The interrupt contains:\n", "\n", "1. The email tool action with recipient, subject, and proposed content\n", "2. The formatted email showing what will be sent\n", "3. 
The same configuration options for user response types\n", "\n", "When the user accepts, the email is sent as written. After this step, the agent marks the task as complete with the `Done` tool call, and the workflow ends. The complete message history shows all the steps taken:\n", "\n", "1. Initial email processing\n", "2. Calendar availability checks for both days\n", "3. Meeting scheduling with confirmation\n", "4. Email composition and sending\n", "5. Workflow completion"]}, {"cell_type": "code", "execution_count": 13, "id": "656f4336", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I have scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 45 minutes.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\n\\nLance Martin'}} tool call...\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_h6a55b78txHk7GV7yQa9sQU9)\n", " Call ID: call_h6a55b78txHk7GV7yQa9sQU9\n", " Args:\n", " done: True\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "7d228ff8", "metadata": {}, "source": ["Look at 
the full message history, and see trace:\n", "\n", "https://smith.langchain.com/public/82277f96-3abd-48e0-a4db-413f7572240d/r"]}, {"cell_type": "code", "execution_count": 14, "id": "3b168d80", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_NrkaML1VPyeDXFNiQZUvfGIo)\n", " Call ID: call_NrkaML1VPyeDXFNiQZUvfGIo\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_cjRWdZCRnPsYsagEGZxASZht)\n", " Call ID: call_cjRWdZCRnPsYsagEGZxASZht\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_lD42qto5T8dxkfzV6yYjWZQL)\n", " Call ID: call_lD42qto5T8dxkfzV6yYjWZQL\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies 
Discussion\n", " duration_minutes: 45\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_ElvioA4LUlyWqCTgwQTI4gs6)\n", " Call ID: call_ElvioA4LUlyWqCTgwQTI4gs6\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hello,\n", "\n", "Thank you for reaching out. I have scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 45 minutes.\n", "\n", "Looking forward to our discussion.\n", "\n", "Best regards,\n", "\n", "Lance Martin\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hello,\n", "\n", "Thank you for reaching out. I have scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 45 minutes.\n", "\n", "Looking forward to our discussion.\n", "\n", "Best regards,\n", "\n", "Lance Martin\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_h6a55b78txHk7GV7yQa9sQU9)\n", " Call ID: call_h6a55b78txHk7GV7yQa9sQU9\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_1)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "86b1ba30", "metadata": {}, "source": ["## Edit `write_email` and `schedule_meeting`\n", "\n", "This test demonstrates how human modification works in the HITL flow:\n", "1. 
We start with the same tax planning email as before\n", "2. The agent proposes a meeting with the same parameters\n", "3. This time, the user EDITS the meeting proposal to change:\n", " - Duration from 45 to 30 minutes\n", " - Meeting subject is made more concise\n", "4. The agent adapts to these changes when drafting the email\n", "5. The user further EDITS the email to be shorter and less formal\n", "6. The workflow completes with both modifications incorporated\n", "\n", "This scenario showcases one of the most powerful aspects of HITL: users can make precise modifications to agent actions before they are executed, ensuring the final outcome matches their preferences without having to handle all the details themselves."]}, {"cell_type": "code", "execution_count": 14, "id": "3bfca1b4", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-05-06', 'start_time': 14}}\n"]}], "source": ["# Same email as before\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_2 = uuid.uuid4()\n", "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", "\n", "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a schedule_meeting tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "706ac0a6", "metadata": {}, "source": ["Edit the `schedule_meeting` tool call\n", "\n", "When the agent proposes the initial meeting schedule, we now simulate the user making modifications through the edit functionality. This demonstrates how the `edit` response type works:\n", "\n", "1. The user receives the same meeting proposal as in the previous test\n", "2. Instead of accepting, they modify the parameters:\n", " - Reducing duration from 45 to 30 minutes\n", " - Keeping the same day and time\n", "3. The `edit` response includes the complete set of modified arguments\n", "4. The interrupt handler replaces the original tool arguments with these edited ones\n", "5. 
The tool is executed with the user's modifications\n", "\n", "This shows how edit capability gives users precise control over agent actions while still letting the agent handle the execution details."]}, {"cell_type": "code", "execution_count": 15, "id": "7175fedb", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user editing the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out regarding tax planning strategies. I am available on Tuesday, May 6th at 2:00 PM, and have scheduled a 30-minute call for us to discuss your suggestions. If you feel we need additional time, please let me know and I can adjust the meeting duration accordingly.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\nLance'}}\n"]}], "source": ["# Now simulate user editing the schedule_meeting tool call\n", "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", "edited_schedule_args = {\n", " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", " \"subject\": \"Tax Planning Discussion\",\n", " \"duration_minutes\": 30, # Changed from 45 to 30\n", " \"preferred_day\": \"2025-05-06\",\n", " \"start_time\": 14 \n", "}\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "9757706b", "metadata": {}, "source": ["Edit 
the `write_email` tool call\n", "\n", "After accepting the modified meeting schedule, the agent drafts an email reflecting the 30-minute duration. Now we demonstrate how editing works with email content:\n", "\n", "1. The agent has adapted its email to mention the shorter 30-minute duration\n", "2. We simulate the user wanting an even more significant change to the email:\n", " - Completely rewriting the content to be shorter and less formal\n", " - Changing the meeting day mentioned in the email (showing how users can correct agent mistakes)\n", " - Requesting confirmation rather than stating the meeting as definite\n", "3. The `edit` response contains the complete new email content\n", "4. The tool arguments are updated with this edited content\n", "5. The email is sent with the user's preferred wording\n", "\n", "This example shows the power of HITL for complex communication tasks - the agent handles the structure and initial content, while humans can refine tone, style, and substance."]}, {"cell_type": "code", "execution_count": 16, "id": "a0604d53", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user editing the write_email tool call...\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_PVtmxQJgG0wdFh299b70ab1z)\n", " Call ID: call_PVtmxQJgG0wdFh299b70ab1z\n", " Args:\n", " done: True\n"]}], "source": ["# Now simulate user editing the write_email tool call\n", "print(\"\\nSimulating user editing the write_email tool call...\")\n", "edited_email_args = {\n", " \"to\": \"pm@client.com\",\n", " \"subject\": \"Re: Tax season let's schedule call\",\n", " \"content\": \"Hello Project Manager,\\n\\nThank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", "}\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "ac279101", "metadata": {}, "source": ["Look at the full message history, and see trace, to view the edited tool calls:\n", "\n", "https://smith.langchain.com/public/21769510-d57a-41e4-b5c7-0ddb23c237d8/r"]}, {"cell_type": "code", "execution_count": 17, "id": "6d3e9be7", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_hKFyYkfY3fhQqflCAh2dm69E)\n", " Call ID: call_hKFyYkfY3fhQqflCAh2dm69E\n", " Args:\n", " day: 2025-05-06\n", " check_calendar_availability (call_xdLGVBhQ3ki1vOSzReNxCZYk)\n", " Call ID: call_xdLGVBhQ3ki1vOSzReNxCZYk\n", " Args:\n", " day: 2025-05-08\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-05-06: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-05-08: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_jJoVei2IxgVbQI7cXrEPJe8p)\n", " Call ID: call_jJoVei2IxgVbQI7cXrEPJe8p\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Discussion\n", " duration_minutes: 30\n", " preferred_day: 2025-05-06\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Discussion' scheduled on Tuesday, May 06, 2025 at 14 for 30 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_MTSKFqw6OfmpZU6bMt8nCHwl)\n", " Call ID: call_MTSKFqw6OfmpZU6bMt8nCHwl\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hello Project Manager,\n", "\n", "Thank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\n", "\n", "Best regards,\n", "Lance Martin\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hello Project Manager,\n", "\n", "Thank you for reaching out about tax planning. I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\n", "\n", "Best regards,\n", "Lance Martin\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_PVtmxQJgG0wdFh299b70ab1z)\n", " Call ID: call_PVtmxQJgG0wdFh299b70ab1z\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_2)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "5281cb1f", "metadata": {}, "source": ["## Ignore `write_email`, `schedule_meeting`, and `question`\n", "\n", "This test set demonstrates the \"ignore\" capability of the HITL system, showing how users can reject agent actions entirely:\n", "\n", "1. First, we test ignoring a `schedule_meeting` request:\n", " - When the agent proposes scheduling a meeting, the user rejects it completely\n", " - The workflow ends immediately without scheduling anything\n", " \n", "2. Second, we test accepting a meeting but ignoring the follow-up email:\n", " - The user accepts the meeting schedule\n", " - But when the agent drafts a confirmation email, the user ignores it\n", " - The meeting is still scheduled, but no email is sent\n", " - The workflow ends after the rejection\n", "\n", "3. 
Third, we test ignoring a `question` tool call:\n", " - For a different email about brunch plans\n", " - The agent asks a clarifying question\n", " - The user ignores the question\n", " - The workflow ends without further action\n", "\n", "The \"ignore\" capability is crucial for HITL systems as it allows users to stop potentially unwanted actions before they occur, providing an important safety mechanism and ensuring users maintain full control."]}, {"cell_type": "code", "execution_count": 20, "id": "d60baeed", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_3 = uuid.uuid4()\n", "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "2442cc81", "metadata": {}, "source": ["Ignore the `schedule_meeting` tool call\n", "\n", "In this test, we demonstrate rejection of the meeting scheduling proposal:\n", "\n", "1. The agent suggests a 45-minute meeting on Tuesday at 2:00 PM\n", "2. We simulate the user selecting \"ignore\" in the Agent Inbox interface\n", "3. 
The `interrupt_handler` processes this rejection with special logic:\n", " - It adds a message explaining the user's choice to ignore\n", " - It returns a command to end the workflow\n", " - No meeting is scheduled and no email is sent\n", "\n", "This capability is crucial for several reasons:\n", "- Prevents incorrect actions from being executed\n", "- Gives users veto power over any agent decision\n", "- Provides a clear exit path when the agent's suggestion isn't appropriate\n", "\n", "The trace shows how the workflow ends immediately after the ignore action."]}, {"cell_type": "code", "execution_count": 21, "id": "f2e86ca6", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user ignoring the {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}} tool call...\n"]}], "source": ["print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "f5ccb8e0", "metadata": {}, "source": ["As you can see from the trace, we end:\n", "\n", "https://smith.langchain.com/public/4e322b99-08ea-4d23-9653-475415ff3e33/r\n", "\n", "Now, let's run again, but ignore the `write_email` tool call:"]}, {"cell_type": "code", "execution_count": 22, "id": "15e33156", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", 
"\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "Simulating user accepting the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hi,\\n\\nThank you for reaching out. I have scheduled our call to discuss tax planning strategies for Tuesday, April 22, 2025, at 2:00 PM. We will have 45 minutes to go over your suggestions.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\n\\nLance Martin'}}\n", "\n", "Simulating user ignoring the write_email tool call...\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User ignored this email draft. Ignore this email and end the workflow.\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_3 = uuid.uuid4()\n", "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_3):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in 
chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "1acea519", "metadata": {}, "source": ["Again, we end:\n", "\n", "https://smith.langchain.com/public/819be555-4919-4d14-bdd9-eb6f73a3bafe/r"]}, {"cell_type": "code", "execution_count": 23, "id": "eb55b5f1", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_JpQevSvNkmodiACKRfONrqan)\n", " Call ID: call_JpQevSvNkmodiACKRfONrqan\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_3gJfzFvJkgY9lRVTpUF1D9Zt)\n", " Call ID: call_3gJfzFvJkgY9lRVTpUF1D9Zt\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", 
"Tool Calls:\n", " schedule_meeting (call_FB8EtqpH1UZHfvZzerDzq16R)\n", " Call ID: call_FB8EtqpH1UZHfvZzerDzq16R\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 45\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_GlntMuOdVnAjNq9R0tXmCkbV)\n", " Call ID: call_GlntMuOdVnAjNq9R0tXmCkbV\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hi,\n", "\n", "Thank you for reaching out. I have scheduled our call to discuss tax planning strategies for Tuesday, April 22, 2025, at 2:00 PM. We will have 45 minutes to go over your suggestions.\n", "\n", "Looking forward to our discussion.\n", "\n", "Best regards,\n", "\n", "Lance Martin\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User ignored this email draft. Ignore this email and end the workflow.\n"]}], "source": ["state = graph.get_state(thread_config_3)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "7200cdaf", "metadata": {}, "source": ["Now let's try an email that calls the `Question` tool\n", "\n", "The `Question` tool represents another important HITL interaction pattern - the agent asking for additional information rather than taking immediate action. This test shows:\n", "\n", "1. A different email scenario about a potential brunch invitation\n", "2. The agent doesn't have enough information to respond definitively\n", "3. 
Instead of guessing, it uses the `Question` tool to ask for clarification\n", "4. We'll simulate the user ignoring this question\n", "\n", "This demonstrates how the HITL system gracefully handles requests for information, and what happens when users choose not to engage with these requests."]}, {"cell_type": "code", "execution_count": 24, "id": "bdea633a", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Question', 'args': {'content': 'What day and time are you considering for the brunch in 3 weeks?'}}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Partner \",\n", " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_4 = uuid.uuid4()\n", "thread_config_4 = {\"configurable\": {\"thread_id\": thread_id_4}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a Question tool call to ask for clarification\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_4):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "9aaca6d4", "metadata": {}, "source": ["Ignore the `Question` tool call\n", "\n", "When the agent asks for clarification about the brunch plans, we simulate a user ignoring the question:\n", "\n", "1. The agent has asked about preferred day and time for the brunch\n", "2. We provide an \"ignore\" response to this question\n", "3. The system processes this as a decision to abandon handling this email:\n", " - A message is added stating \"User ignored this question. 
Ignore this email and end the workflow.\"\n", " - The workflow ends without sending any response\n", "\n", "This pattern is important because it allows users to:\n", "- Decide that some emails don't actually need responses\n", "- Avoid providing information they're not ready to share\n", "- Defer decision-making to a later time\n", "- Take over email handling manually if they prefer\n", "\n", "The trace and message history show how cleanly the workflow ends after ignoring the question."]}, {"cell_type": "code", "execution_count": 25, "id": "d4e5d62b", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user ignoring the {'action': 'Question', 'args': {'content': 'What day and time are you considering for the brunch in 3 weeks?'}} tool call...\n"]}], "source": ["print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_4):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "90c7ade4", "metadata": {}, "source": ["As before, we end:\n", "\n", "https://smith.langchain.com/public/276c4016-2b4c-43f5-a677-834a5eaa47c0/r"]}, {"cell_type": "code", "execution_count": 26, "id": "ef334860", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", "**From**: Partner \n", "**To**: Lance Martin \n", "\n", "Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Question (call_l6LC1srm8qt8CJCihDTz7wAW)\n", " Call ID: call_l6LC1srm8qt8CJCihDTz7wAW\n", " Args:\n", " content: What day and time are you considering for the brunch in 3 weeks?\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User ignored this question. Ignore this email and end the workflow.\n"]}], "source": ["state = graph.get_state(thread_config_4)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "da4c39e9", "metadata": {}, "source": ["## Respond (with feedback) `write_email`, `schedule_meeting`, and `question`\n", "\n", "This test set demonstrates the \"response\" capability - providing feedback without editing or accepting:\n", "\n", "1. First, we test feedback for meeting scheduling:\n", " - The user provides specific preferences (30 minutes instead of 45, and afternoon meetings)\n", " - The agent incorporates this feedback into a revised proposal\n", " - The user then accepts the revised meeting schedule\n", "\n", "2. Second, we test feedback for email drafting:\n", " - The user requests a shorter, less formal email with a specific closing statement\n", " - The agent completely rewrites the email according to this guidance\n", " - The user accepts the new draft\n", "\n", "3. Lastly, we test feedback for questions:\n", " - For the brunch invitation, the user answers the question with additional context\n", " - The agent uses this information to draft an appropriate email response\n", " - The workflow proceeds with the user's input integrated\n", "\n", "The \"response\" capability bridges the gap between acceptance and editing - users can guide the agent without having to write the full content themselves. 
This is especially powerful for:\n", "- Adjusting tone and style\n", "- Adding context the agent missed\n", "- Redirecting the agent's approach\n", "- Answering questions in a way that shapes the next steps"]}, {"cell_type": "code", "execution_count": 27, "id": "3c4b3517", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_5 = uuid.uuid4()\n", "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "ae2bea0a", "metadata": {}, "source": ["Provide feedback for the `schedule_meeting` tool call\n", "\n", "Now we explore the feedback capability for meeting scheduling:\n", "\n", "1. The agent proposes the standard 45-minute meeting on Tuesday at 2:00 PM\n", "2. Instead of accepting or editing, we provide feedback in natural language\n", "3. Our feedback specifies two preferences:\n", " - Shorter meeting (30 minutes instead of 45)\n", " - Preference for afternoon meetings (after 2pm)\n", "4. The agent receives this feedback through the `response` type\n", "5. The interrupt handler adds this feedback as a message to the state\n", "6. The agent processes this feedback and generates a new tool call incorporating these preferences\n", "\n", "Unlike direct editing, which requires specifying the entire set of parameters, feedback allows users to express their preferences conversationally. 
The agent must then interpret this feedback and apply it appropriately to create a revised proposal."]}, {"cell_type": "code", "execution_count": 28, "id": "9a916e10", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user providing feedback for the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 30, 'preferred_day': '2025-04-22', 'start_time': 14}}\n"]}], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "cf35f1a2", "metadata": {}, "source": ["Accept the `schedule_meeting` tool call after providing feedback"]}, {"cell_type": "code", "execution_count": 29, "id": "2727fb0e", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 30, 'preferred_day': '2025-04-22', 'start_time': 14}} tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': \"Hello,\\n\\nThank you for reaching out. 
I've scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 30 minutes.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\n\\nLance Martin\"}}\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "3ca470c5", "metadata": {}, "source": ["Now provide feedback for the `write_email` tool call\n", "\n", "After accepting the revised meeting schedule, the agent drafts an email. We now test feedback for email content:\n", "\n", "1. The agent's email is relatively formal and detailed\n", "2. We provide stylistic feedback requesting:\n", " - A shorter, more concise email\n", " - A less formal tone\n", " - A specific closing statement about looking forward to the meeting\n", "3. The agent processes this feedback to completely rewrite the email\n", "4. 
The new draft is much shorter, more casual, and includes the requested closing\n", "\n", "This demonstrates the power of natural language feedback for content creation:\n", "- Users don't need to rewrite the entire email themselves\n", "- They can provide high-level guidance on style, tone, and content\n", "- The agent handles the actual writing based on this guidance\n", "- The result better matches user preferences while preserving the essential information\n", "\n", "The message history shows both the original and revised emails, clearly showing how the feedback was incorporated."]}, {"cell_type": "code", "execution_count": 30, "id": "f5221d87", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user providing feedback for the write_email tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': \"Hi,\\n\\nI've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\\n\\nBest,\\nLance\"}}\n"]}], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "1266ec72", "metadata": {}, "source": ["Accept the `write_email` tool call after providing feedback"]}, {"cell_type": "code", "execution_count": 31, "id": "0b4698c0", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': \"Hi,\\n\\nI've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\\n\\nBest,\\nLance\"}} tool call...\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "c270f52a", "metadata": {}, "source": ["Look at the full message history, and see the trace:\n", "\n", "https://smith.langchain.com/public/57006770-6bb3-4e40-b990-143c373ebe60/r\n", "\n", "We can see that user feedback is incorporated into the tool calls. 
"]}, {"cell_type": "code", "execution_count": 32, "id": "1daf10d6", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_5Cs51CxeKicG85x0xeeZgRxg)\n", " Call ID: call_5Cs51CxeKicG85x0xeeZgRxg\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_c7m8uYJaVH9x6l5zJdUj3HMk)\n", " Call ID: call_c7m8uYJaVH9x6l5zJdUj3HMk\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_2HHQvYV5EnkOzGfGITEnE772)\n", " Call ID: call_2HHQvYV5EnkOzGfGITEnE772\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 45\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", 
"=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User gave feedback, which can we incorporate into the meeting request. Feedback: Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_YbywvAlWYqWrbvH7zZqu6yUi)\n", " Call ID: call_YbywvAlWYqWrbvH7zZqu6yUi\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 30\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 30 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_MYaMobBPrGW9ZcC8DXI55rFd)\n", " Call ID: call_MYaMobBPrGW9ZcC8DXI55rFd\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hello,\n", "\n", "Thank you for reaching out. I've scheduled a call for us to discuss tax planning strategies on Tuesday, April 22, 2025, at 2:00 PM. The meeting is set for 30 minutes.\n", "\n", "Looking forward to our discussion.\n", "\n", "Best regards,\n", "\n", "Lance Martin\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User gave feedback, which can we incorporate into the email. Feedback: Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_xVUwWSfechpEhekihDUmw0fE)\n", " Call ID: call_xVUwWSfechpEhekihDUmw0fE\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hi,\n", "\n", "I've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\n", "\n", "Best,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hi,\n", "\n", "I've set up our call for Tuesday, April 22, at 2:00 PM for 30 minutes. Looking forward to it!\n", "\n", "Best,\n", "Lance\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_Y1oqvAcB4yYoNNY15Wxnroh7)\n", " Call ID: call_Y1oqvAcB4yYoNNY15Wxnroh7\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_5)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "1d964e36", "metadata": {}, "source": ["Now let's try an email that calls the `Question` tool to provide feedback\n", "\n", "Finally, we test how feedback works with the `Question` tool:\n", "\n", "1. For the brunch invitation email, the agent asks about preferred day and time\n", "2. Instead of ignoring, we provide a substantive response with additional context:\n", " - Confirming we want to invite the people mentioned\n", " - Noting we need to check which weekend works best\n", " - Adding information about needing a reservation\n", "3. 
The agent uses this information to:\n", " - Draft a comprehensive email response incorporating all our feedback\n", " - Notice we didn't provide a specific day/time, so it suggests checking the calendar\n", " - Include the detail about making a reservation\n", "4. The complete email reflects both the original request and our additional guidance\n", "\n", "This demonstrates how question responses can shape the entire workflow:\n", "- Questions let the agent gather missing information\n", "- User responses can include both direct answers and additional context\n", "- The agent integrates all this information into its next actions\n", "- The final outcome reflects the collaborative intelligence of both human and AI"]}, {"cell_type": "code", "execution_count": 33, "id": "8827632a", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Question', 'args': {'content': 'What day and time are you considering for the brunch in 3 weeks? I can check my calendar for availability.'}}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Partner \",\n", " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_6 = uuid.uuid4()\n", "thread_config_6 = {\"configurable\": {\"thread_id\": thread_id_6}}\n", "\n", "# Run the graph until the first interrupt\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_6):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "1d9f7f1b", "metadata": {}, "source": ["Provide feedback for the `Question` tool call"]}, {"cell_type": "code", "execution_count": 34, "id": "4979effd", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user providing feedback for the Question tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'partner@home.com', 'subject': 'Re: Meet Jim and Lisa for brunch in 3 weeks?', 'content': \"Hi,\\n\\nYes, let's definitely invite Jim and Lisa for brunch. I'll check my calendar to confirm which weekend works best for us. Also, since the new place on 17th is quite popular, we'll need to make a reservation in advance.\\n\\nI'll get back to you shortly with the best date.\\n\\nBest,\\nLance\"}}\n"]}], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Yes, let's invite them, but let me confirm which weekend works best. 
Also mention that we'll need to make a reservation since that place is popular.\"}]), config=thread_config_6):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "76b4ba9b", "metadata": {}, "source": ["Accept the `write_email` tool call"]}, {"cell_type": "code", "execution_count": 35, "id": "bfd34ec2", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the write_email tool call...\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_TWXVmN0lmZpypDo7TZwEEWuw)\n", " Call ID: call_TWXVmN0lmZpypDo7TZwEEWuw\n", " Args:\n", " done: True\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_6):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "e214fe9e", "metadata": {}, "source": ["Look at the full message history, and see the trace:\n", "\n", "https://smith.langchain.com/public/f4c727c3-b1d9-47a5-b3d0-3451619db8a2/r\n", "\n", "We can see that user feedback is incorporated into the email response."]}, {"cell_type": "code", "execution_count": 36, "id": "070393eb", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": 
["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", "**From**: Partner \n", "**To**: Lance Martin \n", "\n", "Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Question (call_ebOvuGZuyzNxziFBrYtXifeJ)\n", " Call ID: call_ebOvuGZuyzNxziFBrYtXifeJ\n", " Args:\n", " content: What day and time are you considering for the brunch in 3 weeks? I can check my calendar for availability.\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User answered the question, which can we can use for any follow up actions. Feedback: Yes, let's invite them, but let me confirm which weekend works best. Also mention that we'll need to make a reservation since that place is popular.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_SWBLQ1lAEioAMc2KdMft7AhX)\n", " Call ID: call_SWBLQ1lAEioAMc2KdMft7AhX\n", " Args:\n", " to: partner@home.com\n", " subject: Re: Meet Jim and Lisa for brunch in 3 weeks?\n", " content: Hi,\n", "\n", "Yes, let's definitely invite Jim and Lisa for brunch. I'll check my calendar to confirm which weekend works best for us. Also, since the new place on 17th is quite popular, we'll need to make a reservation in advance.\n", "\n", "I'll get back to you shortly with the best date.\n", "\n", "Best,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to partner@home.com with subject 'Re: Meet Jim and Lisa for brunch in 3 weeks?' 
and content: Hi,\n", "\n", "Yes, let's definitely invite Jim and Lisa for brunch. I'll check my calendar to confirm which weekend works best for us. Also, since the new place on 17th is quite popular, we'll need to make a reservation in advance.\n", "\n", "I'll get back to you shortly with the best date.\n", "\n", "Best,\n", "Lance\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_TWXVmN0lmZpypDo7TZwEEWuw)\n", " Call ID: call_TWXVmN0lmZpypDo7TZwEEWuw\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_6)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "8036348c", "metadata": {}, "source": ["## Test Case for Notify Classification\n", "\n", "This test demonstrates how the system handles emails classified as \"NOTIFY\" and how users can respond to notifications:\n", "\n", "1. The triage system classifies important informational emails as \"NOTIFY\" when:\n", " - They contain important information (like security updates)\n", " - They don't require immediate action\n", " - They should be brought to the user's attention\n", " \n", "2. For notify classifications:\n", " - The workflow routes to the `triage_interrupt_handler`\n", " - The user sees the email content with options to ignore or respond\n", " - No default action is suggested\n", "\n", "3. 
In this test, we:\n", " - Process an IT security update email that gets classified as \"NOTIFY\"\n", " - Simulate the user deciding to respond with specific feedback\n", " - See how the agent drafts an appropriate response based on this feedback\n", " - Approve the response to be sent\n", "\n", "This showcases how the HITL system can transform a passive notification into an active response when the user decides one is warranted, bridging the gap between the initial \"NOTIFY\" classification and a full response."]}, {"cell_type": "code", "execution_count": 37, "id": "930e86cf", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udd14 Classification: NOTIFY - This email contains important information\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n"]}], "source": ["# Notify - Important FYI Email\n", "email_input_notify = {\n", " \"to\": \"Team Members \",\n", " \"author\": \"IT Department \",\n", " \"subject\": \"Critical Security Update\",\n", " \"email_thread\": \"Dear Team,\\n\\nThis is an important security notification. We will be updating our authentication system this weekend. 
During the update window (Saturday 2am-4am), you will not be able to access company resources.\\n\\nPlease ensure you log out of all systems before the maintenance window.\\n\\nRegards,\\nIT Department\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_7 = uuid.uuid4()\n", "thread_config_7 = {\"configurable\": {\"thread_id\": thread_id_7}}\n", "\n", "# Run the graph until the first interrupt - should be classified as \"notify\"\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_7):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "c3d062bb", "metadata": {}, "source": ["Now simulate user deciding to respond with feedback.\n", "\n", "Although the email was classified as \"NOTIFY\" (meaning it normally wouldn't require a response), the HITL system gives users the flexibility to override this classification. In this step:\n", "\n", "1. We provide feedback indicating we want to acknowledge receipt of the security notice\n", "2. The `triage_interrupt_handler` processes this feedback:\n", " - It adds the user's guidance as a message to the state\n", " - It routes to the `response_agent` node instead of ending\n", "3. The response agent uses this guidance to draft an appropriate acknowledgment email\n", "4. An interrupt is created for the user to review this draft before sending\n", "\n", "This demonstrates an important capability: the ability for users to override the initial classification when they feel a response is warranted. 
This ensures that the system remains flexible and adaptable to user preferences, while still providing useful initial triage."]}, {"cell_type": "code", "execution_count": 38, "id": "6bd027a4", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user deciding to respond with feedback...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'it@company.com', 'subject': 'Re: Critical Security Update', 'content': 'Dear IT Department,\\n\\nThank you for the important security update notification. I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\\n\\nRegards,\\nLance'}}\n"]}], "source": ["print(\"\\nSimulating user deciding to respond with feedback...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\"}]), config=thread_config_7):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "e84de751", "metadata": {}, "source": ["Accept the `write_email` tool call after feedback."]}, {"cell_type": "code", "execution_count": 39, "id": "073642cd", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the {'action': 'write_email', 'args': {'to': 'it@company.com', 'subject': 'Re: Critical Security Update', 'content': 'Dear IT Department,\\n\\nThank you for the important security update notification. 
I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\\n\\nRegards,\\nLance'}} tool call...\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_7):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "b270fa6c", "metadata": {}, "source": ["Look at the full message history, and see the trace:\n", "\n", "https://smith.langchain.com/public/6594f98f-eb83-4560-9c34-28ec22ada3dc/r\n", "\n", "We can see that user feedback causes the agent to reply to the email."]}, {"cell_type": "code", "execution_count": 40, "id": "a1bbc9b7", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Email to notify user about: \n", "\n", "**Subject**: Critical Security Update\n", "**From**: IT Department \n", "**To**: Team Members \n", "\n", "Dear Team,\n", "\n", "This is an important security notification. We will be updating our authentication system this weekend. During the update window (Saturday 2am-4am), you will not be able to access company resources.\n", "\n", "Please ensure you log out of all systems before the maintenance window.\n", "\n", "Regards,\n", "IT Department\n", "\n", "---\n", "\n", "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "User wants to reply to the email. 
Use this feedback to respond: We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_9fi3k78DE98MFtey5aYSaql7)\n", " Call ID: call_9fi3k78DE98MFtey5aYSaql7\n", " Args:\n", " to: it@company.com\n", " subject: Re: Critical Security Update\n", " content: Dear IT Department,\n", "\n", "Thank you for the important security update notification. I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\n", "\n", "Regards,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to it@company.com with subject 'Re: Critical Security Update' and content: Dear IT Department,\n", "\n", "Thank you for the important security update notification. I acknowledge receipt of this notice and confirm that I will ensure to log out of all systems before the maintenance window this Saturday from 2am to 4am.\n", "\n", "Regards,\n", "Lance\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_K8dqQ4AIYjMz7QusZ5eZjrRY)\n", " Call ID: call_K8dqQ4AIYjMz7QusZ5eZjrRY\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_7)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "318fce72", "metadata": {}, "source": ["## Test Case for Notify + Ignore\n", "\n", "This test demonstrates the other path for notifications - when users choose to simply acknowledge without responding:\n", "\n", "1. We process a company picnic announcement email, which gets classified as \"NOTIFY\"\n", "2. 
The user decides this notification needs no response and chooses to ignore it\n", "3. The workflow ends immediately with no further action\n", "\n", "This scenario highlights several key aspects of the HITL system:\n", "- The initial triage correctly identifies information that's worth seeing but doesn't require action\n", "- Users can quickly process such notifications with minimal interaction\n", "- The system respects the user's decision not to act\n", "- The workflow efficiently ends without wasting time on unnecessary steps\n", "\n", "Together with the previous test, this demonstrates the complete notification handling workflow:\n", "- Some notifications warrant responses (previous test)\n", "- Others simply need acknowledgment (this test)\n", "- The user maintains control over which path to take"]}, {"cell_type": "code", "execution_count": 41, "id": "ff23e6e7", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udd14 Classification: NOTIFY - This email contains important information\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n"]}], "source": ["# Notify - Important FYI Email\n", "email_input_notify = {\n", " \"to\": \"Team Members \",\n", " \"author\": \"HR Department \",\n", " \"subject\": \"Company Picnic Next Month\",\n", " \"email_thread\": \"Dear Team,\\n\\nWe're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. 
There will be food, games, and activities for families.\\n\\nMore details will follow in the coming weeks.\\n\\nRegards,\\nHR Department\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "graph = overall_workflow.compile(checkpointer=checkpointer)\n", "thread_id_8 = uuid.uuid4()\n", "thread_config_8 = {\"configurable\": {\"thread_id\": thread_id_8}}\n", "\n", "# Run the graph until the first interrupt - should be classified as \"notify\"\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_8):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "8b44290b", "metadata": {}, "source": ["Now simulate user deciding to ignore the notification.\n", "\n", "In this step, we simulate the simplest path for notification handling - acknowledgment without action:\n", "\n", "1. We receive the interrupt with the notification about the company picnic\n", "2. We respond with the \"ignore\" response type\n", "3. The workflow immediately ends without creating any responses\n", "4. 
The message history shows only the notification itself, with no additional processing\n", "\n", "This straightforward path is actually critical for workflow efficiency:\n", "- It allows users to quickly process informational emails\n", "- It avoids the overhead of unnecessary response generation\n", "- It recognizes that many notifications simply need to be seen, not answered\n", "- It respects the user's time by ending the workflow immediately\n", "\n", "The complete message history shows how clean this path is - just the notification itself, with no additional messages once the user chooses to ignore it."]}, {"cell_type": "code", "execution_count": 42, "id": "d17d9526", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user deciding to ignore the notification...\n"]}], "source": ["print(\"\\nSimulating user deciding to ignore the notification...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_8):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "0d7269bb", "metadata": {}, "source": ["Look at the full message history, and see the trace:\n", "\n", "https://smith.langchain.com/public/8193f616-244f-471d-8ec6-bd39624a0c88/r\n", "\n", "Here, we can see that if we ignore the notification, we just end. 
"]}, {"cell_type": "code", "execution_count": 43, "id": "ae5dbaba", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Email to notify user about: \n", "\n", "**Subject**: Company Picnic Next Month\n", "**From**: HR Department \n", "**To**: Team Members \n", "\n", "Dear Team,\n", "\n", "We're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. There will be food, games, and activities for families.\n", "\n", "More details will follow in the coming weeks.\n", "\n", "Regards,\n", "HR Department\n", "\n", "---\n", "\n"]}], "source": ["state = graph.get_state(thread_config_8)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "afb1be24", "metadata": {}, "source": []}], "metadata": {"jupytext": {"cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all"}, "kernelspec": {"display_name": ".venv", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6"}}, "nbformat": 4, "nbformat_minor": 5} diff --git a/tests/memory_testing.ipynb b/tests/memory_testing.ipynb index e63b7c1..ed0f2eb 100644 --- a/tests/memory_testing.ipynb +++ b/tests/memory_testing.ipynb @@ -1,2640 +1 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "d047044f", - "metadata": {}, - "source": [ - "# Test Cases for Memory" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "9e2c818a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/rlm/Desktop/Code/interrupt_workshop\n" - ] - } - ], - "source": [ - "%cd ..\n", - "%load_ext 
autoreload\n", - "%autoreload 2\n", - "\n", - "import uuid\n", - "from langgraph.store.memory import InMemoryStore\n", - "from langgraph.checkpoint.memory import MemorySaver\n", - "from email_assistant.email_assistant_hitl_memory import overall_workflow" - ] - }, - { - "cell_type": "markdown", - "id": "397114bf", - "metadata": {}, - "source": [ - "## Accept `write_email` and `schedule_meeting`\n", - "\n", - "Our first test examines what happens when a user accepts the agent's actions without modification. This baseline case helps us understand how the system behaves when no feedback is provided:\n", - "\n", - "1. We'll use the same tax planning email from our previous tests\n", - "2. The system will classify it as \"RESPOND\" and propose scheduling a meeting\n", - "3. We'll accept the meeting schedule without changes\n", - "4. The agent will generate an email confirming the meeting\n", - "5. We'll accept the email without changes\n", - "\n", - "This test demonstrates the default behavior of our memory-enabled system. When a user simply accepts proposed actions, we expect minimal or no memory updates since there's no explicit feedback to learn from. However, the system will still leverage existing memory (if any) when generating its responses." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8be43b59", - "metadata": {}, - "outputs": [], - "source": [ - "from langgraph.types import Command\n", - "\n", - "# Helper function to display memory content\n", - "def display_memory_content(store, namespace=None):\n", - " # Display current memory content for all namespaces\n", - " print(\"\\n======= CURRENT MEMORY CONTENT =======\")\n", - " if namespace:\n", - " memory = store.get(namespace, \"user_preferences\")\n", - " if memory:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print({\"preferences\": memory.value})\n", - " else:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print(\"No memory found\")\n", - " else:\n", - " for namespace in [\n", - " (\"email_assistant\", \"triage_preferences\"),\n", - " (\"email_assistant\", \"response_preferences\"),\n", - " (\"email_assistant\", \"cal_preferences\"),\n", - " (\"email_assistant\", \"background\")\n", - " ]:\n", - " memory = store.get(namespace, \"user_preferences\")\n", - " if memory:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print({\"preferences\": memory.value})\n", - " else:\n", - " print(f\"\\n--- {namespace[1]} ---\")\n", - " print(\"No memory found\")\n", - " print(\"=======================================\\n\")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "649cee4f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not 
worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_1 = uuid.uuid4()\n", - "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "878e199e", - "metadata": {}, - "source": [ - "Accept the schedule_meeting tool call\n", - "\n", - "As we examine the initial `schedule_meeting` proposal, note how the system uses existing memory to inform its decisions:\n", - "\n", - "1. The default calendar preferences show a preference for 30-minute meetings, though the email requests 45 minutes\n", - "2. The agent still proposes a 45-minute meeting, respecting the sender's specific request\n", - "3. We accept this proposal without modification to see if simple acceptance triggers any memory updates\n", - "\n", - "After running this step, we'll check the memory contents to confirm whether acceptance alone leads to memory updates. Simple acceptance represents the baseline user experience - the system works as intended without requiring adjustments." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "9589423b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\\n\\nLooking forward to your suggestions and our discussion.\\n\\nBest regards,\\nLance'}}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "e6b80f99", - "metadata": {}, - "source": [ - "Accept the write_email tool call\n", - "\n", - "Now we'll accept the email draft that confirms the meeting scheduling:\n", - "\n", - "1. The email draft is generated with knowledge of our calendar preferences\n", - "2. It includes details about the meeting time, duration, and purpose\n", - "3. We'll accept it without changes to complete the baseline test case\n", - "\n", - "After accepting, we'll check all memory stores to see if any updates occurred. 
As expected, simply accepting the agent's proposals doesn't provide strong learning signals - there's no clear feedback about what the user likes or dislikes about the agent's approach.\n", - "\n", - "The trace link shows the complete workflow execution, where we can see that the memory is used in the LLM call for response generation, but no memory updates occur, which is the expected behavior for simple acceptances." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "12035cf6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the write_email tool call...\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_02WPNb2qpXXVx43d3Z0VzWqv)\n", - " Call ID: call_02WPNb2qpXXVx43d3Z0VzWqv\n", - " Args:\n", - " done: True\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting the write_email tool call\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "fcbc178d", - "metadata": {}, - "source": [ - "We can look at the full messages, and the trace: \n", - "\n", - "https://smith.langchain.com/public/86ff6474-29fe-452e-8829-b05a91b458eb/r\n", - "\n", - "You'll notice that memory is used in the LLM call to respond. \n", - "\n", - "But the memory store *not* updated, because we haven't added any feedback via HITL." 
- ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "10ce8197", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_6ZbPjmSN8h1bfl3tDCx5ssoj)\n", - " Call ID: call_6ZbPjmSN8h1bfl3tDCx5ssoj\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_8M30JVmN7Wj2VlIcDqBE1995)\n", - " Call ID: call_8M30JVmN7Wj2VlIcDqBE1995\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_TChCTjTsVim5Zj8lnTPUtHs6)\n", - " Call ID: call_TChCTjTsVim5Zj8lnTPUtHs6\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - " 
duration_minutes: 45\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_KMR1rdut5FcbS82k3ckvCHm7)\n", - " Call ID: call_KMR1rdut5FcbS82k3ckvCHm7\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hello,\n", - "\n", - "Thank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\n", - "\n", - "Looking forward to your suggestions and our discussion.\n", - "\n", - "Best regards,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hello,\n", - "\n", - "Thank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. 
I have scheduled the meeting accordingly.\n", - "\n", - "Looking forward to your suggestions and our discussion.\n", - "\n", - "Best regards,\n", - "Lance\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_02WPNb2qpXXVx43d3Z0VzWqv)\n", - " Call ID: call_02WPNb2qpXXVx43d3Z0VzWqv\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_1)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "58201a21", - "metadata": {}, - "source": [ - "## Edit `write_email` and `schedule_meeting`\n", - "\n", - "This test explores how the system learns from direct edits to its proposed actions. When users modify the agent's suggestions, it creates clear, specific learning signals about their preferences:\n", - "\n", - "1. We'll use the same tax planning email as before\n", - "2. When the agent proposes a 45-minute meeting, we'll edit it to:\n", - " - Change the duration to 30 minutes (matching our stored preference)\n", - " - Make the subject line more concise\n", - "3. When the agent drafts an email, we'll edit it to be:\n", - " - Shorter and less formal\n", - " - Structured differently\n", - "\n", - "Edits provide the most explicit feedback about user preferences, letting the system learn exactly what changes are desired. We expect to see specific, targeted updates to our memory stores that reflect these edits." 
- ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "ac260423", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n" - ] - } - ], - "source": [ - "# Same email as before\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_2 = uuid.uuid4()\n", - "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", - "\n", - "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will create a write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt\n", - "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "5d73ba71", - "metadata": {}, - "source": [ - "Edit the `schedule_meeting` tool call\n", - "\n", - "When we edit the meeting proposal, we're providing direct, explicit feedback about our preferences. This creates a significant learning opportunity for the system:\n", - "\n", - "1. The agent initially proposes a 45-minute meeting (the duration requested in the email)\n", - "2. We edit it to 30 minutes and simplify the subject from \"Tax Planning Strategies Discussion\" to \"Tax Planning Discussion\"\n", - "3. This creates clear, specific feedback about our time preferences and naming conventions\n", - "\n", - "After the edit, we'll check the calendar preferences memory store to see how it's updated. 
The memory update should capture both:\n", - "- Our preference for shorter 30-minute meetings\n", - "- Our preference for more concise meeting subjects\n", - "\n", - "The trace reveals the precise memory update logic, showing how the system analyzes the difference between its proposal and our edits to extract meaningful patterns and preferences. We can see the detailed justification for each memory update, ensuring transparency in the learning process." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "af760977", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user editing the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out regarding tax planning strategies for this year. I have scheduled a 30-minute call for us on Tuesday, April 22nd at 2:00 PM to discuss your suggestions and review potential savings opportunities. If you need a longer duration, please let me know and I can adjust the meeting accordingly.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\nLance'}}\n", - "\n", - "Checking memory after editing schedule_meeting:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': \"30 minute meetings are preferred, but 15 minute meetings are also acceptable. User prefers 30 minute meetings over longer durations such as 45 minutes when possible. 
User prefers concise meeting subjects (e.g., 'Tax Planning Discussion' instead of 'Tax Planning Strategies Discussion').\"}\n" - ] - } - ], - "source": [ - "# Now simulate user editing the schedule_meeting tool call\n", - "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", - "edited_schedule_args = {\n", - " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", - " \"subject\": \"Tax Planning Discussion\",\n", - " \"duration_minutes\": 30, # Changed from 45 to 30\n", - " \"preferred_day\": \"2025-04-22\",\n", - " \"start_time\": 14 # 2pm\n", - "}\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after editing schedule_meeting\n", - "print(\"\\nChecking memory after editing schedule_meeting:\")\n", - "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "0dfc585a", - "metadata": {}, - "source": [ - "Looking at the memory after editing the calendar invitation, we can see that it's been updated with remarkably specific preferences:\n", - "\n", - "1. The system has identified that we prefer 30-minute meetings over longer durations\n", - "2. 
It's also captured our preference for concise meeting subjects\n", - "\n", - "What's particularly impressive about this memory update is:\n", - "- It doesn't just record our specific edit, but generalizes to a broader preference pattern\n", - "- It preserves all existing memory content while adding the new information\n", - "- It extracts multiple preference signals from a single edit interaction\n", - "\n", - "Now, let's edit the email draft to see how the system captures different types of communication preferences:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81a1fa37", - "metadata": {}, - "outputs": [], - "source": [ - "display_memory_content(store,(\"email_assistant\", \"response_preferences\"))\n", - "# Now simulate user editing the write_email tool call\n", - "print(\"\\nSimulating user editing the write_email tool call...\")\n", - "edited_email_args = {\n", - " \"to\": \"pm@client.com\",\n", - " \"subject\": \"Re: Tax season let's schedule call\",\n", - " \"content\": \"Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", - "}\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after editing write_email\n", - "print(\"\\nChecking memory after editing write_email:\")\n", - "display_memory_content(store,(\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "3ffbd5f9", - "metadata": {}, - "source": [ - "Our email edit reveals even more sophisticated learning capabilities:\n", - "\n", - "1. We've dramatically shortened and simplified the email content\n", - "2. We've changed the tone to be more casual\n", - "3. We've added a question asking for confirmation rather than assuming the time works\n", - "4. 
We've slightly altered the meeting details (day and time)\n", - "\n", - "Looking at the updated memory, we can see that the system has extracted a key insight about our communication style:\n", - "\n", - "```\n", - "When scheduling a meeting, ask the recipient to confirm if the proposed time works for them, rather than assuming and stating the meeting is already scheduled.\n", - "```\n", - "\n", - "This demonstrates the system's ability to:\n", - "- Analyze our edit not just at a superficial level, but to understand intent\n", - "- Extract generalizable principles from specific examples\n", - "- Preserve all existing guidance while adding new insights\n", - "- Maintain the organization and structure of the memory\n", - "\n", - "These targeted, high-quality memory updates will improve all future interactions without requiring repeated corrections." - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "8ad818d6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_3lgcL8JNtxaakOR8NqZyVH2n)\n", - " Call ID: call_3lgcL8JNtxaakOR8NqZyVH2n\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_Ru9AMMrCf7wlWPcS9gV7ohCq)\n", - " Call ID: call_Ru9AMMrCf7wlWPcS9gV7ohCq\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_C6OAozNqxP5JDw42Y0T0m4Fg)\n", - " Call ID: call_C6OAozNqxP5JDw42Y0T0m4Fg\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Discussion\n", - " duration_minutes: 30\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 30 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_mvJ8FX2j8spcrtJnpU4dgmLC)\n", - " Call ID: call_mvJ8FX2j8spcrtJnpU4dgmLC\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\n", - "\n", - "Best regards,\n", - "Lance Martin\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\n", - "\n", - "Best regards,\n", - "Lance Martin\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_JRSWVKubpzaxPrU9deWet6YL)\n", - " Call ID: call_JRSWVKubpzaxPrU9deWet6YL\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_2)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "0e14918e", - "metadata": {}, - "source": [ - "## Ignore `write_email`, `schedule_meeting`, and `question`\n", - "\n", - "This test explores how the system learns from rejection. When users ignore (reject) the agent's suggestions, it creates a strong signal about content they don't want to handle:\n", - "\n", - "1. We'll first test ignoring a `schedule_meeting` request entirely\n", - "2. Then we'll test accepting a meeting but ignoring the follow-up email\n", - "3. Finally, we'll test ignoring a `question` for a different email context\n", - "\n", - "These rejection signals help the system learn what types of emails and actions a user prefers not to deal with, leading to more appropriate triage decisions in the future. We expect significant updates to the triage preferences memory after each ignore action." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "0d015c3f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_3 = uuid.uuid4()\n", - "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "c782e711", - "metadata": {}, - "source": [ - "Ignore the `schedule_meeting` tool call\n", - "\n", - "When we ignore a meeting scheduling request, we're signaling that we don't want to handle this type of email through the assistant. This creates a powerful learning opportunity about our triage preferences:\n", - "\n", - "1. The assistant initially classified the tax planning email as \"RESPOND\"\n", - "2. But by ignoring the scheduling request, we indicate we'd prefer not to handle this type of email\n", - "3. The system needs to update its triage classification preferences to reflect this rejection\n", - "\n", - "After ignoring the request, we'll check the triage preferences memory to see how the rejection affected the system's understanding. 
The memory update should show a new pattern added to the \"not worth responding to\" section, specifically about tax planning calls or similar recurring discussions.\n", - "\n", - "The trace shows how the system processes this rejection, identifies the pattern, and updates the memory with specific justification for why this type of email should be classified differently in the future." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "16c4d83b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user ignoring the schedule_meeting tool call...\n", - "\n", - "Checking memory after ignoring first tool call:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"Emails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Client requests to schedule tax planning calls (unless explicitly instructed otherwise)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after ignoring first tool call\n", - "print(\"\\nChecking memory after ignoring first tool call:\")\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "67462024", - "metadata": {}, - "source": [ - "Looking at the memory update after ignoring the schedule_meeting tool call, we can see a remarkable triage preference update:\n", - "\n", - "1. 
The system has added \"Client requests to schedule tax planning calls\" to the \"emails not worth responding to\" section\n", - "2. It correctly identified the general pattern (scheduling routine calls) rather than overfitting to just this specific instance\n", - "3. It included the parenthetical note \"(unless explicitly instructed otherwise)\" to maintain flexibility\n", - "\n", - "This update demonstrates the system's ability to:\n", - "- Infer general patterns from specific instances of rejection\n", - "- Update the triage filters that determine initial email classification\n", - "- Preserve the organization and priority of existing preferences\n", - "- Include appropriate qualifiers to avoid overly rigid rules\n", - "\n", - "Next, let's see what happens when we accept the meeting but reject the email draft:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "b869485a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "Simulating user accepting the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. 
I have scheduled the meeting accordingly.\\n\\nLooking forward to your suggestions and the discussion.\\n\\nBest regards,\\nLance'}}\n", - "\n", - "Simulating user ignoring the write_email tool call...\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User ignored this email draft. Ignore this email and end the workflow.\n", - "\n", - "Checking memory after ignoring second tool call:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Client requests to schedule routine calls (such as tax planning or similar recurring discussions)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_3 = uuid.uuid4()\n", - "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_3):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " 
chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after ignoring second tool call\n", - "print(\"\\nChecking memory after ignoring second tool call:\")\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "694db9f5", - "metadata": {}, - "source": [ - "When we accept the meeting but ignore the email draft, we're sending a more nuanced signal about our preferences:\n", - "\n", - "1. We're willing to schedule the meeting (accepting the first tool call)\n", - "2. But we don't want to send a confirmation email about it (ignoring the second tool call)\n", - "\n", - "Looking at the memory update, we see another evolution of our triage preferences:\n", - "\n", - "```\n", - "\"Client requests to schedule routine calls (such as tax planning or similar recurring discussions)\"\n", - "```\n", - "\n", - "The system has:\n", - "- Broadened the pattern from just \"tax planning calls\" to \"routine calls\" generally\n", - "- Added examples in parentheses for clarity\n", - "- Positioned this in the \"not worth responding to\" section\n", - "- Maintained all other existing preferences\n", - "\n", - "This demonstrates how the memory evolves over multiple interactions, becoming increasingly accurate and generalizable with each additional data point. The system is continuously refining its understanding based on our feedback patterns." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "272bb9ed", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_l9Jn2ghL4T0aPuMhjpXf36Ga)\n", - " Call ID: call_l9Jn2ghL4T0aPuMhjpXf36Ga\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_8eOXPjqVpaIRDkABD80XRYUU)\n", - " Call ID: call_8eOXPjqVpaIRDkABD80XRYUU\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_5L9UBPXPfnqhuhxlBvHSQncm)\n", - " Call ID: call_5L9UBPXPfnqhuhxlBvHSQncm\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - " 
duration_minutes: 45\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_635xNQnAYz2BBX1Y5NiGtuE6)\n", - " Call ID: call_635xNQnAYz2BBX1Y5NiGtuE6\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hello,\n", - "\n", - "Thank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\n", - "\n", - "Looking forward to your suggestions and the discussion.\n", - "\n", - "Best regards,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User ignored this email draft. Ignore this email and end the workflow.\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_3)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "597930b3", - "metadata": {}, - "source": [ - "Now let's try an email that calls the `Question` tool\n", - "\n", - "For our third rejection test, we'll use a different type of email - a casual social invitation about brunch. This gives us insight into how the system learns about personal vs. professional communication preferences:\n", - "\n", - "1. The system classifies this personal invitation as \"RESPOND\"\n", - "2. Rather than answering directly, it uses the Question tool to ask for clarification\n", - "3. 
We'll ignore this question, indicating we don't want to handle these types of emails through the assistant\n", - "\n", - "This test shows how ignoring questions (not just actions) can also update our triage preferences. By rejecting the clarification attempt, we signal that this entire category of email doesn't warrant response through the assistant." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "efb91337", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'partner@home.com', 'subject': 'Re: Meet Jim and Lisa for brunch in 3 weeks?', 'content': 'That sounds like a great idea! I checked my calendar for three weeks from now (Thursday, May 8th), and I’m available at 9:00 AM, 2:00 PM, or 4:00 PM. Do any of those times work for you, Jim, and Lisa? The new place on 17th sounds perfect.\\n\\nLet me know what everyone prefers and I’ll confirm the reservation.'}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Partner \",\n", - " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", - " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_4 = uuid.uuid4()\n", - "thread_config_4 = {\"configurable\": {\"thread_id\": thread_id_4}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_4):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt for Question tool\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "2bb6581a", - "metadata": {}, - "source": [ - "Ignore the `question` tool call\n", - "\n", - "When we ignore a question from the assistant about a personal social invitation, we're providing yet another type of feedback:\n", - "\n", - "1. The system initially tries to get clarification before responding\n", - "2. By ignoring the question, we indicate we don't even want to engage with this type of email\n", - "3. This suggests the entire category of social invitations should be handled differently\n", - "\n", - "After ignoring, we'll check the triage preferences again to see how they've evolved. 
We expect to see a new entry about social invitations added to the \"not worth responding to\" section.\n", - "\n", - "The memory update justification analyzes our rejection of the question about an informal social invitation and extracts a general pattern about our preference not to handle social invitations through the assistant." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "f227a298", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user ignoring the write_email tool call...\n", - "\n", - "Checking memory after ignoring Question tool:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"Emails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Social invitations from partner (e.g., brunch plans, casual meetups)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_4):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after ignoring Question tool\n", - "print(\"\\nChecking memory after ignoring Question tool:\")\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "20bea846", - "metadata": {}, - "source": [ - "Looking at the memory update after ignoring the question about brunch plans, we see another sophisticated triage preference update:\n", - "\n", - "```\n", - "\"Social invitations from partner (e.g., brunch plans, casual meetups)\"\n", - "```\n", - "\n", - 
"This demonstrates how the system:\n", - "1. Correctly identifies personal social invitations as a distinct category\n", - "2. Specifically notes they're from \"partner\" - showing it's learning to distinguish senders\n", - "3. Provides examples to clarify the pattern\n", - "4. Adds this to the \"not worth responding to\" section\n", - "\n", - "These three ignores have collectively taught the system quite a bit about what types of emails we prefer not to handle through the assistant:\n", - "- Tax planning calls and routine client scheduling\n", - "- Social invitations from partners\n", - "- Each with appropriate specificity and generalizability\n", - "\n", - "In a real-world scenario, these learned preferences would ensure that similar future emails would be classified differently, saving the user time by automatically filtering out categories they've previously rejected." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "2ca65c1b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", - "**From**: Partner \n", - "**To**: Lance Martin \n", - "\n", - "Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_5jTHskfgPCFQKakHl2dFxvsD)\n", - " Call ID: call_5jTHskfgPCFQKakHl2dFxvsD\n", - " Args:\n", - " day: 2025-05-08\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-05-08: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_3tvSXyHFCWGk4yzbBYMCdEWz)\n", - " Call ID: call_3tvSXyHFCWGk4yzbBYMCdEWz\n", - " Args:\n", - " to: partner@home.com\n", - " subject: Re: Meet Jim and Lisa for brunch in 3 weeks?\n", - " content: That sounds like a great idea! I checked my calendar for three weeks from now (Thursday, May 8th), and I’m available at 9:00 AM, 2:00 PM, or 4:00 PM. Do any of those times work for you, Jim, and Lisa? The new place on 17th sounds perfect.\n", - "\n", - "Let me know what everyone prefers and I’ll confirm the reservation.\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User ignored this email draft. Ignore this email and end the workflow.\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_4)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "6d92a42b", - "metadata": {}, - "source": [ - "## Respond (with feedback) `write_email`, `schedule_meeting`, and `question`\n", - "\n", - "Our final test set explores the \"response\" feedback pattern - providing guidance without directly editing or accepting. This conversational feedback mechanism offers a middle ground between acceptance and editing:\n", - "\n", - "1. 
First, we'll test feedback for meeting scheduling by requesting:\n", - " - Shorter duration (30 minutes instead of 45)\n", - " - Afternoon meeting times (after 2pm)\n", - " \n", - "2. Next, we'll test feedback for email drafting by requesting:\n", - " - Shorter, less formal language\n", - " - A specific closing statement about looking forward to the meeting\n", - " \n", - "3. Finally, we'll test feedback for questions by providing:\n", - " - A direct answer with additional context\n", - " - Specific preferences (brunch location, time)\n", - "\n", - "This natural language feedback approach lets users guide the assistant without having to do the work themselves. We expect to see detailed memory updates that extract the general principles from our specific feedback." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "07676231", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Project Manager \",\n", - " \"subject\": \"Tax season let's schedule call\",\n", - " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. 
I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_5 = uuid.uuid4()\n", - "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", - "\n", - "# Run the graph until the first interrupt \n", - "# Email will be classified as \"respond\" \n", - "# Agent will create a schedule_meeting and write_email tool call\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt \n", - "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "b85fc45d", - "metadata": {}, - "source": [ - "Provide feedback for the `schedule_meeting` tool call\n", - "\n", - "Instead of directly editing the meeting proposal or simply accepting it, we'll provide natural language feedback:\n", - "\n", - "1. We request a 30-minute meeting instead of 45 minutes\n", - "2. We express a preference for afternoon meetings after 2pm\n", - "3. 
The system must interpret this feedback and generate a new proposal\n", - "\n", - "This conversational approach is often more natural and efficient than direct editing, especially for mobile users or those who prefer to give high-level direction rather than detailed edits.\n", - "\n", - "After providing feedback, we'll examine the calendar preferences memory to see how this natural language guidance is captured. We expect to see the system extract both the meeting duration and time-of-day preferences as general principles." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "30a151f1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user providing feedback for the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 30, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", - "\n", - "Checking memory after providing feedback for schedule_meeting:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\nAfternoon meetings after 2pm are preferred.'}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: 
{Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after providing feedback for schedule_meeting\n", - "print(\"\\nChecking memory after providing feedback for schedule_meeting:\")\n", - "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "8088757c", - "metadata": {}, - "source": [ - "Our memory check after providing feedback shows an elegantly simple calendar preference update:\n", - "\n", - "```\n", - "30 minute meetings are preferred, but 15 minute meetings are also acceptable.\n", - "Afternoon meetings after 2pm are preferred.\n", - "```\n", - "\n", - "The system has:\n", - "1. Captured both aspects of our feedback (duration and time of day)\n", - "2. Preserved the existing preference about 15-minute meetings\n", - "3. Added our preference for afternoon meetings after 2pm as a new line\n", - "4. Kept the format clean and readable\n", - "\n", - "This natural language feedback mechanism creates the same quality of memory updates as direct editing but requires less effort from the user. The system is able to extract structured preferences from unstructured feedback, showing its ability to learn from conversational interactions.\n", - "\n", - "Let's accept this revised meeting proposal and move to the email draft:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "545063be", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the schedule_meeting tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out regarding tax planning strategies. I am available on Tuesday, April 22nd at 2:00 PM, and have scheduled a 30-minute call for us to discuss your suggestions. 
If you need more time or would prefer a different slot, please let me know.\\n\\nLooking forward to our conversation.\\n\\nBest regards,\\nLance'}}\n", - "\n", - "Checking memory after accepting schedule_meeting after feedback:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\nAfternoon meetings after 2pm are preferred.'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting schedule_meeting after feedback\n", - "print(\"\\nChecking memory after accepting schedule_meeting after feedback:\")\n", - "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "e72ede94", - "metadata": {}, - "source": [ - "Now provide feedback for the `write_email` tool call\n", - "\n", - "Similar to our meeting feedback, we'll now provide natural language guidance for the email draft:\n", - "\n", - "1. We request \"shorter and less formal\" language - a style preference\n", - "2. We ask for a specific closing statement about looking forward to the meeting\n", - "3. 
The system must interpret this guidance and rewrite the email accordingly\n", - "\n", - "After providing this feedback, we'll check the response preferences memory to see how these style and structure preferences are captured. We expect to see generalizable guidelines about email brevity, formality, and closing statements added to our preference profile." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "9831ad2d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user providing feedback for the write_email tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hi,\\n\\nThanks for reaching out. I’ve scheduled us for a 30-minute call on Tuesday, April 22nd at 2:00 PM to go over tax planning. Let me know if you need a different time.\\n\\nLooking forward to our chat!\\n\\nBest,\\nLance'}}\n", - "\n", - "Checking memory after providing feedback for write_email:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"Use professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n- When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified.\\n\\nWhen writing email responses:\\n- Favor shorter and less formal language when possible, unless the context requires formality.\\n- Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments.\"}\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after providing feedback for write_email\n", - "print(\"\\nChecking memory after providing feedback for write_email:\")\n", - "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "b5b360a2", - "metadata": {}, - "source": [ - "The memory update after our email feedback shows highly sophisticated learning about both meeting scheduling and email writing preferences:\n", - "\n", - "1. The system has added a complete new section to the response preferences entitled \"When writing email responses\" with two key preferences:\n", - " - \"Favor shorter and less formal language when possible, unless the context requires formality\"\n", - " - \"Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments\"\n", - "\n", - "2. 
It has also added a new bullet point to the \"When responding to meeting scheduling requests\" section:\n", - " - \"When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified\"\n", - "\n", - "This demonstrates the system's ability to:\n", - "- Organize learned preferences into appropriate categories\n", - "- Extract multiple insights from a single feedback instance\n", - "- Apply meeting preferences to both calendar and email contexts\n", - "- Capture nuance with appropriate qualifiers (\"when possible,\" \"unless otherwise specified\")\n", - "- Maintain the hierarchical structure of the memory\n", - "\n", - "The resulting email shows all these preferences applied: it's shorter, less formal, includes a closing statement about looking forward to the chat, and correctly references the 30-minute meeting time." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "8c64999e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the write_email tool call...\n", - "\n", - "Checking memory after accepting write_email after feedback:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"Use professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n- When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified.\\n\\nWhen writing email responses:\\n- Favor shorter and less formal language when possible, unless the context requires formality.\\n- Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments.\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\nAfternoon meetings after 2pm are preferred.'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting write_email after feedback\n", - "print(\"\\nChecking memory after accepting write_email after feedback:\")\n", - "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "f85e63cb", - "metadata": {}, - "source": [ - 
"Look at the full message history." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "fa9cf91d", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Tax season let's schedule call\n", - "**From**: Project Manager \n", - "**To**: Lance Martin \n", - "\n", - "Lance,\n", - "\n", - "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", - "\n", - "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", - "\n", - "Regards,\n", - "Project Manager\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_SwwMW5F9zEWmajYTDLjKOoFN)\n", - " Call ID: call_SwwMW5F9zEWmajYTDLjKOoFN\n", - " Args:\n", - " day: 2025-04-22\n", - " check_calendar_availability (call_nIBAphQD4FFOdW89Ikc1uNfV)\n", - " Call ID: call_nIBAphQD4FFOdW89Ikc1uNfV\n", - " Args:\n", - " day: 2025-04-24\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_uUvycUMHS86YjPSYFKV1dpzq)\n", - " Call ID: call_uUvycUMHS86YjPSYFKV1dpzq\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax 
Planning Strategies Discussion\n", - " duration_minutes: 45\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User gave feedback, which can we incorporate into the meeting request. Feedback: Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " schedule_meeting (call_isFznpqEGfVb7pBmY2itGtgY)\n", - " Call ID: call_isFznpqEGfVb7pBmY2itGtgY\n", - " Args:\n", - " attendees: ['pm@client.com', 'lance@company.com']\n", - " subject: Tax Planning Strategies Discussion\n", - " duration_minutes: 30\n", - " preferred_day: 2025-04-22\n", - " start_time: 14\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 30 minutes with 2 attendees\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_GIsMacl5jL7qsjSM47WdK738)\n", - " Call ID: call_GIsMacl5jL7qsjSM47WdK738\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hello,\n", - "\n", - "Thank you for reaching out regarding tax planning strategies. I am available on Tuesday, April 22nd at 2:00 PM, and have scheduled a 30-minute call for us to discuss your suggestions. If you need more time or would prefer a different slot, please let me know.\n", - "\n", - "Looking forward to our conversation.\n", - "\n", - "Best regards,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User gave feedback, which can we incorporate into the email. 
Feedback: Shorter and less formal. Include a closing statement about looking forward to the meeting!\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_0vCEdmEMmvBYGh24gvd9pqvh)\n", - " Call ID: call_0vCEdmEMmvBYGh24gvd9pqvh\n", - " Args:\n", - " to: pm@client.com\n", - " subject: Re: Tax season let's schedule call\n", - " content: Hi,\n", - "\n", - "Thanks for reaching out. I’ve scheduled us for a 30-minute call on Tuesday, April 22nd at 2:00 PM to go over tax planning. Let me know if you need a different time.\n", - "\n", - "Looking forward to our chat!\n", - "\n", - "Best,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hi,\n", - "\n", - "Thanks for reaching out. I’ve scheduled us for a 30-minute call on Tuesday, April 22nd at 2:00 PM to go over tax planning. Let me know if you need a different time.\n", - "\n", - "Looking forward to our chat!\n", - "\n", - "Best,\n", - "Lance\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_UXqUyEknR5jjpA39rKdlEcc9)\n", - " Call ID: call_UXqUyEknR5jjpA39rKdlEcc9\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_5)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "0bed8841", - "metadata": {}, - "source": [ - "Now let's try an email that calls the `Question` tool to provide feedback\n", - "\n", - "Our final test examines feedback for questions. When the assistant needs clarification before proceeding, users can provide detailed information beyond just answering the question:\n", - "\n", - "1. 
For the brunch invitation email, we'll provide feedback that includes:\n", - " - Confirmation that we want to invite the people mentioned\n", - " - A specific location preference (Jack's)\n", - " - A time preference (before 11am)\n", - " \n", - "2. This gives the system multiple pieces of information:\n", - " - A direct answer to the question (yes, let's invite them)\n", - " - Additional context and preferences not explicitly asked for\n", - "\n", - "This tests the system's ability to process compound feedback and extract multiple data points from a single response." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "e111a459", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "📧 Classification: RESPOND - This email requires a response\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Question', 'args': {'content': 'Do you want me to check your calendar and suggest specific dates and times for the brunch, or would you like to propose a particular day and time to Jim and Lisa first?'}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "# Respond - Meeting Request Email\n", - "email_input_respond = {\n", - " \"to\": \"Lance Martin \",\n", - " \"author\": \"Partner \",\n", - " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", - " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\"\n", - "}\n", - "\n", - "# Compile the graph\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_6 = uuid.uuid4()\n", - "thread_config_6 = {\"configurable\": {\"thread_id\": thread_id_6}}\n", - "\n", - "# Run the graph until the first interrupt\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_6):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt for Question tool\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "c540ebff", - "metadata": {}, - "source": [ - "Provide feedback for the `Question` tool call\n", - "\n", - "When the assistant asks about our preferences for the brunch invitation, we'll respond with rich, multi-faceted feedback:\n", - "\n", - "1. We confirm we want to invite Jim and Lisa\n", - "2. We specify a location preference (Jack's, not the place on 17th)\n", - "3. We express a time preference (before 11am)\n", - "\n", - "This tests the system's ability to handle compound responses that both answer the direct question and provide additional context. Note that we're not just answering \"yes\" or \"no\" - we're providing a rich context that should influence the assistant's next actions.\n", - "\n", - "An ideal system would use this feedback to both respond to the immediate email and update background knowledge that could be relevant for future similar social invitations. 
In our current implementation, we don't update background knowledge from question responses, but this would be a straightforward enhancement." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "61d8bfef", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user providing feedback for the Question tool call...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'partner@home.com', 'subject': 'Brunch with Jim and Lisa in 3 Weeks', 'content': 'Great idea! I’ll reach out to Jim and Lisa to invite them to brunch at Jack’s in three weeks. I’ll suggest Saturday, May 8th, with a start time before 11am—9:00 AM is available on our calendar. Let me know if you’d like to suggest a different time or if this works for you before I send the invite.'}}\n", - "\n", - "Checking memory after providing feedback for Question:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Yes, let's invite them, I really like brunch at Jack's, ideally before 11am.\"}]), config=thread_config_6):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after providing feedback for Question\n", - "print(\"\\nChecking memory after providing feedback for Question:\")\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "7b9dbba1", - "metadata": {}, - "source": [ - "Currently, we don't update `background_information` when the user provides feedback for the `Question` tool, but this would be a valuable enhancement.\n", - "\n", - "Looking at how the system handled our question response:\n", - "\n", - "1. 
It correctly incorporated all three key pieces of information:\n", - " - Our affirmative decision to invite Jim and Lisa\n", - " - Our location preference (Jack's, not the place on 17th)\n", - " - Our time preference (before 11am)\n", - "\n", - "2. It drafted a complete email that:\n", - " - References reaching out to Jim and Lisa\n", - " - Specifies Jack's as the location\n", - " - Suggests a 9:00 AM time (before 11am as requested)\n", - " - Asks for confirmation before sending an invite\n", - "\n", - "This demonstrates the system's ability to extract and use detailed information from natural language feedback, even when not explicitly updating memory. The email correctly incorporates all aspects of our feedback and presents a coherent plan based on our preferences.\n", - "\n", - "A future enhancement could store these preferences (location preferences, time preferences for social events) in the background information memory for use in future similar situations." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "5b4c0f6a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the write_email tool call...\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_k6NQ6GqqEIE1uNwVQXmx6xU6)\n", - " Call ID: call_k6NQ6GqqEIE1uNwVQXmx6xU6\n", - " Args:\n", - " done: True\n", - "\n", - "Checking memory after accepting write_email after answering Question:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. 
For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_6):\n", - " # Inspect response_agent most recent message\n", - " if 'response_agent' in chunk:\n", - " chunk['response_agent']['messages'][-1].pretty_print()\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting write_email after answering Question\n", - "print(\"\\nChecking memory after accepting write_email after answering Question:\")\n", - "display_memory_content(store)" - ] - }, - { - "cell_type": "markdown", - "id": "c80ed860", - "metadata": {}, - "source": [ - "Look at the full message history" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "e61fab2f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message 
\u001b[0m=================================\n", - "\n", - "Respond to the email: \n", - "\n", - "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", - "**From**: Partner \n", - "**To**: Lance Martin \n", - "\n", - "Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\n", - "\n", - "---\n", - "\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Question (call_dF0WQiJIDf5ovo5veMixbBm9)\n", - " Call ID: call_dF0WQiJIDf5ovo5veMixbBm9\n", - " Args:\n", - " content: Do you want me to check your calendar and suggest specific dates and times for the brunch, or would you like to propose a particular day and time to Jim and Lisa first?\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "User answered the question, which can we can use for any follow up actions. Feedback: Yes, let's invite them, I really like brunch at Jack's, ideally before 11am.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " check_calendar_availability (call_07aoxyrzmikMZPDN9XQ9rLbp)\n", - " Call ID: call_07aoxyrzmikMZPDN9XQ9rLbp\n", - " Args:\n", - " day: 2025-05-08\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Available times on 2025-05-08: 9:00 AM, 2:00 PM, 4:00 PM\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_Vq5Ube1Y2Wbr9xzB7bh1aIqw)\n", - " Call ID: call_Vq5Ube1Y2Wbr9xzB7bh1aIqw\n", - " Args:\n", - " to: partner@home.com\n", - " subject: Brunch with Jim and Lisa in 3 Weeks\n", - " content: Great idea! I’ll reach out to Jim and Lisa to invite them to brunch at Jack’s in three weeks. 
I’ll suggest Saturday, May 8th, with a start time before 11am—9:00 AM is available on our calendar. Let me know if you’d like to suggest a different time or if this works for you before I send the invite.\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to partner@home.com with subject 'Brunch with Jim and Lisa in 3 Weeks' and content: Great idea! I’ll reach out to Jim and Lisa to invite them to brunch at Jack’s in three weeks. I’ll suggest Saturday, May 8th, with a start time before 11am—9:00 AM is available on our calendar. Let me know if you’d like to suggest a different time or if this works for you before I send the invite.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_k6NQ6GqqEIE1uNwVQXmx6xU6)\n", - " Call ID: call_k6NQ6GqqEIE1uNwVQXmx6xU6\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_6)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "a3b11d0c", - "metadata": {}, - "source": [ - "## Test Case for Notify Classification\n", - "\n", - "This test explores how memory updates when an email is initially classified as \"NOTIFY\" but the user decides it needs a response:\n", - "\n", - "1. The triage system initially classifies IT security updates as notifications\n", - "2. But the user decides this particular notification warrants acknowledgment\n", - "3. This creates a learning opportunity about which notification types need responses\n", - "\n", - "The \"notify\" category is designed for important information that doesn't need immediate action. But user feedback can help the system learn which subset of notifications actually do warrant responses, refining the initial classification over time." 
- ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "a6e8a62a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "🔔 Classification: NOTIFY - This email contains important information\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "# Notify - Important FYI Email\n", - "email_input_notify = {\n", - " \"to\": \"Team Members \",\n", - " \"author\": \"IT Department \",\n", - " \"subject\": \"Critical Security 
Update\",\n", - " \"email_thread\": \"Dear Team,\\n\\nThis is an important security notification. We will be updating our authentication system this weekend. During the update window (Saturday 2am-4am), you will not be able to access company resources.\\n\\nPlease ensure you log out of all systems before the maintenance window.\\n\\nRegards,\\nIT Department\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_7 = uuid.uuid4()\n", - "thread_config_7 = {\"configurable\": {\"thread_id\": thread_id_7}}\n", - "\n", - "# Run the graph until the first interrupt - should be classified as \"notify\"\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_7):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt for Notify\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "1b02f3be", - "metadata": {}, - "source": [ - "Now simulate user deciding to respond with feedback.\n", - "\n", - "This test explores the reclassification pathway from \"notify\" to \"respond\":\n", - "\n", - "1. The system initially classifies a security update as information-only (\"notify\")\n", - "2. We're presented with this notification without a suggested action\n", - "3. We decide this security update actually requires acknowledgment\n", - "4. 
We provide feedback indicating we want to respond and confirm our compliance\n", - "\n", - "This represents an important learning signal - that security notifications, particularly those requesting specific user actions, should be treated as items requiring response rather than just information.\n", - "\n", - "After providing this feedback, we'll check the triage preferences memory to see how this \"override\" affects future classifications. We expect to see security notifications with action requests moved into the \"worth responding to\" section." - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "510235cd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user deciding to respond with feedback...\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'write_email', 'args': {'to': 'it@company.com', 'subject': 'Re: Critical Security Update', 'content': 'Dear IT Department,\\n\\nThank you for the notification regarding the upcoming authentication system update. 
I acknowledge receipt of this important notice and will ensure that I am logged out of all systems before the maintenance window begins on Saturday from 2am to 4am.\\n\\nBest regards,\\nLance'}}\n", - "\n", - "Checking memory after responding with feedback to Notify:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"Emails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n- Important security notifications from IT Department requiring acknowledgment or confirmation of action\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\"}\n" - ] - } - ], - "source": [ - "print(\"\\nSimulating user deciding to respond with feedback...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\"}]), config=thread_config_7):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after responding with feedback to Notify\n", - "print(\"\\nChecking memory after responding with feedback to Notify:\")\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "0c4b1139", - "metadata": {}, - "source": [ - "The memory update after our decision to respond to a security notification shows a remarkable triage preference refinement:\n", - "\n", - "1. The system has added a new bullet to the \"Emails that are worth responding to\" section:\n", - " ```\n", - " \"Important security notifications from IT Department requiring acknowledgment or confirmation of action\"\n", - " ```\n", - "\n", - "2. 
This update demonstrates:\n", - " - Precision: It specifically identifies \"security notifications\" (not all IT emails)\n", - " - Source awareness: It's from the \"IT Department\" specifically\n", - " - Action trigger: It identifies notifications \"requiring acknowledgment or confirmation\"\n", - " - Category reorganization: It moves this from \"notify\" to \"respond\" without removing notifications entirely\n", - "\n", - "The system didn't just record our specific override - it analyzed the *reason* we might want to respond (acknowledging required action) and created a generalizable rule. This would ensure that future similar security notifications requesting specific user actions would be correctly classified as needing response." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "85fa053f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user accepting the write_email tool call...\n", - "\n", - "Checking memory after accepting write_email for Notify:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"Emails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n- Important security notifications from IT Department requiring acknowledgment or confirmation of action\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\"}\n", - "=======================================\n", - "\n", - "\n", - "--- response_preferences ---\n", - "{'preferences': \"\\nUse professional and concise language. If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", - "=======================================\n", - "\n", - "\n", - "--- cal_preferences ---\n", - "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", - "=======================================\n", - "\n", - "\n", - "--- background ---\n", - "No memory found\n", - "=======================================\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_7):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after accepting write_email for Notify\n", - "print(\"\\nChecking memory after accepting write_email for Notify:\")\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "4878373b", - "metadata": {}, - "source": [ - "Look at the full message history." 
- ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "0501ff69", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Email to notify user about: \n", - "\n", - "**Subject**: Critical Security Update\n", - "**From**: IT Department \n", - "**To**: Team Members \n", - "\n", - "Dear Team,\n", - "\n", - "This is an important security notification. We will be updating our authentication system this weekend. During the update window (Saturday 2am-4am), you will not be able to access company resources.\n", - "\n", - "Please ensure you log out of all systems before the maintenance window.\n", - "\n", - "Regards,\n", - "IT Department\n", - "\n", - "---\n", - "\n", - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "User wants to reply to the email. Use this feedback to respond: We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " write_email (call_8tZq4eSFWr2WE01sPgEfgFr0)\n", - " Call ID: call_8tZq4eSFWr2WE01sPgEfgFr0\n", - " Args:\n", - " to: it@company.com\n", - " subject: Re: Critical Security Update\n", - " content: Dear IT Department,\n", - "\n", - "Thank you for the notification regarding the upcoming authentication system update. 
I acknowledge receipt of this important notice and will ensure that I am logged out of all systems before the maintenance window begins on Saturday from 2am to 4am.\n", - "\n", - "Best regards,\n", - "Lance\n", - "=================================\u001b[1m Tool Message \u001b[0m=================================\n", - "\n", - "Email sent to it@company.com with subject 'Re: Critical Security Update' and content: Dear IT Department,\n", - "\n", - "Thank you for the notification regarding the upcoming authentication system update. I acknowledge receipt of this important notice and will ensure that I am logged out of all systems before the maintenance window begins on Saturday from 2am to 4am.\n", - "\n", - "Best regards,\n", - "Lance\n", - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "Tool Calls:\n", - " Done (call_or3Ir137SK0RmLgs2qFrMcWB)\n", - " Call ID: call_or3Ir137SK0RmLgs2qFrMcWB\n", - " Args:\n", - " done: True\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_7)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "c8e48281", - "metadata": {}, - "source": [ - "## Test Case for Notify + Ignore\n", - "\n", - "Our final test explores the opposite pattern - when a user decides notifications don't even warrant being shown:\n", - "\n", - "1. The system classifies a company picnic announcement as a notification\n", - "2. The user decides this doesn't even warrant notification status\n", - "3. This creates a signal to further refine the classification boundaries\n", - "\n", - "By ignoring certain types of notifications, users can teach the system which information is truly important to them versus which can be filtered out entirely. This completes the full spectrum of classification refinement." 
- ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "74e1fe6f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running the graph until the first interrupt...\n", - "🔔 Classification: NOTIFY - This email contains important information\n", - "\n", - "INTERRUPT OBJECT:\n", - "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "# Notify - Important FYI Email\n", - "email_input_notify = {\n", - " \"to\": \"Team Members \",\n", - " \"author\": \"HR Department \",\n", - " \"subject\": \"Company Picnic 
Next Month\",\n", - " \"email_thread\": \"Dear Team,\\n\\nWe're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. There will be food, games, and activities for families.\\n\\nMore details will follow in the coming weeks.\\n\\nRegards,\\nHR Department\"\n", - "}\n", - "\n", - "# Compile the graph with new thread\n", - "checkpointer = MemorySaver()\n", - "store = InMemoryStore()\n", - "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", - "thread_id_8 = uuid.uuid4()\n", - "thread_config_8 = {\"configurable\": {\"thread_id\": thread_id_8}}\n", - "\n", - "# Run the graph until the first interrupt - should be classified as \"notify\"\n", - "print(\"Running the graph until the first interrupt...\")\n", - "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_8):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after first interrupt for Notify + Ignore\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "911a8c79", - "metadata": {}, - "source": [ - "Now simulate user deciding to ignore the notification.\n", - "\n", - "In this final test, we explore the downgrade pathway from \"notify\" to \"ignore\":\n", - "\n", - "1. The system initially classifies a company picnic announcement as \"notify\"\n", - "2. We decide we don't even want to be notified about these social events\n", - "3. 
By choosing \"ignore,\" we signal this entire category should be filtered out\n", - "\n", - "This represents another important learning signal - that certain types of company announcements (particularly social events) shouldn't even be surfaced as notifications, further refining our triage preferences.\n", - "\n", - "After ignoring, we'll check the triage preferences memory for the final time to see how this override affects future classifications. We expect to see company social events moved from the \"notify\" section to the \"not worth responding to\" section." - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "088e7bc7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Simulating user deciding to ignore the notification...\n", - "\n", - "Checking memory after ignoring Notify:\n", - "\n", - "======= CURRENT MEMORY CONTENT =======\n", - "\n", - "--- triage_preferences ---\n", - "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Company social event announcements (e.g., company picnic)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n" - ] - } - ], - "source": [ - "print(\"\\nSimulating user deciding to ignore the notification...\")\n", - "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_8):\n", - " # Inspect interrupt object if present\n", - " if '__interrupt__' in chunk:\n", - " Interrupt_Object = chunk['__interrupt__'][0]\n", - " print(\"\\nINTERRUPT OBJECT:\")\n", - " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", - "\n", - "# Check memory after ignoring Notify\n", - "print(\"\\nChecking memory after ignoring Notify:\")\n", - "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))" - ] - }, - { - "cell_type": "markdown", - "id": "05136d96", - "metadata": {}, - "source": [ - "The final memory update completes our triage preference refinement journey:\n", - "\n", - "1. The system has added \"Company social event announcements (e.g., company picnic)\" to the \"Emails not worth responding to\" section\n", - "\n", - "2. 
This demonstrates:\n", - " - Content-based categorization: It identifies \"social event announcements\" specifically\n", - " - Example inclusion: It provides an example \"(company picnic)\" for clarity\n", - " - Category downgrade: It moves this from \"notify\" to \"not worth responding to\"\n", - " - Structural preservation: It maintains the original memory organization\n", - "\n", - "Through our series of tests, we've now explored the full spectrum of triage learning:\n", - "- Upgrading notifications to responses (security updates)\n", - "- Downgrading notifications to ignores (company social events)\n", - "- Downgrading responses to ignores (personal social invitations, routine calls)\n", - "- Refining response preferences (meeting duration, timing, email style)\n", - "\n", - "Each interaction has produced targeted, intelligent memory updates that maintain the overall structure while adding new insights. This continuous refinement creates an increasingly personalized assistant that learns and adapts to user preferences over time." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "027f3f4e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "Email to notify user about: \n", - "\n", - "**Subject**: Company Picnic Next Month\n", - "**From**: HR Department \n", - "**To**: Team Members \n", - "\n", - "Dear Team,\n", - "\n", - "We're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. 
There will be food, games, and activities for families.\n", - "\n", - "More details will follow in the coming weeks.\n", - "\n", - "Regards,\n", - "HR Department\n", - "\n", - "---\n", - "\n", - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "The user decided to ignore the email even though it was classified as notify. Update triage preferences to capture this.\n" - ] - } - ], - "source": [ - "state = graph.get_state(thread_config_8)\n", - "for m in state.values['messages']:\n", - " m.pretty_print()" - ] - }, - { - "cell_type": "markdown", - "id": "b075a3ea", - "metadata": {}, - "source": [ - "## Testing with Local Deployment\n", - "\n", - "You can find this graph with memory integration in the `src/email_assistant` directory:\n", - "\n", - "* `src/email_assistant/email_assistant_hitl_memory.py`\n", - "\n", - "Testing this locally gives you the full experience of a memory-enabled HITL system:\n", - "\n", - "1. **Start the local server**: Run `langgraph dev` to launch the agent locally\n", - "2. **Connect Agent Inbox**: Use the graph URL from the `langgraph.json` file\n", - "3. **Submit test emails**: Try different email types to see classification in action\n", - "4. **Provide various feedback types**: Try accepting, editing, ignoring, and responding\n", - "5. 
**Observe memory evolution**: Check the Memory tab in LangGraph Studio to see changes\n", - "\n", - "![inbox](img/agent-inbox-edit.png)\n", - "\n", - "The Memory tab in LangGraph Studio offers a real-time view of how your preferences are being captured and updated with each interaction:\n", - "\n", - "![studio-img](img/memory-studio.png)\n", - "\n", - "Through continued use, the system becomes increasingly personalized:\n", - "- It learns which emails you want to respond to, be notified about, or ignore\n", - "- It adapts to your communication style preferences\n", - "- It remembers your scheduling preferences\n", - "- It refines its understanding with each interaction\n", - "\n", - "This combination of HITL and memory creates a system that balances automation with control - handling routine tasks automatically while learning from your feedback to become more aligned with your preferences over time." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f60fa538", - "metadata": {}, - "outputs": [], - "source": [ - "! langgraph dev" - ] - }, - { - "cell_type": "markdown", - "id": "43b6319d", - "metadata": {}, - "source": [ - "![inbox](img/agent-inbox-edit.png)\n", - "\n", - "As you provide feedback or edit replies, you can see memories accumulate in the `memory` tab in LangGraph Studio.\n", - "\n", - "![studio-img](img/memory-studio.png)\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "f2ad7580", - "metadata": {}, - "source": [ - "### Use LangMem to add background memory! \n", - "\n", - "Add this to the `llm_call` node:\n", - "\n", - "```\n", - " # Search for existing background memory\n", - " # TODO: Here, semantic search over a facts collection of background information from emails could be added. 
\n", - " # background = get_memory(store, (\"email_assistant\", \"background\"), default_background)\n", - "```\n", - "\n", - "Add this to the interrupt handler node:\n", - "\n", - "```\n", - "elif tool_call[\"name\"] == \"Question\":\n", - " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", - " result.append({\"role\": \"tool\", \"content\": f\"User answered the question, which can we can use for any follow up actions. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", - " # TODO: Here, we could update the background information with the user's answer. \n", - " # update_memory(store, (\"email_assistant\", \"background\"), [{\n", - " # \"role\": \"user\",\n", - " # \"content\": f\"Update background information based upon these messages:\"\n", - " # }] + state[\"messages\"] + result)\n", - "```\n", - "\n", - "Consider using LangMem: \n", - "https://langchain-ai.github.io/langmem/" - ] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "main_language": "python", - "notebook_metadata_filter": "-all" - }, - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{"cells": [{"cell_type": "markdown", "id": "d047044f", "metadata": {}, "source": ["# Test Cases for Memory"]}, {"cell_type": "code", "execution_count": 1, "id": "9e2c818a", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["/Users/rlm/Desktop/Code/interrupt_workshop\n"]}], "source": ["%load_ext autoreload\n", "%autoreload 2\n", "\n", "import uuid\n", "from langgraph.store.memory import InMemoryStore\n", "from langgraph.checkpoint.memory import 
MemorySaver\n", "from email_assistant.email_assistant_hitl_memory import overall_workflow"]}, {"cell_type": "markdown", "id": "397114bf", "metadata": {}, "source": ["## Accept `write_email` and `schedule_meeting`\n", "\n", "Our first test examines what happens when a user accepts the agent's actions without modification. This baseline case helps us understand how the system behaves when no feedback is provided:\n", "\n", "1. We'll use the same tax planning email from our previous tests\n", "2. The system will classify it as \"RESPOND\" and propose scheduling a meeting\n", "3. We'll accept the meeting schedule without changes\n", "4. The agent will generate an email confirming the meeting\n", "5. We'll accept the email without changes\n", "\n", "This test demonstrates the default behavior of our memory-enabled system. When a user simply accepts proposed actions, we expect minimal or no memory updates since there's no explicit feedback to learn from. However, the system will still leverage existing memory (if any) when generating its responses."]}, {"cell_type": "code", "execution_count": 2, "id": "8be43b59", "metadata": {}, "outputs": [], "source": ["from langgraph.types import Command\n", "\n", "# Helper function to display memory content\n", "def display_memory_content(store, namespace=None):\n", " # Display current memory content for all namespaces\n", " print(\"\\n======= CURRENT MEMORY CONTENT =======\")\n", " if namespace:\n", " memory = store.get(namespace, \"user_preferences\")\n", " if memory:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print({\"preferences\": memory.value})\n", " else:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print(\"No memory found\")\n", " else:\n", " for namespace in [\n", " (\"email_assistant\", \"triage_preferences\"),\n", " (\"email_assistant\", \"response_preferences\"),\n", " (\"email_assistant\", \"cal_preferences\"),\n", " (\"email_assistant\", \"background\")\n", " ]:\n", " memory = store.get(namespace, 
\"user_preferences\")\n", " if memory:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print({\"preferences\": memory.value})\n", " else:\n", " print(f\"\\n--- {namespace[1]} ---\")\n", " print(\"No memory found\")\n", " print(\"=======================================\\n\")"]}, {"cell_type": "code", "execution_count": 25, "id": "649cee4f", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_1 = uuid.uuid4()\n", "thread_config_1 = {\"configurable\": {\"thread_id\": thread_id_1}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "878e199e", "metadata": {}, "source": ["Accept the schedule_meeting tool call\n", "\n", "As we examine the initial `schedule_meeting` proposal, note how the system uses existing memory to inform its decisions:\n", "\n", "1. The default calendar preferences show a preference for 30-minute meetings, though the email requests 45 minutes\n", "2. The agent still proposes a 45-minute meeting, respecting the sender's specific request\n", "3. We accept this proposal without modification to see if simple acceptance triggers any memory updates\n", "\n", "After running this step, we'll check the memory contents to confirm whether acceptance alone leads to memory updates. 
Simple acceptance represents the baseline user experience - the system works as intended without requiring adjustments."]}, {"cell_type": "code", "execution_count": 26, "id": "9589423b", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\\n\\nLooking forward to your suggestions and our discussion.\\n\\nBest regards,\\nLance'}}\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")"]}, {"cell_type": "markdown", "id": "e6b80f99", "metadata": {}, "source": ["Accept the write_email tool call\n", "\n", "Now we'll accept the email draft that confirms the meeting scheduling:\n", "\n", "1. The email draft is generated with knowledge of our calendar preferences\n", "2. It includes details about the meeting time, duration, and purpose\n", "3. We'll accept it without changes to complete the baseline test case\n", "\n", "After accepting, we'll check all memory stores to see if any updates occurred. 
As expected, simply accepting the agent's proposals doesn't provide strong learning signals - there's no clear feedback about what the user likes or dislikes about the agent's approach.\n", "\n", "The trace link shows the complete workflow execution, where we can see that the memory is used in the LLM call for response generation, but no memory updates occur, which is the expected behavior for simple acceptances."]}, {"cell_type": "code", "execution_count": 27, "id": "12035cf6", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the write_email tool call...\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_02WPNb2qpXXVx43d3Z0VzWqv)\n", " Call ID: call_02WPNb2qpXXVx43d3Z0VzWqv\n", " Args:\n", " done: True\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_1):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting the write_email tool call\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "fcbc178d", "metadata": {}, "source": ["We can look at the full messages, and the trace: \n", "\n", "https://smith.langchain.com/public/86ff6474-29fe-452e-8829-b05a91b458eb/r\n", "\n", "You'll notice that memory is used in the LLM call to respond. 
\n", "\n", "But the memory store is *not* updated, because we haven't added any feedback via HITL."]}, {"cell_type": "code", "execution_count": 28, "id": "10ce8197", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_6ZbPjmSN8h1bfl3tDCx5ssoj)\n", " Call ID: call_6ZbPjmSN8h1bfl3tDCx5ssoj\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_8M30JVmN7Wj2VlIcDqBE1995)\n", " Call ID: call_8M30JVmN7Wj2VlIcDqBE1995\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_TChCTjTsVim5Zj8lnTPUtHs6)\n", " Call ID: call_TChCTjTsVim5Zj8lnTPUtHs6\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 
45\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_KMR1rdut5FcbS82k3ckvCHm7)\n", " Call ID: call_KMR1rdut5FcbS82k3ckvCHm7\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hello,\n", "\n", "Thank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\n", "\n", "Looking forward to your suggestions and our discussion.\n", "\n", "Best regards,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hello,\n", "\n", "Thank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\n", "\n", "Looking forward to your suggestions and our discussion.\n", "\n", "Best regards,\n", "Lance\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_02WPNb2qpXXVx43d3Z0VzWqv)\n", " Call ID: call_02WPNb2qpXXVx43d3Z0VzWqv\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_1)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "58201a21", "metadata": {}, "source": ["## Edit `write_email` and `schedule_meeting`\n", "\n", "This test explores how the system learns from direct edits to its proposed actions. 
When users modify the agent's suggestions, it creates clear, specific learning signals about their preferences:\n", "\n", "1. We'll use the same tax planning email as before\n", "2. When the agent proposes a 45-minute meeting, we'll edit it to:\n", " - Change the duration to 30 minutes (matching our stored preference)\n", " - Make the subject line more concise\n", "3. When the agent drafts an email, we'll edit it to be:\n", " - Shorter and less formal\n", " - Structured differently\n", "\n", "Edits provide the most explicit feedback about user preferences, letting the system learn exactly what changes are desired. We expect to see specific, targeted updates to our memory stores that reflect these edits."]}, {"cell_type": "code", "execution_count": 29, "id": "ac260423", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n"]}], "source": ["# Same email as before\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n\nRegards,\nProject Manager\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_2 = uuid.uuid4()\n", "thread_config_2 = {\"configurable\": {\"thread_id\": thread_id_2}}\n", "\n", "# Run the graph until the first interrupt - will be classified as \"respond\" and the agent will first create a schedule_meeting tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_2):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt\n", "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "5d73ba71", "metadata": {}, "source": ["Edit the `schedule_meeting` tool call\n", "\n", "When we edit the meeting proposal, we're providing direct, explicit feedback about our preferences. This creates a significant learning opportunity for the system:\n", "\n", "1. The agent initially proposes a 45-minute meeting (the duration requested in the email)\n", "2. We edit it to 30 minutes and simplify the subject from \"Tax Planning Strategies Discussion\" to \"Tax Planning Discussion\"\n", "3. This creates clear, specific feedback about our time preferences and naming conventions\n", "\n", "After the edit, we'll check the calendar preferences memory store to see how it's updated. 
The memory update should capture both:\n", "- Our preference for shorter 30-minute meetings\n", "- Our preference for more concise meeting subjects\n", "\n", "The trace reveals the precise memory update logic, showing how the system analyzes the difference between its proposal and our edits to extract meaningful patterns and preferences. We can see the detailed justification for each memory update, ensuring transparency in the learning process."]}, {"cell_type": "code", "execution_count": 30, "id": "af760977", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user editing the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out regarding tax planning strategies for this year. I have scheduled a 30-minute call for us on Tuesday, April 22nd at 2:00 PM to discuss your suggestions and review potential savings opportunities. If you need a longer duration, please let me know and I can adjust the meeting accordingly.\\n\\nLooking forward to our discussion.\\n\\nBest regards,\\nLance'}}\n", "\n", "Checking memory after editing schedule_meeting:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- cal_preferences ---\n", "{'preferences': \"30 minute meetings are preferred, but 15 minute meetings are also acceptable. User prefers 30 minute meetings over longer durations such as 45 minutes when possible. 
User prefers concise meeting subjects (e.g., 'Tax Planning Discussion' instead of 'Tax Planning Strategies Discussion').\"}\n"]}], "source": ["# Now simulate user editing the schedule_meeting tool call\n", "print(\"\\nSimulating user editing the schedule_meeting tool call...\")\n", "edited_schedule_args = {\n", " \"attendees\": [\"pm@client.com\", \"lance@company.com\"],\n", " \"subject\": \"Tax Planning Discussion\",\n", " \"duration_minutes\": 30, # Changed from 45 to 30\n", " \"preferred_day\": \"2025-04-22\",\n", " \"start_time\": 14 # 2pm\n", "}\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_schedule_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after editing schedule_meeting\n", "print(\"\\nChecking memory after editing schedule_meeting:\")\n", "display_memory_content(store,(\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "0dfc585a", "metadata": {}, "source": ["Looking at the memory after editing the calendar invitation, we can see that it's been updated with remarkably specific preferences:\n", "\n", "1. The system has identified that we prefer 30-minute meetings over longer durations\n", "2. 
It's also captured our preference for concise meeting subjects\n", "\n", "What's particularly impressive about this memory update is:\n", "- It doesn't just record our specific edit, but generalizes to a broader preference pattern\n", "- It preserves all existing memory content while adding the new information\n", "- It extracts multiple preference signals from a single edit interaction\n", "\n", "Now, let's edit the email draft to see how the system captures different types of communication preferences:"]}, {"cell_type": "code", "execution_count": null, "id": "81a1fa37", "metadata": {}, "outputs": [], "source": ["display_memory_content(store,(\"email_assistant\", \"response_preferences\"))\n", "# Now simulate user editing the write_email tool call\n", "print(\"\\nSimulating user editing the write_email tool call...\")\n", "edited_email_args = {\n", " \"to\": \"pm@client.com\",\n", " \"subject\": \"Re: Tax season let's schedule call\",\n", " \"content\": \"Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\\n\\nBest regards,\\nLance Martin\"\n", "}\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"edit\", \"args\": {\"args\": edited_email_args}}]), config=thread_config_2):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after editing write_email\n", "print(\"\\nChecking memory after editing write_email:\")\n", "display_memory_content(store,(\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "3ffbd5f9", "metadata": {}, "source": ["Our email edit reveals even more sophisticated learning capabilities:\n", "\n", "1. 
We've dramatically shortened and simplified the email content\n", "2. We've changed the tone to be more casual\n", "3. We've added a question asking for confirmation rather than assuming the time works\n", "4. We've slightly altered the meeting details (day and time)\n", "\n", "Looking at the updated memory, we can see that the system has extracted a key insight about our communication style:\n", "\n", "```\n", "When scheduling a meeting, ask the recipient to confirm if the proposed time works for them, rather than assuming and stating the meeting is already scheduled.\n", "```\n", "\n", "This demonstrates the system's ability to:\n", "- Analyze our edit not just at a superficial level, but to understand intent\n", "- Extract generalizable principles from specific examples\n", "- Preserve all existing guidance while adding new insights\n", "- Maintain the organization and structure of the memory\n", "\n", "These targeted, high-quality memory updates will improve all future interactions without requiring repeated corrections."]}, {"cell_type": "code", "execution_count": 32, "id": "8ad818d6", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_3lgcL8JNtxaakOR8NqZyVH2n)\n", " Call ID: call_3lgcL8JNtxaakOR8NqZyVH2n\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_Ru9AMMrCf7wlWPcS9gV7ohCq)\n", " Call ID: call_Ru9AMMrCf7wlWPcS9gV7ohCq\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_C6OAozNqxP5JDw42Y0T0m4Fg)\n", " Call ID: call_C6OAozNqxP5JDw42Y0T0m4Fg\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Discussion\n", " duration_minutes: 30\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 30 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_mvJ8FX2j8spcrtJnpU4dgmLC)\n", " Call ID: call_mvJ8FX2j8spcrtJnpU4dgmLC\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. 
Would that work for you?\n", "\n", "Best regards,\n", "Lance Martin\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Thanks! I scheduled a 30-minute call next Thursday at 3:00 PM. Would that work for you?\n", "\n", "Best regards,\n", "Lance Martin\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_JRSWVKubpzaxPrU9deWet6YL)\n", " Call ID: call_JRSWVKubpzaxPrU9deWet6YL\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_2)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "0e14918e", "metadata": {}, "source": ["## Ignore `write_email`, `schedule_meeting`, and `question`\n", "\n", "This test explores how the system learns from rejection. When users ignore (reject) the agent's suggestions, it creates a strong signal about content they don't want to handle:\n", "\n", "1. We'll first test ignoring a `schedule_meeting` request entirely\n", "2. Then we'll test accepting a meeting but ignoring the follow-up email\n", "3. Finally, we'll test ignoring a `question` for a different email context\n", "\n", "These rejection signals help the system learn what types of emails and actions a user prefers not to deal with, leading to more appropriate triage decisions in the future. 
We expect significant updates to the triage preferences memory after each ignore action."]}, {"cell_type": "code", "execution_count": 9, "id": "0d015c3f", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_3 = uuid.uuid4()\n", "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "c782e711", "metadata": {}, "source": ["Ignore the `schedule_meeting` tool call\n", "\n", "When we ignore a meeting scheduling request, we're signaling that we don't want to handle this type of email through the assistant. This creates a powerful learning opportunity about our triage preferences:\n", "\n", "1. The assistant initially classified the tax planning email as \"RESPOND\"\n", "2. But by ignoring the scheduling request, we indicate we'd prefer not to handle this type of email\n", "3. The system needs to update its triage classification preferences to reflect this rejection\n", "\n", "After ignoring the request, we'll check the triage preferences memory to see how the rejection affected the system's understanding. 
The memory update should show a new pattern added to the \"not worth responding to\" section, specifically about tax planning calls or similar recurring discussions.\n", "\n", "The trace shows how the system processes this rejection, identifies the pattern, and updates the memory with specific justification for why this type of email should be classified differently in the future."]}, {"cell_type": "code", "execution_count": 10, "id": "16c4d83b", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user ignoring the schedule_meeting tool call...\n", "\n", "Checking memory after ignoring first tool call:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"Emails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Client requests to schedule tax planning calls (unless explicitly instructed otherwise)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after ignoring first tool call\n", "print(\"\\nChecking memory after ignoring first tool call:\")\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "67462024", "metadata": {}, "source": ["Looking at the memory update after ignoring the schedule_meeting tool call, we can see a remarkable triage preference update:\n", "\n", "1. The system has added \"Client requests to schedule tax planning calls\" to the \"emails not worth responding to\" section\n", "2. 
It correctly identified a reusable pattern (client requests to schedule tax planning calls) rather than overfitting to just this specific email\n", "3. It included the parenthetical note \"(unless explicitly instructed otherwise)\" to maintain flexibility\n", "\n", "This update demonstrates the system's ability to:\n", "- Infer general patterns from specific instances of rejection\n", "- Update the triage filters that determine initial email classification\n", "- Preserve the organization and priority of existing preferences\n", "- Include appropriate qualifiers to avoid overly rigid rules\n", "\n", "Next, let's see what happens when we accept the meeting but reject the email draft:"]}, {"cell_type": "code", "execution_count": 11, "id": "b869485a", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "Simulating user accepting the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\\n\\nLooking forward to your suggestions and the discussion.\\n\\nBest regards,\\nLance'}}\n", "\n", "Simulating user ignoring the write_email tool call...\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User ignored this email draft. 
Ignore this email and end the workflow.\n", "\n", "Checking memory after ignoring second tool call:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Client requests to schedule routine calls (such as tax planning or similar recurring discussions)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. 
I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_3 = uuid.uuid4()\n", "thread_config_3 = {\"configurable\": {\"thread_id\": thread_id_3}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_3):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_3):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_3):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in 
chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after ignoring second tool call\n", "print(\"\\nChecking memory after ignoring second tool call:\")\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "694db9f5", "metadata": {}, "source": ["When we accept the meeting but ignore the email draft, we're sending a more nuanced signal about our preferences:\n", "\n", "1. We're willing to schedule the meeting (accepting the first tool call)\n", "2. But we don't want to send a confirmation email about it (ignoring the second tool call)\n", "\n", "Looking at the memory update, we see another evolution of our triage preferences:\n", "\n", "```\n", "\"Client requests to schedule routine calls (such as tax planning or similar recurring discussions)\"\n", "```\n", "\n", "The system has:\n", "- Broadened the pattern from just \"tax planning calls\" to \"routine calls\" generally\n", "- Added examples in parentheses for clarity\n", "- Positioned this in the \"not worth responding to\" section\n", "- Maintained all other existing preferences\n", "\n", "This demonstrates how the memory evolves over multiple interactions, becoming increasingly accurate and generalizable with each additional data point. 
The system is continuously refining its understanding based on our feedback patterns."]}, {"cell_type": "code", "execution_count": 13, "id": "272bb9ed", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_l9Jn2ghL4T0aPuMhjpXf36Ga)\n", " Call ID: call_l9Jn2ghL4T0aPuMhjpXf36Ga\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_8eOXPjqVpaIRDkABD80XRYUU)\n", " Call ID: call_8eOXPjqVpaIRDkABD80XRYUU\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_5L9UBPXPfnqhuhxlBvHSQncm)\n", " Call ID: call_5L9UBPXPfnqhuhxlBvHSQncm\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 45\n", " 
preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 45 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_635xNQnAYz2BBX1Y5NiGtuE6)\n", " Call ID: call_635xNQnAYz2BBX1Y5NiGtuE6\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hello,\n", "\n", "Thank you for reaching out. I am available on Tuesday, April 22nd at 2:00 PM for a 45-minute call to discuss tax planning strategies. I have scheduled the meeting accordingly.\n", "\n", "Looking forward to your suggestions and the discussion.\n", "\n", "Best regards,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User ignored this email draft. Ignore this email and end the workflow.\n"]}], "source": ["state = graph.get_state(thread_config_3)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "597930b3", "metadata": {}, "source": ["Now let's try an email that calls the `Question` tool\n", "\n", "For our third rejection test, we'll use a different type of email - a casual social invitation about brunch. This gives us insight into how the system learns about personal vs. professional communication preferences:\n", "\n", "1. The system classifies this personal invitation as \"RESPOND\"\n", "2. Rather than answering directly, it uses the Question tool to ask for clarification\n", "3. We'll ignore this question, indicating we don't want to handle these types of emails through the assistant\n", "\n", "This test shows how ignoring questions (not just actions) can also update our triage preferences. 
By rejecting the clarification attempt, we signal that this entire category of email doesn't warrant response through the assistant."]}, {"cell_type": "code", "execution_count": 14, "id": "efb91337", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'partner@home.com', 'subject': 'Re: Meet Jim and Lisa for brunch in 3 weeks?', 'content': 'That sounds like a great idea! I checked my calendar for three weeks from now (Thursday, May 8th), and I\u2019m available at 9:00 AM, 2:00 PM, or 4:00 PM. Do any of those times work for you, Jim, and Lisa? The new place on 17th sounds perfect.\\n\\nLet me know what everyone prefers and I\u2019ll confirm the reservation.'}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Partner \",\n", " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_4 = uuid.uuid4()\n", "thread_config_4 = {\"configurable\": {\"thread_id\": thread_id_4}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a tool call that triggers an interrupt (Question or write_email)\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_4):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt for Question tool\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "2bb6581a", "metadata": {}, "source": ["Ignore the `question` tool call\n", "\n", "When we ignore a question from the assistant about a personal social invitation, we're providing yet another type of feedback:\n", "\n", "1. The system initially tries to get clarification before responding\n", "2. By ignoring the question, we indicate we don't even want to engage with this type of email\n", "3. This suggests the entire category of social invitations should be handled differently\n", "\n", "After ignoring, we'll check the triage preferences again to see how they've evolved. 
We expect to see a new entry about social invitations added to the \"not worth responding to\" section.\n", "\n", "The memory update justification analyzes our rejection of the question about an informal social invitation and extracts a general pattern about our preference not to handle social invitations through the assistant."]}, {"cell_type": "code", "execution_count": 15, "id": "f227a298", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user ignoring the write_email tool call...\n", "\n", "Checking memory after ignoring Question tool:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"Emails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Social invitations from partner (e.g., brunch plans, casual meetups)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["print(f\"\\nSimulating user ignoring the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_4):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after ignoring Question tool\n", "print(\"\\nChecking memory after ignoring Question tool:\")\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "20bea846", "metadata": {}, "source": ["Looking at the memory update after ignoring the question about brunch plans, we see another sophisticated triage preference update:\n", "\n", "```\n", "\"Social invitations from partner (e.g., brunch plans, casual meetups)\"\n", "```\n", "\n", "This demonstrates how the system:\n", "1. 
Correctly identifies personal social invitations as a distinct category\n", "2. Specifically notes they're from \"partner\" - showing it's learning to distinguish senders\n", "3. Provides examples to clarify the pattern\n", "4. Adds this to the \"not worth responding to\" section\n", "\n", "These three ignores have collectively taught the system quite a bit about what types of emails we prefer not to handle through the assistant:\n", "- Tax planning calls and routine client scheduling\n", "- Social invitations from our partner\n", "\n", "Each preference was captured with appropriate specificity and generalizability.\n", "\n", "In a real-world scenario, these learned preferences would ensure that similar future emails would be classified differently, saving the user time by automatically filtering out categories they've previously rejected."]}, {"cell_type": "code", "execution_count": 16, "id": "2ca65c1b", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", "**From**: Partner \n", "**To**: Lance Martin \n", "\n", "Hey, should we invite Jim and Lisa to brunch in 3 weeks? 
We could go to the new place on 17th that everyone is talking about.\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_5jTHskfgPCFQKakHl2dFxvsD)\n", " Call ID: call_5jTHskfgPCFQKakHl2dFxvsD\n", " Args:\n", " day: 2025-05-08\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-05-08: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_3tvSXyHFCWGk4yzbBYMCdEWz)\n", " Call ID: call_3tvSXyHFCWGk4yzbBYMCdEWz\n", " Args:\n", " to: partner@home.com\n", " subject: Re: Meet Jim and Lisa for brunch in 3 weeks?\n", " content: That sounds like a great idea! I checked my calendar for three weeks from now (Thursday, May 8th), and I\u2019m available at 9:00 AM, 2:00 PM, or 4:00 PM. Do any of those times work for you, Jim, and Lisa? The new place on 17th sounds perfect.\n", "\n", "Let me know what everyone prefers and I\u2019ll confirm the reservation.\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User ignored this email draft. Ignore this email and end the workflow.\n"]}], "source": ["state = graph.get_state(thread_config_4)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "6d92a42b", "metadata": {}, "source": ["## Respond (with feedback) `write_email`, `schedule_meeting`, and `question`\n", "\n", "Our final test set explores the \"response\" feedback pattern - providing guidance without directly editing or accepting. This conversational feedback mechanism offers a middle ground between acceptance and editing:\n", "\n", "1. 
First, we'll test feedback for meeting scheduling by requesting:\n", " - Shorter duration (30 minutes instead of 45)\n", " - Afternoon meeting times (after 2pm)\n", " \n", "2. Next, we'll test feedback for email drafting by requesting:\n", " - Shorter, less formal language\n", " - A specific closing statement about looking forward to the meeting\n", " \n", "3. Finally, we'll test feedback for questions by providing:\n", " - A direct answer with additional context\n", " - Specific preferences (brunch location, time)\n", "\n", "This natural language feedback approach lets users guide the assistant without having to do the work themselves. We expect to see detailed memory updates that extract the general principles from our specific feedback."]}, {"cell_type": "code", "execution_count": 21, "id": "07676231", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 45, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Project Manager \",\n", " \"subject\": \"Tax season let's schedule call\",\n", " \"email_thread\": \"Lance,\\n\\nIt's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\\n\\nAre you available sometime next week? 
Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\\n\\nRegards,\\nProject Manager\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_5 = uuid.uuid4()\n", "thread_config_5 = {\"configurable\": {\"thread_id\": thread_id_5}}\n", "\n", "# Run the graph until the first interrupt \n", "# Email will be classified as \"respond\" \n", "# Agent will create a schedule_meeting and write_email tool call\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt \n", "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "b85fc45d", "metadata": {}, "source": ["Provide feedback for the `schedule_meeting` tool call\n", "\n", "Instead of directly editing the meeting proposal or simply accepting it, we'll provide natural language feedback:\n", "\n", "1. We request a 30-minute meeting instead of 45 minutes\n", "2. We express a preference for afternoon meetings after 2pm\n", "3. The system must interpret this feedback and generate a new proposal\n", "\n", "This conversational approach is often more natural and efficient than direct editing, especially for mobile users or those who prefer to give high-level direction rather than detailed edits.\n", "\n", "After providing feedback, we'll examine the calendar preferences memory to see how this natural language guidance is captured. 
We expect to see the system extract both the meeting duration and time-of-day preferences as general principles."]}, {"cell_type": "code", "execution_count": 22, "id": "30a151f1", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user providing feedback for the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'schedule_meeting', 'args': {'attendees': ['pm@client.com', 'lance@company.com'], 'subject': 'Tax Planning Strategies Discussion', 'duration_minutes': 30, 'preferred_day': '2025-04-22', 'start_time': 14}}\n", "\n", "Checking memory after providing feedback for schedule_meeting:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- cal_preferences ---\n", "{'preferences': '30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\nAfternoon meetings after 2pm are preferred.'}\n"]}], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after providing feedback for schedule_meeting\n", "print(\"\\nChecking memory after providing feedback for schedule_meeting:\")\n", "display_memory_content(store, (\"email_assistant\", \"cal_preferences\"))"]}, {"cell_type": "markdown", "id": "8088757c", "metadata": {}, "source": ["Our memory check after providing feedback shows an elegantly simple calendar preference update:\n", "\n", "```\n", "30 minute meetings are preferred, but 15 minute 
meetings are also acceptable.\n", "Afternoon meetings after 2pm are preferred.\n", "```\n", "\n", "The system has:\n", "1. Captured both aspects of our feedback (duration and time of day)\n", "2. Preserved the existing preference about 15-minute meetings\n", "3. Added our preference for afternoon meetings after 2pm as a new line\n", "4. Kept the format clean and readable\n", "\n", "This natural language feedback mechanism creates the same quality of memory updates as direct editing but requires less effort from the user. The system is able to extract structured preferences from unstructured feedback, showing its ability to learn from conversational interactions.\n", "\n", "Let's accept this revised meeting proposal and move to the email draft:"]}, {"cell_type": "code", "execution_count": 23, "id": "545063be", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the schedule_meeting tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hello,\\n\\nThank you for reaching out regarding tax planning strategies. I am available on Tuesday, April 22nd at 2:00 PM, and have scheduled a 30-minute call for us to discuss your suggestions. If you need more time or would prefer a different slot, please let me know.\\n\\nLooking forward to our conversation.\\n\\nBest regards,\\nLance'}}\n", "\n", "Checking memory after accepting schedule_meeting after feedback:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. 
For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\nAfternoon meetings after 2pm are preferred.'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting schedule_meeting after feedback\n", "print(\"\\nChecking memory after accepting schedule_meeting after feedback:\")\n", "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "e72ede94", "metadata": {}, "source": ["Now provide feedback for the `write_email` tool call\n", "\n", "Similar to our meeting feedback, we'll now provide natural language guidance for the email draft:\n", "\n", "1. We request \"shorter and less formal\" language - a style preference\n", "2. We ask for a specific closing statement about looking forward to the meeting\n", "3. 
The system must interpret this guidance and rewrite the email accordingly\n", "\n", "After providing this feedback, we'll check the response preferences memory to see how these style and structure preferences are captured. We expect to see generalizable guidelines about email brevity, formality, and closing statements added to our preference profile."]}, {"cell_type": "code", "execution_count": 24, "id": "9831ad2d", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user providing feedback for the write_email tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'pm@client.com', 'subject': \"Re: Tax season let's schedule call\", 'content': 'Hi,\\n\\nThanks for reaching out. I\u2019ve scheduled us for a 30-minute call on Tuesday, April 22nd at 2:00 PM to go over tax planning. Let me know if you need a different time.\\n\\nLooking forward to our chat!\\n\\nBest,\\nLance'}}\n", "\n", "Checking memory after providing feedback for write_email:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- response_preferences ---\n", "{'preferences': \"Use professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n- When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified.\\n\\nWhen writing email responses:\\n- Favor shorter and less formal language when possible, unless the context requires formality.\\n- Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments.\"}\n"]}], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Shorter and less formal. Include a closing statement about looking forward to the meeting!\"}]), config=thread_config_5):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after providing feedback for write_email\n", "print(\"\\nChecking memory after providing feedback for write_email:\")\n", "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "b5b360a2", "metadata": {}, "source": ["The memory update after our email feedback shows highly sophisticated learning about both meeting scheduling and email writing preferences:\n", "\n", "1. 
The system has added a complete new section to the response preferences entitled \"When writing email responses\" with two key preferences:\n", " - \"Favor shorter and less formal language when possible, unless the context requires formality\"\n", " - \"Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments\"\n", "\n", "2. It has also added a new bullet point to the \"When responding to meeting scheduling requests\" section:\n", " - \"When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified\"\n", "\n", "This demonstrates the system's ability to:\n", "- Organize learned preferences into appropriate categories\n", "- Extract multiple insights from a single feedback instance\n", "- Apply meeting preferences to both calendar and email contexts\n", "- Capture nuance with appropriate qualifiers (\"when possible,\" \"unless otherwise specified\")\n", "- Maintain the hierarchical structure of the memory\n", "\n", "The resulting email shows all these preferences applied: it's shorter, less formal, includes a closing statement about looking forward to the chat, and correctly references the 30-minute meeting time."]}, {"cell_type": "code", "execution_count": 25, "id": "8c64999e", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the write_email tool call...\n", "\n", "Checking memory after accepting write_email after feedback:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"Use professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n- When scheduling meetings, prefer afternoon times after 2pm when possible, and default to 30-minute durations unless otherwise specified.\\n\\nWhen writing email responses:\\n- Favor shorter and less formal language when possible, unless the context requires formality.\\n- Include a closing statement expressing that you look forward to the meeting or conversation when confirming appointments.\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\nAfternoon meetings after 2pm are preferred.'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_5):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting write_email after feedback\n", "print(\"\\nChecking memory after accepting write_email after feedback:\")\n", "display_memory_content(store, (\"email_assistant\", \"response_preferences\"))"]}, {"cell_type": "markdown", "id": "f85e63cb", "metadata": {}, "source": ["Look at the full message history."]}, {"cell_type": "code", "execution_count": 
26, "id": "fa9cf91d", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Tax season let's schedule call\n", "**From**: Project Manager \n", "**To**: Lance Martin \n", "\n", "Lance,\n", "\n", "It's tax season again, and I wanted to schedule a call to discuss your tax planning strategies for this year. I have some suggestions that could potentially save you money.\n", "\n", "Are you available sometime next week? Tuesday or Thursday afternoon would work best for me, for about 45 minutes.\n", "\n", "Regards,\n", "Project Manager\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_SwwMW5F9zEWmajYTDLjKOoFN)\n", " Call ID: call_SwwMW5F9zEWmajYTDLjKOoFN\n", " Args:\n", " day: 2025-04-22\n", " check_calendar_availability (call_nIBAphQD4FFOdW89Ikc1uNfV)\n", " Call ID: call_nIBAphQD4FFOdW89Ikc1uNfV\n", " Args:\n", " day: 2025-04-24\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-22: 9:00 AM, 2:00 PM, 4:00 PM\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-04-24: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_uUvycUMHS86YjPSYFKV1dpzq)\n", " Call ID: call_uUvycUMHS86YjPSYFKV1dpzq\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 45\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message 
\u001b[0m=================================\n", "\n", "User gave feedback, which can we incorporate into the meeting request. Feedback: Please schedule this for 30 minutes instead of 45 minutes, and I prefer afternoon meetings after 2pm.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " schedule_meeting (call_isFznpqEGfVb7pBmY2itGtgY)\n", " Call ID: call_isFznpqEGfVb7pBmY2itGtgY\n", " Args:\n", " attendees: ['pm@client.com', 'lance@company.com']\n", " subject: Tax Planning Strategies Discussion\n", " duration_minutes: 30\n", " preferred_day: 2025-04-22\n", " start_time: 14\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Meeting 'Tax Planning Strategies Discussion' scheduled on Tuesday, April 22, 2025 at 14 for 30 minutes with 2 attendees\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_GIsMacl5jL7qsjSM47WdK738)\n", " Call ID: call_GIsMacl5jL7qsjSM47WdK738\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hello,\n", "\n", "Thank you for reaching out regarding tax planning strategies. I am available on Tuesday, April 22nd at 2:00 PM, and have scheduled a 30-minute call for us to discuss your suggestions. If you need more time or would prefer a different slot, please let me know.\n", "\n", "Looking forward to our conversation.\n", "\n", "Best regards,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User gave feedback, which can we incorporate into the email. Feedback: Shorter and less formal. 
Include a closing statement about looking forward to the meeting!\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_0vCEdmEMmvBYGh24gvd9pqvh)\n", " Call ID: call_0vCEdmEMmvBYGh24gvd9pqvh\n", " Args:\n", " to: pm@client.com\n", " subject: Re: Tax season let's schedule call\n", " content: Hi,\n", "\n", "Thanks for reaching out. I\u2019ve scheduled us for a 30-minute call on Tuesday, April 22nd at 2:00 PM to go over tax planning. Let me know if you need a different time.\n", "\n", "Looking forward to our chat!\n", "\n", "Best,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to pm@client.com with subject 'Re: Tax season let's schedule call' and content: Hi,\n", "\n", "Thanks for reaching out. I\u2019ve scheduled us for a 30-minute call on Tuesday, April 22nd at 2:00 PM to go over tax planning. Let me know if you need a different time.\n", "\n", "Looking forward to our chat!\n", "\n", "Best,\n", "Lance\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_UXqUyEknR5jjpA39rKdlEcc9)\n", " Call ID: call_UXqUyEknR5jjpA39rKdlEcc9\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_5)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "0bed8841", "metadata": {}, "source": ["Now let's try an email that calls the `Question` tool to provide feedback\n", "\n", "Our final test examines feedback for questions. When the assistant needs clarification before proceeding, users can provide detailed information beyond just answering the question:\n", "\n", "1. 
For the brunch invitation email, we'll provide feedback that includes:\n", " - Confirmation that we want to invite the people mentioned\n", " - A specific location preference (Jack's)\n", " - A time preference (before 11am)\n", " \n", "2. This gives the system multiple pieces of information:\n", " - A direct answer to the question (yes, let's invite them)\n", " - Additional context and preferences not explicitly asked for\n", "\n", "This tests the system's ability to process compound feedback and extract multiple data points from a single response."]}, {"cell_type": "code", "execution_count": 27, "id": "e111a459", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udce7 Classification: RESPOND - This email requires a response\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Question', 'args': {'content': 'Do you want me to check your calendar and suggest specific dates and times for the brunch, or would you like to propose a particular day and time to Jim and Lisa first?'}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["# Respond - Meeting Request Email\n", "email_input_respond = {\n", " \"to\": \"Lance Martin \",\n", " \"author\": \"Partner \",\n", " \"subject\": \"Meet Jim and Lisa for brunch in 3 weeks?\",\n", " \"email_thread\": \"Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\"\n", "}\n", "\n", "# Compile the graph\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_6 = uuid.uuid4()\n", "thread_config_6 = {\"configurable\": {\"thread_id\": thread_id_6}}\n", "\n", "# Run the graph until the first interrupt\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_respond}, config=thread_config_6):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt for Question tool\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "c540ebff", "metadata": {}, "source": ["Provide feedback 
for the `Question` tool call\n", "\n", "When the assistant asks about our preferences for the brunch invitation, we'll respond with rich, multi-faceted feedback:\n", "\n", "1. We confirm we want to invite Jim and Lisa\n", "2. We specify a location preference (Jack's, not the place on 17th)\n", "3. We express a time preference (before 11am)\n", "\n", "This tests the system's ability to handle compound responses that both answer the direct question and provide additional context. Note that we're not just answering \"yes\" or \"no\" - we're providing a rich context that should influence the assistant's next actions.\n", "\n", "An ideal system would use this feedback to both respond to the immediate email and update background knowledge that could be relevant for future similar social invitations. In our current implementation, we don't update background knowledge from question responses, but this would be a straightforward enhancement."]}, {"cell_type": "code", "execution_count": 28, "id": "61d8bfef", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user providing feedback for the Question tool call...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'partner@home.com', 'subject': 'Brunch with Jim and Lisa in 3 Weeks', 'content': 'Great idea! I\u2019ll reach out to Jim and Lisa to invite them to brunch at Jack\u2019s in three weeks. I\u2019ll suggest Saturday, May 8th, with a start time before 11am\u20149:00 AM is available on our calendar. 
Let me know if you\u2019d like to suggest a different time or if this works for you before I send the invite.'}}\n", "\n", "Checking memory after providing feedback for Question:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["print(f\"\\nSimulating user providing feedback for the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"Yes, let's invite them, I really like brunch at Jack's, ideally before 11am.\"}]), config=thread_config_6):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after providing feedback for Question\n", "print(\"\\nChecking memory after providing feedback for Question:\")\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "7b9dbba1", "metadata": {}, "source": ["Currently, we don't update `background_information` when the user provides feedback for the `Question` tool, but this would be a valuable enhancement.\n", "\n", "Looking at how the system handled our question response:\n", "\n", "1. 
It correctly incorporated all three key pieces of information:\n", " - Our affirmative decision to invite Jim and Lisa\n", " - Our location preference (Jack's, not the place on 17th)\n", " - Our time preference (before 11am)\n", "\n", "2. It drafted a complete email that:\n", " - References reaching out to Jim and Lisa\n", " - Specifies Jack's as the location\n", " - Suggests a 9:00 AM time (before 11am as requested)\n", " - Asks for confirmation before sending an invite\n", "\n", "This demonstrates the system's ability to extract and use detailed information from natural language feedback, even when not explicitly updating memory. The email correctly incorporates all aspects of our feedback and presents a coherent plan based on our preferences.\n", "\n", "A future enhancement could store these preferences (location preferences, time preferences for social events) in the background information memory for use in future similar situations."]}, {"cell_type": "code", "execution_count": 29, "id": "5b4c0f6a", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the write_email tool call...\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_k6NQ6GqqEIE1uNwVQXmx6xU6)\n", " Call ID: call_k6NQ6GqqEIE1uNwVQXmx6xU6\n", " Args:\n", " done: True\n", "\n", "Checking memory after accepting write_email after answering Question:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. 
If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_6):\n", " # Inspect response_agent most recent message\n", " if 'response_agent' in chunk:\n", " chunk['response_agent']['messages'][-1].pretty_print()\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting write_email after answering Question\n", "print(\"\\nChecking memory after accepting write_email after answering Question:\")\n", "display_memory_content(store)"]}, {"cell_type": "markdown", "id": "c80ed860", "metadata": {}, "source": ["Look at the full message history"]}, {"cell_type": "code", "execution_count": 30, "id": "e61fab2f", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Respond to the email: \n", "\n", "**Subject**: Meet Jim and Lisa for brunch in 3 weeks?\n", "**From**: 
Partner \n", "**To**: Lance Martin \n", "\n", "Hey, should we invite Jim and Lisa to brunch in 3 weeks? We could go to the new place on 17th that everyone is talking about.\n", "\n", "---\n", "\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Question (call_dF0WQiJIDf5ovo5veMixbBm9)\n", " Call ID: call_dF0WQiJIDf5ovo5veMixbBm9\n", " Args:\n", " content: Do you want me to check your calendar and suggest specific dates and times for the brunch, or would you like to propose a particular day and time to Jim and Lisa first?\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "User answered the question, which can we can use for any follow up actions. Feedback: Yes, let's invite them, I really like brunch at Jack's, ideally before 11am.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " check_calendar_availability (call_07aoxyrzmikMZPDN9XQ9rLbp)\n", " Call ID: call_07aoxyrzmikMZPDN9XQ9rLbp\n", " Args:\n", " day: 2025-05-08\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Available times on 2025-05-08: 9:00 AM, 2:00 PM, 4:00 PM\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_Vq5Ube1Y2Wbr9xzB7bh1aIqw)\n", " Call ID: call_Vq5Ube1Y2Wbr9xzB7bh1aIqw\n", " Args:\n", " to: partner@home.com\n", " subject: Brunch with Jim and Lisa in 3 Weeks\n", " content: Great idea! I\u2019ll reach out to Jim and Lisa to invite them to brunch at Jack\u2019s in three weeks. I\u2019ll suggest Saturday, May 8th, with a start time before 11am\u20149:00 AM is available on our calendar. 
Let me know if you\u2019d like to suggest a different time or if this works for you before I send the invite.\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to partner@home.com with subject 'Brunch with Jim and Lisa in 3 Weeks' and content: Great idea! I\u2019ll reach out to Jim and Lisa to invite them to brunch at Jack\u2019s in three weeks. I\u2019ll suggest Saturday, May 8th, with a start time before 11am\u20149:00 AM is available on our calendar. Let me know if you\u2019d like to suggest a different time or if this works for you before I send the invite.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_k6NQ6GqqEIE1uNwVQXmx6xU6)\n", " Call ID: call_k6NQ6GqqEIE1uNwVQXmx6xU6\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_6)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "a3b11d0c", "metadata": {}, "source": ["## Test Case for Notify Classification\n", "\n", "This test explores how memory updates when an email is initially classified as \"NOTIFY\" but the user decides it needs a response:\n", "\n", "1. The triage system initially classifies IT security updates as notifications\n", "2. But the user decides this particular notification warrants acknowledgment\n", "3. This creates a learning opportunity about which notification types need responses\n", "\n", "The \"notify\" category is designed for important information that doesn't need immediate action. 
But user feedback can help the system learn which subset of notifications actually do warrant responses, refining the initial classification over time."]}, {"cell_type": "code", "execution_count": 34, "id": "a6e8a62a", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udd14 Classification: NOTIFY - This email contains important information\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["# Notify - Important FYI Email\n", "email_input_notify = {\n", " \"to\": 
\"Team Members \",\n", " \"author\": \"IT Department \",\n", " \"subject\": \"Critical Security Update\",\n", " \"email_thread\": \"Dear Team,\\n\\nThis is an important security notification. We will be updating our authentication system this weekend. During the update window (Saturday 2am-4am), you will not be able to access company resources.\\n\\nPlease ensure you log out of all systems before the maintenance window.\\n\\nRegards,\\nIT Department\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_7 = uuid.uuid4()\n", "thread_config_7 = {\"configurable\": {\"thread_id\": thread_id_7}}\n", "\n", "# Run the graph until the first interrupt - should be classified as \"notify\"\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_7):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt for Notify\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "1b02f3be", "metadata": {}, "source": ["Now simulate user deciding to respond with feedback.\n", "\n", "This test explores the reclassification pathway from \"notify\" to \"respond\":\n", "\n", "1. The system initially classifies a security update as information-only (\"notify\")\n", "2. We're presented with this notification without a suggested action\n", "3. We decide this security update actually requires acknowledgment\n", "4. 
We provide feedback indicating we want to respond and confirm our compliance\n", "\n", "This represents an important learning signal - that security notifications, particularly those requesting specific user actions, should be treated as items requiring response rather than just information.\n", "\n", "After providing this feedback, we'll check the triage preferences memory to see how this \"override\" affects future classifications. We expect to see security notifications with action requests moved into the \"worth responding to\" section."]}, {"cell_type": "code", "execution_count": 35, "id": "510235cd", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user deciding to respond with feedback...\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'write_email', 'args': {'to': 'it@company.com', 'subject': 'Re: Critical Security Update', 'content': 'Dear IT Department,\\n\\nThank you for the notification regarding the upcoming authentication system update. 
I acknowledge receipt of this important notice and will ensure that I am logged out of all systems before the maintenance window begins on Saturday from 2am to 4am.\\n\\nBest regards,\\nLance'}}\n", "\n", "Checking memory after responding with feedback to Notify:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"Emails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n- Important security notifications from IT Department requiring acknowledgment or confirmation of action\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\"}\n"]}], "source": ["print(\"\\nSimulating user deciding to respond with feedback...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"response\", \"args\": \"We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\"}]), config=thread_config_7):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after responding with feedback to Notify\n", "print(\"\\nChecking memory after responding with feedback to Notify:\")\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "0c4b1139", "metadata": {}, "source": ["The memory update after our decision to respond to a security notification shows a remarkable triage preference refinement:\n", "\n", "1. The system has added a new bullet to the \"Emails that are worth responding to\" section:\n", " ```\n", " \"Important security notifications from IT Department requiring acknowledgment or confirmation of action\"\n", " ```\n", "\n", "2. 
This update demonstrates:\n", " - Precision: It specifically identifies \"security notifications\" (not all IT emails)\n", " - Source awareness: It scopes the rule to emails from the \"IT Department\" specifically\n", " - Action trigger: It identifies notifications \"requiring acknowledgment or confirmation\"\n", " - Category reorganization: It routes this kind of email to \"respond\" instead of \"notify\", while leaving the existing \"notify\" examples in place\n", "\n", "The system didn't just record our specific override - it analyzed the *reason* we might want to respond (acknowledging a required action) and created a generalizable rule. This would ensure that future similar security notifications requesting specific user actions would be correctly classified as needing a response."]}, {"cell_type": "code", "execution_count": 36, "id": "85fa053f", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user accepting the write_email tool call...\n", "\n", "Checking memory after accepting write_email for Notify:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"Emails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n- Important security notifications from IT Department requiring acknowledgment or confirmation of action\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\"}\n", "=======================================\n", "\n", "\n", "--- response_preferences ---\n", "{'preferences': \"\\nUse professional and concise language. If the e-mail mentions a deadline, make sure to explicitly acknowledge and reference the deadline in your response.\\n\\nWhen responding to technical questions that require investigation:\\n- Clearly state whether you will investigate or who you will ask\\n- Provide an estimated timeline for when you'll have more information or complete the task\\n\\nWhen responding to event or conference invitations:\\n- Always acknowledge any mentioned deadlines (particularly registration deadlines)\\n- If workshops or specific topics are mentioned, ask for more specific details about them\\n- If discounts (group or early bird) are mentioned, explicitly request information about them\\n- Don't commit \\n\\nWhen responding to collaboration or project-related requests:\\n- Acknowledge any existing work or materials mentioned (drafts, slides, documents, etc.)\\n- Explicitly mention reviewing these materials before or during the meeting\\n- When scheduling meetings, clearly state the specific day, date, and time proposed\\n\\nWhen responding to meeting scheduling requests:\\n- If times are proposed, verify calendar availability for all time slots mentioned in the original email and then commit to one of the proposed times based on your availability by scheduling the meeting. 
Or, say you can't make it at the time proposed.\\n- If no times are proposed, then check your calendar for availability and propose multiple time options when available instead of selecting just one.\\n- Mention the meeting duration in your response to confirm you've noted it correctly.\\n- Reference the meeting's purpose in your response.\\n\"}\n", "=======================================\n", "\n", "\n", "--- cal_preferences ---\n", "{'preferences': '\\n30 minute meetings are preferred, but 15 minute meetings are also acceptable.\\n'}\n", "=======================================\n", "\n", "\n", "--- background ---\n", "No memory found\n", "=======================================\n", "\n"]}], "source": ["print(f\"\\nSimulating user accepting the {Interrupt_Object.value[0]['action_request']['action']} tool call...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"accept\"}]), config=thread_config_7):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after accepting write_email for Notify\n", "print(\"\\nChecking memory after accepting write_email for Notify:\")\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "4878373b", "metadata": {}, "source": ["Look at the full message history."]}, {"cell_type": "code", "execution_count": 37, "id": "0501ff69", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Email to notify user about: \n", "\n", "**Subject**: Critical Security Update\n", "**From**: IT Department \n", "**To**: Team Members \n", "\n", "Dear Team,\n", "\n", "This is an important security notification. 
We will be updating our authentication system this weekend. During the update window (Saturday 2am-4am), you will not be able to access company resources.\n", "\n", "Please ensure you log out of all systems before the maintenance window.\n", "\n", "Regards,\n", "IT Department\n", "\n", "---\n", "\n", "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "User wants to reply to the email. Use this feedback to respond: We should acknowledge receipt of this important notice and confirm that we'll be logged out before the maintenance window.\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " write_email (call_8tZq4eSFWr2WE01sPgEfgFr0)\n", " Call ID: call_8tZq4eSFWr2WE01sPgEfgFr0\n", " Args:\n", " to: it@company.com\n", " subject: Re: Critical Security Update\n", " content: Dear IT Department,\n", "\n", "Thank you for the notification regarding the upcoming authentication system update. I acknowledge receipt of this important notice and will ensure that I am logged out of all systems before the maintenance window begins on Saturday from 2am to 4am.\n", "\n", "Best regards,\n", "Lance\n", "=================================\u001b[1m Tool Message \u001b[0m=================================\n", "\n", "Email sent to it@company.com with subject 'Re: Critical Security Update' and content: Dear IT Department,\n", "\n", "Thank you for the notification regarding the upcoming authentication system update. 
I acknowledge receipt of this important notice and will ensure that I am logged out of all systems before the maintenance window begins on Saturday from 2am to 4am.\n", "\n", "Best regards,\n", "Lance\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "Tool Calls:\n", " Done (call_or3Ir137SK0RmLgs2qFrMcWB)\n", " Call ID: call_or3Ir137SK0RmLgs2qFrMcWB\n", " Args:\n", " done: True\n"]}], "source": ["state = graph.get_state(thread_config_7)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "c8e48281", "metadata": {}, "source": ["## Test Case for Notify + Ignore\n", "\n", "Our final test explores the opposite pattern - when a user decides notifications don't even warrant being shown:\n", "\n", "1. The system classifies a company picnic announcement as a notification\n", "2. The user decides this doesn't even warrant notification status\n", "3. This creates a signal to further refine the classification boundaries\n", "\n", "By ignoring certain types of notifications, users can teach the system which information is truly important to them versus which can be filtered out entirely. 
This completes the full spectrum of classification refinement."]}, {"cell_type": "code", "execution_count": 39, "id": "74e1fe6f", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Running the graph until the first interrupt...\n", "\ud83d\udd14 Classification: NOTIFY - This email contains important information\n", "\n", "INTERRUPT OBJECT:\n", "Action Request: {'action': 'Email Assistant: notify', 'args': {}}\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["# Notify - Important FYI Email\n", "email_input_notify = {\n", " \"to\": \"Team Members \",\n", " \"author\": \"HR Department \",\n", " \"subject\": \"Company Picnic 
Next Month\",\n", " \"email_thread\": \"Dear Team,\\n\\nWe're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. There will be food, games, and activities for families.\\n\\nMore details will follow in the coming weeks.\\n\\nRegards,\\nHR Department\"\n", "}\n", "\n", "# Compile the graph with new thread\n", "checkpointer = MemorySaver()\n", "store = InMemoryStore()\n", "graph = overall_workflow.compile(checkpointer=checkpointer, store=store)\n", "thread_id_8 = uuid.uuid4()\n", "thread_config_8 = {\"configurable\": {\"thread_id\": thread_id_8}}\n", "\n", "# Run the graph until the first interrupt - should be classified as \"notify\"\n", "print(\"Running the graph until the first interrupt...\")\n", "for chunk in graph.stream({\"email_input\": email_input_notify}, config=thread_config_8):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after first interrupt for Notify + Ignore\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "911a8c79", "metadata": {}, "source": ["Now simulate user deciding to ignore the notification.\n", "\n", "In this final test, we explore the downgrade pathway from \"notify\" to \"ignore\":\n", "\n", "1. The system initially classifies a company picnic announcement as \"notify\"\n", "2. We decide we don't even want to be notified about these social events\n", "3. 
By choosing \"ignore,\" we signal this entire category should be filtered out\n", "\n", "This represents another important learning signal - that certain types of company announcements (particularly social events) shouldn't even be surfaced as notifications, further refining our triage preferences.\n", "\n", "After ignoring, we'll check the triage preferences memory for the final time to see how this override affects future classifications. We expect to see company social events moved from the \"notify\" section to the \"not worth responding to\" section."]}, {"cell_type": "code", "execution_count": 40, "id": "088e7bc7", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\n", "Simulating user deciding to ignore the notification...\n", "\n", "Checking memory after ignoring Notify:\n", "\n", "======= CURRENT MEMORY CONTENT =======\n", "\n", "--- triage_preferences ---\n", "{'preferences': \"\\nEmails that are not worth responding to:\\n- Marketing newsletters and promotional emails\\n- Spam or suspicious emails\\n- CC'd on FYI threads with no direct questions\\n- Company social event announcements (e.g., company picnic)\\n\\nThere are also other things that should be known about, but don't require an email response. For these, you should notify (using the `notify` response). 
Examples of this include:\\n- Team member out sick or on vacation\\n- Build system notifications or deployments\\n- Project status updates without action items\\n- Important company announcements\\n- FYI emails that contain relevant information for current projects\\n- HR Department deadline reminders\\n- Subscription status / renewal reminders\\n- GitHub notifications\\n\\nEmails that are worth responding to:\\n- Direct questions from team members requiring expertise\\n- Meeting requests requiring confirmation\\n- Critical bug reports related to team's projects\\n- Requests from management requiring acknowledgment\\n- Client inquiries about project status or features\\n- Technical questions about documentation, code, or APIs (especially questions about missing endpoints or features)\\n- Personal reminders related to family (wife / daughter)\\n- Personal reminder related to self-care (doctor appointments, etc)\\n\"}\n"]}], "source": ["print(\"\\nSimulating user deciding to ignore the notification...\")\n", "for chunk in graph.stream(Command(resume=[{\"type\": \"ignore\"}]), config=thread_config_8):\n", " # Inspect interrupt object if present\n", " if '__interrupt__' in chunk:\n", " Interrupt_Object = chunk['__interrupt__'][0]\n", " print(\"\\nINTERRUPT OBJECT:\")\n", " print(f\"Action Request: {Interrupt_Object.value[0]['action_request']}\")\n", "\n", "# Check memory after ignoring Notify\n", "print(\"\\nChecking memory after ignoring Notify:\")\n", "display_memory_content(store, (\"email_assistant\", \"triage_preferences\"))"]}, {"cell_type": "markdown", "id": "05136d96", "metadata": {}, "source": ["The final memory update completes our triage preference refinement journey:\n", "\n", "1. The system has added \"Company social event announcements (e.g., company picnic)\" to the \"Emails not worth responding to\" section\n", "\n", "2. 
This demonstrates:\n", " - Content-based categorization: It identifies \"social event announcements\" specifically\n", " - Example inclusion: It provides an example \"(e.g., company picnic)\" for clarity\n", " - Category downgrade: It files this category under \"not worth responding to\" rather than \"notify\"\n", " - Structural preservation: It maintains the original memory organization\n", "\n", "Through our series of tests, we've now explored the full spectrum of triage learning:\n", "- Upgrading notifications to responses (security updates)\n", "- Downgrading notifications to ignores (company social events)\n", "- Downgrading responses to ignores (personal social invitations, routine calls)\n", "- Refining response preferences (meeting duration, timing, email style)\n", "\n", "Each interaction has produced targeted, intelligent memory updates that maintain the overall structure while adding new insights. This continuous refinement creates an increasingly personalized assistant that learns and adapts to user preferences over time."]}, {"cell_type": "code", "execution_count": 41, "id": "027f3f4e", "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "Email to notify user about: \n", "\n", "**Subject**: Company Picnic Next Month\n", "**From**: HR Department \n", "**To**: Team Members \n", "\n", "Dear Team,\n", "\n", "We're planning the annual company picnic for next month. The tentative date is Saturday, June 15th from noon to 4pm at Central Park. There will be food, games, and activities for families.\n", "\n", "More details will follow in the coming weeks.\n", "\n", "Regards,\n", "HR Department\n", "\n", "---\n", "\n", "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", "The user decided to ignore the email even though it was classified as notify. 
Update triage preferences to capture this.\n"]}], "source": ["state = graph.get_state(thread_config_8)\n", "for m in state.values['messages']:\n", " m.pretty_print()"]}, {"cell_type": "markdown", "id": "b075a3ea", "metadata": {}, "source": ["## Testing with Local Deployment\n", "\n", "You can find this graph with memory integration in the `src/email_assistant` directory:\n", "\n", "* `src/email_assistant/email_assistant_hitl_memory.py`\n", "\n", "Testing this locally gives you the full experience of a memory-enabled HITL system:\n", "\n", "1. **Start the local server**: Run `langgraph dev` to launch the agent locally\n", "2. **Connect Agent Inbox**: Use the graph URL from the `langgraph.json` file\n", "3. **Submit test emails**: Try different email types to see classification in action\n", "4. **Provide various feedback types**: Try accepting, editing, ignoring, and responding\n", "5. **Observe memory evolution**: Check the Memory tab in LangGraph Studio to see changes\n", "\n", "![inbox](img/agent-inbox-edit.png)\n", "\n", "The Memory tab in LangGraph Studio offers a real-time view of how your preferences are being captured and updated with each interaction:\n", "\n", "![studio-img](img/memory-studio.png)\n", "\n", "Through continued use, the system becomes increasingly personalized:\n", "- It learns which emails you want to respond to, be notified about, or ignore\n", "- It adapts to your communication style preferences\n", "- It remembers your scheduling preferences\n", "- It refines its understanding with each interaction\n", "\n", "This combination of HITL and memory creates a system that balances automation with control - handling routine tasks automatically while learning from your feedback to become more aligned with your preferences over time."]}, {"cell_type": "code", "execution_count": null, "id": "f60fa538", "metadata": {}, "outputs": [], "source": ["! 
langgraph dev"]}, {"cell_type": "markdown", "id": "43b6319d", "metadata": {}, "source": ["![inbox](img/agent-inbox-edit.png)\n", "\n", "As you provide feedback or edit replies, you can see memories accumulate in the `memory` tab in LangGraph Studio.\n", "\n", "![studio-img](img/memory-studio.png)\n", "\n", "\n", "\n"]}, {"cell_type": "markdown", "id": "f2ad7580", "metadata": {}, "source": ["### Use LangMem to add background memory! \n", "\n", "Add this to the `llm_call` node:\n", "\n", "```\n", " # Search for existing background memory\n", " # TODO: Here, semantic search over a facts collection of background information from emails could be added. \n", " # background = get_memory(store, (\"email_assistant\", \"background\"), default_background)\n", "```\n", "\n", "Add this to the interrupt handler node:\n", "\n", "```\n", "elif tool_call[\"name\"] == \"Question\":\n", " # Don't execute the tool, and add a message with the user feedback to incorporate into the email\n", " result.append({\"role\": \"tool\", \"content\": f\"User answered the question, which we can use for any follow up actions. Feedback: {user_feedback}\", \"tool_call_id\": tool_call[\"id\"]})\n", " # TODO: Here, we could update the background information with the user's answer. 
\n", " # update_memory(store, (\"email_assistant\", \"background\"), [{\n", " # \"role\": \"user\",\n", " # \"content\": f\"Update background information based upon these messages:\"\n", " # }] + state[\"messages\"] + result)\n", "```\n", "\n", "Consider using LangMem: \n", "https://langchain-ai.github.io/langmem/"]}], "metadata": {"jupytext": {"cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all"}, "kernelspec": {"display_name": ".venv", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6"}}, "nbformat": 4, "nbformat_minor": 5} diff --git a/tests/test_response.py b/tests/test_response.py index 64675c9..8f3be56 100644 --- a/tests/test_response.py +++ b/tests/test_response.py @@ -14,7 +14,7 @@ from langgraph.store.memory import InMemoryStore from langgraph.types import Command -from src.email_assistant.utils import extract_tool_calls, format_messages_string +from email_assistant.utils import extract_tool_calls, format_messages_string from eval.prompts import RESPONSE_CRITERIA_SYSTEM_PROMPT from dotenv import load_dotenv @@ -47,10 +47,10 @@ def set_agent_module(agent_module_name): print(f"Using agent module: {AGENT_MODULE}") # Force reload the module to ensure we get the latest code - if f"src.email_assistant.{AGENT_MODULE}" in sys.modules: - importlib.reload(sys.modules[f"src.email_assistant.{AGENT_MODULE}"]) + if f"email_assistant.{AGENT_MODULE}" in sys.modules: + importlib.reload(sys.modules[f"email_assistant.{AGENT_MODULE}"]) - agent_module = importlib.import_module(f"src.email_assistant.{AGENT_MODULE}") + agent_module = importlib.import_module(f"email_assistant.{AGENT_MODULE}") return AGENT_MODULE def setup_assistant() -> Tuple[Any, Dict[str, Any], InMemoryStore]: