Description
> In order for the deep researcher to be run without errors it relies on models that are highly performant at tool calling.
I completely agree with that statement, but I think there may be opportunities to harden the system against this type of malfunction. When running with Gemma, I encountered occasional issues and had Cursor attempt to resolve them. Eventually, this setup produced stable results; however, this isn't the kind of code you'd want to rely on in the long term. That's why I'm opening an issue instead of submitting a PR. Still, perhaps you can use the following as inspiration for how to improve the system's resilience.
diff
diff --git a/deep_researcher/agents/utils/parse_output.py b/deep_researcher/agents/utils/parse_output.py
index 42f472e..3a53f5c 100644
--- a/deep_researcher/agents/utils/parse_output.py
+++ b/deep_researcher/agents/utils/parse_output.py
@@ -1,6 +1,7 @@
import json
from pydantic import BaseModel
from typing import Any, Callable
+import re
class OutputParserError(Exception):
@@ -46,19 +47,25 @@ def find_json_in_string(string: str) -> str:
def parse_json_output(output: str) -> Any:
- """Take a string output and parse it as JSON"""
- # First try to load the string as JSON
- try:
- return json.loads(output)
- except json.JSONDecodeError as e:
- pass
+ """Parse JSON output from the model, accounting for oddities in formatting"""
+ if "```" in output:
+ # If there are code blocks, extract the first code block that likely contains JSON
+ code_blocks = re.findall(r"```(?:json)?(.*?)```", output, re.DOTALL)
+ if code_blocks:
+ parsed_output = code_blocks[0].strip()
+ else:
+ parsed_output = output
+ else:
+ parsed_output = output
- # If that fails, assume that the output is in a code block - remove the code block markers and try again
- parsed_output = output
- parsed_output = parsed_output.split("```")[1]
- parsed_output = parsed_output.split("```")[0]
if parsed_output.startswith("json") or parsed_output.startswith("JSON"):
parsed_output = parsed_output[4:].strip()
+
+ # Pre-process the string to fix common escaping issues
+ # Replace problematic escape sequences
+ parsed_output = parsed_output.replace('\$', '$') # Replace \$ with $
+ parsed_output = re.sub(r'\\(?!["\\/bfnrt])', r'\\\\', parsed_output) # Escape all unescaped backslashes not part of valid JSON escape sequences
+
try:
return json.loads(parsed_output)
except json.JSONDecodeError:
@@ -67,6 +74,10 @@ def parse_json_output(output: str) -> Any:
# As a last attempt, try to manually find the JSON object in the output and parse it
parsed_output = find_json_in_string(output)
if parsed_output:
+ # Apply the same preprocessing to fix escape sequences
+ parsed_output = parsed_output.replace('\$', '$')
+ parsed_output = re.sub(r'\\(?!["\\/bfnrt])', r'\\\\', parsed_output)
+
try:
return json.loads(parsed_output)
except json.JSONDecodeError:
@@ -81,7 +92,50 @@ def create_type_parser(type: BaseModel) -> Callable[[str], BaseModel]:
def convert_json_string_to_type(output: str) -> BaseModel:
"""Take a string output and parse it as a Pydantic model"""
- output_dict = parse_json_output(output)
- return type.model_validate(output_dict)
+ try:
+ output_dict = parse_json_output(output)
+ return type.model_validate(output_dict)
+ except Exception as e:
+ # Special handling for KnowledgeGapOutput - if the required fields are missing
+ if type.__name__ == "KnowledgeGapOutput":
+ print(f"Warning: Failed to parse KnowledgeGapOutput, using fallback. Error: {e}")
+ # Create a fallback object with reasonable defaults
+ from ..knowledge_gap_agent import KnowledgeGapOutput
+ # Attempt to extract some gaps even if schema is wrong
+ gaps = []
+ if isinstance(output_dict, dict):
+ # Try to find any field that might contain gaps or questions
+ for key in output_dict:
+ if any(term in key.lower() for term in ["gap", "question", "issue", "next", "step"]):
+ if isinstance(output_dict[key], list):
+ gaps.extend(output_dict[key])
+ elif isinstance(output_dict[key], str):
+ gaps.append(output_dict[key])
+
+ # If still no gaps found, check if there's text suggesting what to research next
+ if not gaps and isinstance(output, str):
+ import re
+ # Look for phrases indicating gaps or next steps
+ next_step_matches = re.findall(r"(research|investigate|explore|analyze|examine|study|understand|determine) ([\w\s,\-\.']+)", output, re.IGNORECASE)
+ if next_step_matches:
+ gaps = [match[0] + " " + match[1] for match in next_step_matches[:3]]
+
+ # If still nothing, just extract potential questions
+ if not gaps:
+ question_matches = re.findall(r"(what|how|why|where|when|who|which) ([\w\s,\-\.']+)\?", output, re.IGNORECASE)
+ if question_matches:
+ gaps = [match[0] + " " + match[1] + "?" for match in question_matches[:3]]
+
+ # Default gap if nothing could be extracted
+ if not gaps:
+ gaps = ["Need to gather more information to address the original query"]
+
+ return KnowledgeGapOutput(
+ research_complete=False, # Default to not complete
+ outstanding_gaps=gaps
+ )
+ else:
+ # For other types, just raise the original error
+ raise
return convert_json_string_to_type
I also tried https://huggingface.co/deepcogito/cogito-v1-preview-llama-70B but haven't managed to get it to work yet; this is probably an issue with how I'm serving the model and the related tool-calling setup. I will try https://huggingface.co/deepcogito/cogito-v1-preview-qwen-32B later.