From 12009a024c70c55e840b47c2973fc7dee1aa45e9 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 26 May 2025 01:46:03 -0300 Subject: [PATCH 01/27] feat(filesystem): add MEMFS filesystem support --- libs/pyodide-sandbox-js/main.ts | 456 +++++++++++++++++++++------ libs/pyodide-sandbox-js/main_test.ts | 201 +++++++++++- 2 files changed, 562 insertions(+), 95 deletions(-) diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts index f88bcb5..e7ca372 100644 --- a/libs/pyodide-sandbox-js/main.ts +++ b/libs/pyodide-sandbox-js/main.ts @@ -2,17 +2,15 @@ import { loadPyodide } from "pyodide"; import { join } from "@std/path"; import { parseArgs } from "@std/cli/parse-args"; - const pkgVersion = "0.0.7"; -// Python environment preparation code -// This code was adapted from -// https://github.com/alexmojaki/pyodide-worker-runner/blob/master/lib/pyodide_worker_runner.py const prepareEnvCode = ` import datetime import importlib import json import sys +import os +import base64 from typing import Union, TypedDict, List, Any, Callable, Literal try: @@ -24,11 +22,169 @@ import pyodide_js # noqa sys.setrecursionlimit(400) - class InstallEntry(TypedDict): module: str package: str +class FileSystemOperation(TypedDict): + operation: Literal["read", "write", "list", "mkdir", "exists", "remove", "copy"] + path: str + content: Union[str, bytes, None] + encoding: str + destination: Union[str, None] + +def perform_fs_operation(op) -> dict: + """Performs filesystem operations safely within the sandbox environment. + + Supports the following operations: + - read: Reads file content with text or binary encoding + - write: Writes content to a file, creating parent directories if needed + - list: Lists directory contents with metadata (name, type, size, etc) + - mkdir: Creates directories recursively + - exists: Checks if a file or directory exists + - remove: Deletes files or directories (recursive) + - copy: Copies files or directories to a destination path + + Returns: + A dictionary with operation result ('success' boolean and data or 'error' message) + """ + try: + # Convert JsProxy to Python dict if needed + if hasattr(op, 'to_py'): + op = op.to_py() + + operation = op.get("operation") + path = op.get("path") + content = op.get("content") + encoding = op.get("encoding", "utf-8") + destination = op.get("destination") + + if operation == "read": + if os.path.exists(path): + if encoding == "binary": + with open(path, "rb") as f: + content = base64.b64encode(f.read()).decode('ascii') + return {"success": True, "content": content, "is_binary": True} + else: + with open(path, "r", encoding=encoding) as f: + content = f.read() + return {"success": True, "content": content, "is_binary": False} + else: + return {"success": False, "error": "File not found"} + + elif operation == "write": + # Ensure parent directory exists + parent_dir = os.path.dirname(path) + if parent_dir and not os.path.exists(parent_dir): + os.makedirs(parent_dir, exist_ok=True) + + if encoding == "binary": + content = base64.b64decode(content) + with open(path, "wb") as f: + f.write(content) + else: + with open(path, "w", encoding=encoding) as f: + f.write(content) + return {"success": True} + + elif operation == "list": + if os.path.exists(path): + items = [] + for item in os.listdir(path): + item_path = os.path.join(path, item) + stat_info = os.stat(item_path) + items.append({ + "name": item, + "is_dir": os.path.isdir(item_path), + "is_file": os.path.isfile(item_path), + "size": stat_info.st_size, + "modified": 
stat_info.st_mtime + }) + return {"success": True, "items": items} + else: + return {"success": False, "error": "Directory not found"} + + elif operation == "mkdir": + os.makedirs(path, exist_ok=True) + return {"success": True} + + elif operation == "exists": + return {"success": True, "exists": os.path.exists(path)} + + elif operation == "remove": + if os.path.exists(path): + if os.path.isfile(path): + os.remove(path) + elif os.path.isdir(path): + import shutil + shutil.rmtree(path) + return {"success": True} + else: + return {"success": False, "error": "Path not found"} + + elif operation == "copy": + if not destination: + return {"success": False, "error": "Destination path required for copy operation"} + if os.path.exists(path): + import shutil + if os.path.isfile(path): + shutil.copy2(path, destination) + elif os.path.isdir(path): + shutil.copytree(path, destination, dirs_exist_ok=True) + return {"success": True} + else: + return {"success": False, "error": "Source path not found"} + else: + return {"success": False, "error": f"Unknown operation: {operation}"} + + except Exception as e: + return {"success": False, "error": str(e)} + +def create_document_store(base_path: str = "/sandbox/documents") -> dict: + """Creates a document store structure for LangChain. + + Sets up a directory structure suitable for LangChain document processing: + - raw: For storing original documents + - processed: For storing processed documents + - embeddings: For storing document embeddings + - metadata: For storing document metadata + + Also creates an index.json file to track documents and collections. + + Args: + base_path: Root directory for the document store + + Returns: + Dictionary with creation status and structure information + """ + try: + os.makedirs(base_path, exist_ok=True) + os.makedirs(f"{base_path}/raw", exist_ok=True) + os.makedirs(f"{base_path}/processed", exist_ok=True) + os.makedirs(f"{base_path}/embeddings", exist_ok=True) + os.makedirs(f"{base_path}/metadata", exist_ok=True) + + # Create index file + index_file = f"{base_path}/index.json" + initial_index = { + "created": datetime.datetime.now().isoformat(), + "version": "1.0", + "documents": {}, + "collections": {}, + "last_updated": datetime.datetime.now().isoformat() + } + + with open(index_file, 'w') as f: + json.dump(initial_index, f, indent=2) + + return { + "success": True, + "base_path": base_path, + "structure": ["raw", "processed", "embeddings", "metadata"], + "index_file": index_file + } + except Exception as e: + return {"success": False, "error": str(e)} def find_imports_to_install(imports: list[str]) -> list[InstallEntry]: """ @@ -57,69 +213,84 @@ def find_imports_to_install(imports: list[str]) -> list[InstallEntry]: ) return to_install - async def install_imports( source_code_or_imports: Union[str, list[str]], additional_packages: list[str] = [], - message_callback: Callable[ - [ - Literal[ - "failed", - ], - Union[InstallEntry, list[InstallEntry]], - ], - None, - ] = lambda event_type, data: None, + message_callback: Callable = lambda event_type, data: None, ) -> List[InstallEntry]: + """Installs Python packages required for the provided code or import list. + + Takes either: + - Python source code: Analyzes imports using Pyodide's find_imports + - A list of import names: Uses the list directly + + Additionally installs any packages specified in additional_packages. 
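+
+    A minimal illustrative call (hypothetical inputs; the resolved package
+    list depends on what is already importable in the runtime):
+
+        installed = await install_imports(
+            ["numpy", "pandas"],
+            additional_packages=["dill"],
+        )
+        # e.g. [{"module": "numpy", "package": "numpy"},
+        #       {"module": "pandas", "package": "pandas"},
+        #       {"module": "dill", "package": "dill"}]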
+ + Args: + source_code_or_imports: Python code string or list of import names + additional_packages: Extra packages to install regardless of imports + message_callback: Function called with status updates during installation + + Returns: + List of package entries that were installed + """ if isinstance(source_code_or_imports, str): try: imports: list[str] = find_imports(source_code_or_imports) except SyntaxError: - return + return [] else: imports: list[str] = source_code_or_imports to_install = find_imports_to_install(imports) - # Merge with additional packages for package in additional_packages: - if package not in to_install: + if package not in [entry["package"] for entry in to_install]: to_install.append(dict(module=package, package=package)) if to_install: try: - import micropip # noqa + import micropip except ModuleNotFoundError: await pyodide_js.loadPackage("micropip") - import micropip # noqa + import micropip for entry in to_install: try: await micropip.install(entry["package"]) except Exception as e: message_callback("failed", entry["package"]) - break # Fail fast + break return to_install - -def load_session_bytes(session_bytes: bytes) -> list[str]: - """Load the session module.""" +def load_session_bytes(session_bytes: bytes): + """Loads a serialized session state from bytes. + + Uses dill to restore a previously serialized Python session state, + including all variables, functions and class definitions. + + Args: + session_bytes: Bytes object containing the serialized session + """ import dill import io - buffer = io.BytesIO(session_bytes.to_py()) dill.session.load_session(filename=buffer) - def dump_session_bytes() -> bytes: - """Dump the session module.""" + """Serializes the current session state to bytes. + + Uses dill to capture the current Python session state, + including all variables, functions and class definitions. + + Returns: + Bytes object containing the serialized session + """ import dill import io - buffer = io.BytesIO() dill.session.dump_session(filename=buffer) return buffer.getvalue() - def robust_serialize(obj): """Recursively converts an arbitrary Python object into a JSON-serializable structure. @@ -132,34 +303,23 @@ def robust_serialize(obj): - For unsupported/unknown objects, a dictionary containing a 'type' indicator and the object's repr is returned. """ - # Base case: primitives that are already JSON-serializable if isinstance(obj, (str, int, float, bool, type(None))): return obj - - # Process lists or tuples recursively. if isinstance(obj, (list, tuple)): return [robust_serialize(item) for item in obj] - - # Process dictionaries. if isinstance(obj, dict): - # Convert keys to strings if necessary and process values recursively. return {str(key): robust_serialize(value) for key, value in obj.items()} - - # Process sets by converting them to lists. if isinstance(obj, (set, frozenset)): return [robust_serialize(item) for item in obj] - - # Process known datetime objects. if isinstance(obj, (datetime.date, datetime.datetime)): return obj.isoformat() - - # Fallback: for objects that are not directly serializable, - # return a dictionary with type indicator and repr. return {"type": "not_serializable", "repr": repr(obj)} - def dumps(result: Any) -> str: - """Get the result of the session.""" + """Serializes a Python object to a JSON string. + + Uses robust_serialize to handle complex Python objects before JSON serialization. 
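+
+    Illustrative round-trip (a sketch based on robust_serialize above; the
+    ordering of set items in the output list is not guaranteed):
+
+        dumps({"day": datetime.date(2025, 1, 1), "tags": {"x"}})
+        # -> '{"day": "2025-01-01", "tags": ["x"]}'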
+ """ result = robust_serialize(result) return json.dumps(result) `; @@ -170,6 +330,11 @@ interface SessionMetadata { packages: string[]; } +interface FileSystemOptions { + enableFileSystem?: boolean; + mountPoint?: string; +} + interface PyodideResult { success: boolean; result?: any; @@ -179,9 +344,40 @@ interface PyodideResult { jsonResult?: string; sessionBytes?: Uint8Array; sessionMetadata?: SessionMetadata; + fileSystemOperations?: any[]; + fileSystemInfo?: { + type: "memfs"; + mountPoint: string; + }; +} + +interface FileSystemOperation { + operation: "read" | "write" | "list" | "mkdir" | "exists" | "remove" | "copy"; + path: string; + content?: string | Uint8Array; + encoding?: string; + destination?: string; +} + +interface ExecutionResult { + success: boolean; + stdout: string | null; + stderr: string | null; + result: any; + session?: { + metadata?: SessionMetadata; + bytes?: Uint8Array; + }; + filesystem?: { + info?: { + type: string; + mountPoint: string; + }; + operations?: any[]; + }; } -async function initPyodide(pyodide: any): Promise { +async function initPyodide(pyodide: any, options: FileSystemOptions = {}): Promise { const sys = pyodide.pyimport("sys"); const pathlib = pyodide.pyimport("pathlib"); @@ -189,6 +385,40 @@ async function initPyodide(pyodide: any): Promise { sys.path.append(dirPath); pathlib.Path(dirPath).mkdir(); pathlib.Path(dirPath + "prepare_env.py").write_text(prepareEnvCode); + + if (options.enableFileSystem) { + const mountPoint = options.mountPoint || "/sandbox"; + + try { + pyodide.FS.mkdirTree(mountPoint); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + if (!errorMessage.includes("exists")) { + console.warn(`⚠️ Failed to create mount point ${mountPoint}:`, error); + } + } + } +} + +async function performFileSystemOperations( + pyodide: any, + operations: FileSystemOperation[] +): Promise { + const results: any[] = []; + const prepare_env = pyodide.pyimport("prepare_env"); + + for (const op of operations) { + try { + const result = prepare_env.perform_fs_operation(op); + const jsResult = result.toJs({ dict_converter: Object.fromEntries }); + results.push(jsResult); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + results.push({ success: false, error: errorMessage }); + } + } + + return results; } async function runPython( @@ -197,7 +427,9 @@ async function runPython( stateful?: boolean; sessionBytes?: string; sessionMetadata?: string; - } + fileSystemOptions?: FileSystemOptions; + fileSystemOperations?: FileSystemOperation[]; + } = {} ): Promise { const output: string[] = []; const err_output: string[] = []; @@ -215,9 +447,9 @@ async function runPython( output.push(`install error: ${msg}`) }, }); - await initPyodide(pyodide); + + await initPyodide(pyodide, options.fileSystemOptions); - // Determine session directory let sessionMetadata: SessionMetadata; if (options.sessionMetadata) { sessionMetadata = JSON.parse(options.sessionMetadata); @@ -227,17 +459,15 @@ async function runPython( lastModified: new Date().toISOString(), packages: [], }; - }; + } + let sessionData: Uint8Array | null = null; if (options.sessionBytes && !options.sessionMetadata) { console.error("sessionMetadata is required when providing sessionBytes"); return { success: false, error: "sessionMetadata is required when providing sessionBytes" }; } - - // Import our prepared environment module const prepare_env = pyodide.pyimport("prepare_env"); - // Prepare additional packages to install (include dill) const defaultPackages = options.stateful ? ["dill"] : []; const additionalPackagesToInstall = options.sessionBytes ? [...new Set([...defaultPackages, ...sessionMetadata.packages])] @@ -256,44 +486,40 @@ async function runPython( ); if (installErrors.length > 0) { - // Restore the original console.log function console.log = originalLog; return { success: false, - error: `Failed to install required Python packages: ${installErrors.join(", ")}. ` + - `This is likely because these packages are not available in the Pyodide environment. ` + - `Pyodide is a Python runtime that runs in the browser and has a limited set of ` + - `pre-built packages. 
You may need to use alternative packages that are compatible ` + - `with Pyodide.` + error: `Failed to install required Python packages: ${installErrors.join(", ")}.` }; } if (options.sessionBytes) { sessionData = Uint8Array.from(JSON.parse(options.sessionBytes)); - // Run session preamble await prepare_env.load_session_bytes(sessionData); } + let fileSystemResults: any[] = []; + if (options.fileSystemOperations) { + fileSystemResults = await performFileSystemOperations(pyodide, options.fileSystemOperations); + } + const packages = installedPackages.map((pkg: any) => pkg.get("package")); - // Restore the original console.log function console.log = originalLog; - // Run the Python code + const rawValue = await pyodide.runPythonAsync(pythonCode); - // Dump result to string + const jsonValue = await prepare_env.dumps(rawValue); - // Update session metadata with installed packages sessionMetadata.packages = [ ...new Set([...sessionMetadata.packages, ...packages]), ]; sessionMetadata.lastModified = new Date().toISOString(); if (options.stateful) { - // Save session state to sessionBytes sessionData = await prepare_env.dump_session_bytes() as Uint8Array; - }; - // Return the result with stdout and stderr output + } + const result: PyodideResult = { success: true, result: rawValue, @@ -301,15 +527,26 @@ async function runPython( stdout: output, stderr: err_output, sessionMetadata: sessionMetadata, + fileSystemOperations: fileSystemResults, }; + if (options.stateful && sessionData) { result["sessionBytes"] = sessionData; } + + if (options.fileSystemOptions?.enableFileSystem) { + result["fileSystemInfo"] = { + type: "memfs", + mountPoint: options.fileSystemOptions.mountPoint || "/sandbox", + }; + } + return result; - } catch (error: any) { + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); return { success: false, - error: error.message, + error: errorMessage, stdout: output, stderr: err_output }; @@ -318,7 +555,7 @@ async function runPython( async function main(): Promise { const flags = parseArgs(Deno.args, { - string: ["code", "file", "session-bytes", "session-metadata"], + string: ["code", "file", "session-bytes", "session-metadata", "fs-operations", "mount-point"], alias: { c: "code", f: "file", @@ -327,9 +564,17 @@ async function main(): Promise { s: "stateful", b: "session-bytes", m: "session-metadata", + "fs": "fs-operations", + "mp": "mount-point", + }, + boolean: ["help", "version", "stateful", "enable-filesystem"], + default: { + help: false, + version: false, + stateful: false, + "enable-filesystem": false, + "mount-point": "/sandbox" }, - boolean: ["help", "version", "stateful"], - default: { help: false, version: false, stateful: false }, }); if (flags.help) { @@ -340,9 +585,12 @@ Run Python code in a sandboxed environment using Pyodide OPTIONS: -c, --code Python code to execute -f, --file Path to Python file to execute - -s, --stateful Use a stateful session + -s, --stateful Use a stateful session -b, --session-bytes Session bytes -m, --session-metadata Session metadata + --enable-filesystem Enable filesystem operations (MEMFS) + --fs-operations JSON array of filesystem operations + --mount-point Mount point path (default: /sandbox) -h, --help Display help -V, --version Display version `); @@ -360,68 +608,90 @@ OPTIONS: stateful: flags.stateful, sessionBytes: flags["session-bytes"], sessionMetadata: flags["session-metadata"], + enableFileSystem: flags["enable-filesystem"], + fsOperations: flags["fs-operations"], + mountPoint: flags["mount-point"], }; if (!options.code && !options.file) { - console.error( - "Error: You must provide Python code using either -c/--code or -f/--file option.\nUse --help for usage information." - ); + console.error("Error: You must provide Python code using either -c/--code or -f/--file option."); Deno.exit(1); } - // Get Python code from file or command line argument let pythonCode = ""; if (options.file) { try { - // Resolve relative or absolute file path const filePath = options.file.startsWith("/") ? options.file : join(Deno.cwd(), options.file); pythonCode = await Deno.readTextFile(filePath); - } catch (error: any) { - console.error(`Error reading file ${options.file}:`, error.message); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + console.error(`Error reading file ${options.file}:`, errorMessage); Deno.exit(1); } } else { - // Process code from command line (replacing escaped newlines) pythonCode = options.code?.replace(/\\n/g, "\n") ?? ""; } + let fileSystemOperations: FileSystemOperation[] = []; + if (options.fsOperations) { + try { + fileSystemOperations = JSON.parse(options.fsOperations); + } catch (error: unknown) { + console.error("Error parsing filesystem operations:", error instanceof Error ? 
error.message : String(error)); + Deno.exit(1); + } + } + const result = await runPython(pythonCode, { stateful: options.stateful, sessionBytes: options.sessionBytes, sessionMetadata: options.sessionMetadata, + fileSystemOptions: { + enableFileSystem: options.enableFileSystem, + mountPoint: options.mountPoint, + }, + fileSystemOperations: fileSystemOperations, }); - // Exit with error code if Python execution failed - // Create output JSON with stdout, stderr, and result - const outputJson = { + const executionResult: ExecutionResult = { + success: result.success, stdout: result.stdout?.join('') || null, stderr: result.success ? (result.stderr?.join('') || null) : result.error || null, result: result.success ? JSON.parse(result.jsonResult || 'null') : null, - success: result.success, - sessionBytes: result.sessionBytes, - sessionMetadata: result.sessionMetadata, }; - // Output as JSON to stdout - console.log(JSON.stringify(outputJson)); + // Only include session data if stateful execution was used + if (result.sessionBytes || result.sessionMetadata) { + executionResult.session = { + metadata: result.sessionMetadata, + ...(result.sessionBytes ? { bytes: result.sessionBytes } : {}) + }; + } + + // Only include filesystem information if filesystem was enabled + if (options.enableFileSystem) { + executionResult.filesystem = { + info: result.fileSystemInfo, + // Only include operations results if operations were actually performed + ...(fileSystemOperations.length > 0 ? { operations: result.fileSystemOperations } : {}) + }; + } + + console.log(JSON.stringify(executionResult)); - // Exit with error code if Python execution failed if (!result.success) { Deno.exit(1); } } -// If this module is run directly if (import.meta.main) { - // Override the global environment variables that Deno's permission prompts look for - // to suppress color-related permission prompts main().catch((err) => { console.error("Unhandled error:", err); Deno.exit(1); }); } -export { runPython }; +export { runPython, type FileSystemOperation, type FileSystemOptions }; \ No newline at end of file diff --git a/libs/pyodide-sandbox-js/main_test.ts b/libs/pyodide-sandbox-js/main_test.ts index 5aafc05..c9d8cb4 100644 --- a/libs/pyodide-sandbox-js/main_test.ts +++ b/libs/pyodide-sandbox-js/main_test.ts @@ -1,5 +1,6 @@ -import { assertEquals, assertNotEquals, assertExists } from "@std/assert"; -import { runPython } from "./main.ts"; +import { assert, assertEquals, assertNotEquals, assertExists } from "@std/assert"; +import { runPython, type FileSystemOperation } from "./main.ts"; + Deno.test("runPython simple test", async () => { const result = await runPython("x = 2 + 3; x", {}); @@ -37,3 +38,199 @@ Deno.test("runPython with error - name error", async () => { // Check that error contains NameError assertEquals(result.error?.includes("NameError"), true); }); + + +Deno.test("filesystem - basic operations", async () => { + const operations: FileSystemOperation[] = [ + { operation: "mkdir", path: "/sandbox/test", encoding: "utf-8" }, + { operation: "write", path: "/sandbox/test/hello.txt", content: "Hello, MEMFS!", encoding: "utf-8" }, + { operation: "read", path: "/sandbox/test/hello.txt", encoding: "utf-8" }, + { operation: "list", path: "/sandbox/test", encoding: "utf-8" } + ]; + + const result = await runPython(` +import os + +# Test file and directory operations +if os.path.exists("/sandbox/test/hello.txt"): + with open("/sandbox/test/hello.txt", "r") as f: + content = f.read() + +# Return file info +{ + "file_exists": 
os.path.exists("/sandbox/test/hello.txt"), + "dir_exists": os.path.exists("/sandbox/test"), + "content": content if 'content' in locals() else None +} + `, { + fileSystemOptions: { enableFileSystem: true }, + fileSystemOperations: operations + }); + + assertEquals(result.success, true); + assertEquals(result.fileSystemOperations?.length, 4); + + // Check operations results + assertEquals(result.fileSystemOperations?.[0].success, true); // mkdir + assertEquals(result.fileSystemOperations?.[1].success, true); // write + assertEquals(result.fileSystemOperations?.[2].success, true); // read + assertEquals(result.fileSystemOperations?.[2].content, "Hello, MEMFS!"); // read content + + // Check list operation + const listResult = result.fileSystemOperations?.[3]; + assertEquals(listResult.success, true); + assertEquals(listResult.items.length, 1); + assertEquals(listResult.items[0].name, "hello.txt"); + + // Verify Python code could access the files + const resultData = JSON.parse(result.jsonResult || "null"); + assertEquals(resultData.file_exists, true); + assertEquals(resultData.dir_exists, true); + assertEquals(resultData.content, "Hello, MEMFS!"); +}); + +Deno.test("filesystem - document store creation", async () => { + const result = await runPython(` +import os +from prepare_env import create_document_store + +store_result = create_document_store("/sandbox/docs") + +# Verify structure was created +expected_dirs = ["raw", "processed", "embeddings", "metadata"] +all_exist = True +for dir_name in expected_dirs: + path = f"/sandbox/docs/{dir_name}" + if not os.path.exists(path): + all_exist = False + break + +{ + "success": store_result["success"], + "base_path": store_result["base_path"], + "dirs_exist": all_exist, + "index_exists": os.path.exists("/sandbox/docs/index.json") +} + `, { + fileSystemOptions: { enableFileSystem: true } + }); + + assertEquals(result.success, true); + const storeResult = JSON.parse(result.jsonResult || "null"); + assertEquals(storeResult.success, true); + assertEquals(storeResult.base_path, "/sandbox/docs"); + assertEquals(storeResult.dirs_exist, true); + assertEquals(storeResult.index_exists, true); +}); + +Deno.test("filesystem - file manipulation workflow", async () => { + const setupOps: FileSystemOperation[] = [ + { operation: "mkdir", path: "/sandbox/workflow", encoding: "utf-8" }, + { operation: "write", path: "/sandbox/workflow/doc1.txt", content: "Document one content", encoding: "utf-8" }, + { operation: "write", path: "/sandbox/workflow/doc2.txt", content: "Document two content", encoding: "utf-8" }, + ]; + + const result = await runPython(` +import os + +processed_dir = "/sandbox/workflow/processed" +os.makedirs(processed_dir, exist_ok=True) + +documents = [] +for filename in os.listdir("/sandbox/workflow"): + if filename.endswith('.txt'): + filepath = os.path.join("/sandbox/workflow", filename) + + with open(filepath, 'r') as f: + content = f.read() + + word_count = len(content.split()) + processed_content = content.lower().strip() + + processed_path = os.path.join(processed_dir, f"processed_{filename}") + with open(processed_path, 'w') as f: + f.write(processed_content) + + backup_path = os.path.join("/sandbox/workflow", f"backup_{filename}") + + with open(backup_path, 'w') as f: + f.write(content) + + documents.append({ + "source": filename, + "word_count": word_count, + "processed_path": processed_path, + "backup_path": backup_path + }) + +{ + "total_documents": len(documents), + "documents": documents, + "processed_files": os.listdir(processed_dir) 
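+  # (this trailing dict literal is the script's final expression; the sandbox
+  # serializes it with robust_serialize and surfaces it as jsonResult)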
+} + `, { + fileSystemOptions: { enableFileSystem: true }, + fileSystemOperations: setupOps + }); + + assertEquals(result.success, true); + const summary = JSON.parse(result.jsonResult || "null"); + assertEquals(summary.total_documents, 2); + assertEquals(summary.documents.length, 2); + assertEquals(summary.processed_files.length, 2); + + for (const doc of summary.documents) { + assert(doc.word_count > 0); + assert(doc.processed_path.includes("processed_")); + assert(doc.backup_path.includes("backup_")); + } +}); + +Deno.test("filesystem - copy operations", async () => { + const operations: FileSystemOperation[] = [ + { operation: "mkdir", path: "/sandbox/copy_test", encoding: "utf-8" }, + { operation: "write", path: "/sandbox/copy_test/original.txt", content: "Original content", encoding: "utf-8" }, + { operation: "copy", path: "/sandbox/copy_test/original.txt", destination: "/sandbox/copy_test/copy.txt", encoding: "utf-8" }, + { operation: "read", path: "/sandbox/copy_test/copy.txt", encoding: "utf-8" }, + { operation: "list", path: "/sandbox/copy_test", encoding: "utf-8" } + ]; + + const result = await runPython(` +import os +print("Testing copy operations") +print("Files in copy_test:", os.listdir("/sandbox/copy_test")) + +with open("/sandbox/copy_test/original.txt", "r") as f_orig: + original_content = f_orig.read() + +with open("/sandbox/copy_test/copy.txt", "r") as f_copy: + copy_content = f_copy.read() + +{ + "files": os.listdir("/sandbox/copy_test"), + "original_content": original_content, + "copy_content": copy_content, + "match": original_content == copy_content +} + `, { + fileSystemOptions: { enableFileSystem: true }, + fileSystemOperations: operations + }); + + assertEquals(result.success, true); + assertEquals(result.fileSystemOperations?.length, 5); + + // Check copy operation success + assertEquals(result.fileSystemOperations?.[2].success, true); + + // Verify copied file has the same content + const readResult = result.fileSystemOperations?.[3]; + assertEquals(readResult.success, true); + assertEquals(readResult.content, "Original content"); + + // Check verification from Python side + const resultData = JSON.parse(result.jsonResult || "null"); + assertEquals(resultData.files.length, 2); + assertEquals(resultData.match, true); + assertEquals(resultData.original_content, resultData.copy_content); +}); From 033c044c5fb330a938bf21c17c6ff6dcaf8a114e Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 26 May 2025 11:18:12 -0300 Subject: [PATCH 02/27] feat(sandbox): file attachment operations --- libs/sandbox-py/langchain_sandbox/pyodide.py | 272 ++++++++++-------- .../tests/unit_tests/test_pyodide_sandbox.py | 193 +++++++++++++ 2 files changed, 353 insertions(+), 112 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index cc1f579..431f764 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -1,6 +1,7 @@ """Python wrapper that calls pyodide & deno for code execution.""" import asyncio +import base64 import dataclasses import json import logging @@ -162,6 +163,8 @@ def __init__( # Configure permissions self.permissions = [] + self.file_operations = [] + if not skip_deno_check: # Check if Deno is installed try: @@ -197,6 +200,63 @@ def __init__( self.permissions.append(f"--node-modules-dir={node_modules_dir}") + def attach_file(self, path: str, content: str | bytes) -> None: + """Attach a file to the sandbox filesystem. 
+ + The file will be created in the sandbox's memfs filesystem and will be + available to the Python code when executed. Binary content is automatically + detected based on content type. + + Args: + path: Path in the sandbox filesystem where the file should be created. + If not starting with '/sandbox/', it will be prefixed automatically. + content: The content of the file, either as a string or bytes. + If bytes are provided, it will be treated as binary data. + """ + binary = isinstance(content, bytes) + + if not path.startswith("/sandbox/"): + path = f"/sandbox/{path}" + + encoding = "binary" if binary else "utf-8" + + if binary: + content = base64.b64encode(content).decode("ascii") + + self.file_operations.append({ + "operation": "write", + "path": path, + "content": content, + "encoding": encoding + }) + + def attach_files( + self, files: dict[str, str | bytes | dict[str, str | bool]] + ) -> None: + """Attach multiple files to the sandbox filesystem. + + Args: + files: Dictionary mapping paths to file contents. + Each value can be: + - a string (treated as text content) + - bytes (treated as binary content) + - a dictionary with 'content' key (and optional 'binary' key + if explicit format control is needed) + """ + for path, content_info in files.items(): + if isinstance(content_info, (str, bytes)): + self.attach_file(path, content_info) + elif isinstance(content_info, dict): + content = content_info.get("content", "") + + if "binary" in content_info: + binary_flag = content_info.get("binary", False) + if isinstance(content, str) and binary_flag: + # Convert string to bytes when binary flag is True + content = content.encode("utf-8") + + self.attach_file(path, content) + def _build_command( self, code: str, @@ -245,6 +305,10 @@ def _build_command( if session_metadata: cmd.extend(["-m", json.dumps(session_metadata)]) + # Add filesystem operations if there are any + if self.file_operations: + cmd.extend(["--fs-operations", json.dumps(self.file_operations)]) + return cmd @@ -263,6 +327,7 @@ async def execute( session_metadata: dict | None = None, timeout_seconds: float | None = None, memory_limit_mb: int | None = None, + clear_files: bool = False, ) -> CodeExecutionResult: """Execute Python code asynchronously in a sandboxed Deno subprocess. @@ -277,6 +342,7 @@ async def execute( session_metadata: Optional metadata for session state timeout_seconds: Maximum execution time in seconds memory_limit_mb: Maximum memory usage in MB + clear_files: If True, clear the attached files after execution Returns: CodeExecutionResult containing execution results and metadata @@ -287,53 +353,58 @@ async def execute( result = None status: Literal["success", "error"] = "success" - cmd = self._build_command( - code, - session_bytes=session_bytes, - session_metadata=session_metadata, - memory_limit_mb=memory_limit_mb, - ) - - # Create and run the subprocess - process = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - try: - # Wait for process with a timeout - stdout_bytes, stderr_bytes = await asyncio.wait_for( - process.communicate(), - timeout=timeout_seconds, + cmd = self._build_command( + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + memory_limit_mb=memory_limit_mb, ) - stdout = stdout_bytes.decode("utf-8", errors="replace") - if stdout: - # stdout encodes the full result from the sandbox. - # including stdout, stderr, and the json result. 
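            # Illustrative envelope emitted by the JS side (an assumed shape;
            # note that patch 01 nests these fields under "session"/"filesystem",
            # while this reader expects the flat keys below):
            #   {"stdout": "...", "stderr": null, "result": 12, "success": true,
            #    "sessionMetadata": {...}, "sessionBytes": [...]}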
- full_result = json.loads(stdout) - stdout = full_result.get("stdout", None) - stderr = full_result.get("stderr", None) - result = full_result.get("result", None) - status = "success" if full_result.get("success", False) else "error" - session_metadata = full_result.get("sessionMetadata", None) - # Convert the Uint8Array to Python bytes - session_bytes_array = full_result.get("sessionBytes", None) - session_bytes = ( - bytes(session_bytes_array) if session_bytes_array else None + # Create and run the subprocess + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + try: + # Wait for process with a timeout + stdout_bytes, stderr_bytes = await asyncio.wait_for( + process.communicate(), + timeout=timeout_seconds, ) - else: - stderr = stderr_bytes.decode("utf-8", errors="replace") + stdout = stdout_bytes.decode("utf-8", errors="replace") + + if stdout: + # stdout encodes the full result from the sandbox. + # including stdout, stderr, and the json result. + full_result = json.loads(stdout) + stdout = full_result.get("stdout", None) + stderr = full_result.get("stderr", None) + result = full_result.get("result", None) + status = "success" if full_result.get("success", False) else "error" + session_metadata = full_result.get("sessionMetadata", None) + # Convert the Uint8Array to Python bytes + session_bytes_array = full_result.get("sessionBytes", None) + session_bytes = ( + bytes(session_bytes_array) if session_bytes_array else None + ) + else: + stderr = stderr_bytes.decode("utf-8", errors="replace") + status = "error" + except asyncio.TimeoutError: + process.kill() + await process.wait() status = "error" - except asyncio.TimeoutError: - process.kill() - await process.wait() - status = "error" - stderr = f"Execution timed out after {timeout_seconds} seconds" - except asyncio.CancelledError: - # Optionally: log cancellation if needed - pass + stderr = f"Execution timed out after {timeout_seconds} seconds" + except asyncio.CancelledError: + # Optionally: log cancellation if needed + pass + finally: + if clear_files: + self.file_operations = [] + end_time = time.time() return CodeExecutionResult( @@ -361,6 +432,7 @@ def execute( session_metadata: dict | None = None, timeout_seconds: float | None = None, memory_limit_mb: int | None = None, + clear_files: bool = False, ) -> CodeExecutionResult: """Execute Python code synchronously in a sandboxed Deno subprocess. @@ -373,6 +445,7 @@ def execute( session_metadata: Optional metadata for session state timeout_seconds: Maximum execution time in seconds memory_limit_mb: Maximum memory usage in MB + clear_files: If True, clear the attached files after execution Returns: CodeExecutionResult containing execution results and metadata @@ -383,14 +456,14 @@ def execute( stderr: str status: Literal["success", "error"] - cmd = self._build_command( - code, - session_bytes=session_bytes, - session_metadata=session_metadata, - memory_limit_mb=memory_limit_mb, - ) - try: + cmd = self._build_command( + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + memory_limit_mb=memory_limit_mb, + ) + # Run the subprocess with timeout # Ignoring S603 for subprocess.run as the cmd is built safely. 
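            # (S603 is the flake8-bandit/Ruff rule flagging subprocess calls
            # that may execute untrusted input.)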
# Untrusted input comes from `code` parameter, which should be @@ -429,6 +502,9 @@ def execute( except subprocess.TimeoutExpired: status = "error" stderr = f"Execution timed out after {timeout_seconds} seconds" + finally: + if clear_files: + self.file_operations = [] end_time = time.time() @@ -444,68 +520,7 @@ def execute( class PyodideSandboxTool(BaseTool): - """Tool for running python code in a PyodideSandbox. - - If you use a stateful sandbox (PyodideSandboxTool(stateful=True)), - the state between code executions (to variables, imports, - and definitions, etc.), will be persisted using LangGraph checkpointer. - - !!! important - When you use a stateful sandbox, this tool can only be used - inside a LangGraph graph with a checkpointer, and - has to be used with the prebuilt `create_react_agent` or `ToolNode`. - - Example: stateless sandbox usage - - ```python - from langgraph.prebuilt import create_react_agent - from langchain_sandbox import PyodideSandboxTool - - tool = PyodideSandboxTool(allow_net=True) - agent = create_react_agent( - "anthropic:claude-3-7-sonnet-latest", - tools=[tool], - ) - result = await agent.ainvoke( - {"messages": [{"role": "user", "content": "what's 5 + 7?"}]}, - ) - ``` - - Example: stateful sandbox usage - - ```python - from langgraph.prebuilt import create_react_agent - from langgraph.prebuilt.chat_agent_executor import AgentState - from langgraph.checkpoint.memory import InMemorySaver - from langchain_sandbox import PyodideSandboxTool, PyodideSandbox - - class State(AgentState): - session_bytes: bytes - session_metadata: dict - - tool = PyodideSandboxTool(stateful=True, allow_net=True) - agent = create_react_agent( - "anthropic:claude-3-7-sonnet-latest", - tools=[tool], - checkpointer=InMemorySaver(), - state_schema=State - ) - result = await agent.ainvoke( - { - "messages": [ - {"role": "user", "content": "what's 5 + 7? save result as 'a'"} - ], - "session_bytes": None, - "session_metadata": None - }, - config={"configurable": {"thread_id": "123"}}, - ) - second_result = await agent.ainvoke( - {"messages": [{"role": "user", "content": "what's the sine of 'a'?"}]}, - config={"configurable": {"thread_id": "123"}}, - ) - ``` - """ + """Tool for running python code in a PyodideSandbox.""" name: str = "python_code_sandbox" description: str = ( @@ -613,6 +628,39 @@ class PyodideSandboxToolInput(BaseModel): skip_deno_check=True, # Skip deno check since async sandbox already checked ) + def attach_file(self, path: str, content: str | bytes) -> None: + """Attach a file to the sandbox filesystem. + + This method delegates to both the async and sync sandboxes to ensure consistency + Binary content is automatically detected based on the content type. + + Args: + path: Path in the sandbox filesystem where the file should be created. + If not starting with '/sandbox/', it will be prefixed automatically. + content: The content of the file, either as a string or bytes. + If bytes are provided, it will be treated as binary data. + """ + self._sandbox.attach_file(path, content) + self._sync_sandbox.attach_file(path, content) + + def attach_files( + self, files: dict[str, str | bytes | dict[str, str | bool]] + ) -> None: + """Attach multiple files to the sandbox filesystem. + + This method delegates to both the async and sync sandboxes to ensure consistency + + Args: + files: Dictionary mapping paths to file contents. 
+ Each value can be: + - a string (treated as text content) + - bytes (treated as binary content) + - a dictionary with 'content' key (and optional 'binary' key + if explicit format control is needed) + """ + self._sandbox.attach_files(files) + self._sync_sandbox.attach_files(files) + def _run( self, code: str, @@ -683,7 +731,7 @@ async def _arun( config: RunnableConfig | None = None, run_manager: AsyncCallbackManagerForToolRun | None = None, ) -> Any: # noqa: ANN401 - """Use the tool synchronously.""" + """Use the tool asynchronously.""" if self.stateful: required_keys = {"session_bytes", "session_metadata", "messages"} actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index 89e6635..ecf3422 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -197,3 +197,196 @@ async def test_async_pyodide_timeout() -> None: tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True) result = await tool.ainvoke("while True: pass") assert result == "Error during execution: Execution timed out after 0.1 seconds" + + +async def test_attach_binary_file(pyodide_package: None) -> None: + """Test attaching and reading a binary file.""" + sandbox = PyodideSandbox( + allow_read=True, + allow_write=True, + ) + + simple_binary = bytes([0x01, 0x02, 0x03, 0x04, 0x05]) + + sandbox.attach_file("test_binary.bin", simple_binary) + + code = """ +import os +import base64 + +file_path = "/sandbox/test_binary.bin" +if os.path.exists(file_path): + with open(file_path, "rb") as f: + content = f.read() + + print(f"File exists: True") + print(f"Content length: {len(content)}") + print(f"Content bytes: {', '.join(str(b) for b in content)}") +else: + print("File exists: False") +""" + + result = await sandbox.execute(code) + + assert result.status == "success", f"Error in execution: {result.stderr}" + assert "File exists: True" in result.stdout + assert "Content length: 5" in result.stdout + assert "Content bytes: 1, 2, 3, 4, 5" in result.stdout + + +async def test_clear_files_after_execution(pyodide_package: None) -> None: + """Test clearing files after execution.""" + sandbox = get_default_sandbox() + + sandbox.attach_file("temp.txt", "Temporary content") + + result1 = await sandbox.execute( + 'print(open("/sandbox/temp.txt").read())', + clear_files=True + ) + assert result1.status == "success" + assert "Temporary content" in result1.stdout + + assert len(sandbox.file_operations) == 0 + + result2 = await sandbox.execute(""" +import os +if os.path.exists("/sandbox/temp.txt"): + print("File still exists") +else: + print("File is gone") +""") + assert result2.status == "success" + assert "File is gone" in result2.stdout + + +async def test_tool_with_file_attachment(pyodide_package: None) -> None: + """Test using PyodideSandboxTool with file attachment.""" + tool = PyodideSandboxTool(allow_read=True, allow_write=True, allow_net=True) + + tool.attach_file("data.csv", "id,value\n1,100\n2,200\n3,300") + tool.attach_file("config.json", '{"max_value": 250, "min_value": 50}') + + code = """ +import csv +import json + +with open("/sandbox/data.csv", "r") as f: + reader = csv.DictReader(f) + rows = list(reader) + +with open("/sandbox/config.json", "r") as f: + config = json.load(f) + +# Filter data based on config +filtered = [] +for row in rows: + value = int(row["value"]) + if 
config["min_value"] <= value <= config["max_value"]: + filtered.append(row) + +print(f"Filtered data:") +for row in filtered: + print(f"id: {row['id']}, value: {row['value']}") +""" + + result = await tool.ainvoke(code) + + assert "Filtered data:" in result + assert "id: 1, value: 100" in result + assert "id: 2, value: 200" in result + # Value 300 should be excluded by filter + assert "id: 3, value: 300" not in result + + +async def test_directory_operations(pyodide_package: None) -> None: + """Test directory creation and file operations within directories.""" + sandbox = get_default_sandbox() + + sandbox.attach_file("nested/dir/file.txt", "Content in nested directory") + + code = """ +import os +from pathlib import Path + +dir_exists = os.path.isdir("/sandbox/nested/dir") +file_exists = os.path.exists("/sandbox/nested/dir/file.txt") +content = Path("/sandbox/nested/dir/file.txt").read_text() if file_exists else "" + +print(f"Directory exists: {dir_exists}") +print(f"File exists: {file_exists}") +print(f"Content: {content}") +""" + + result = await sandbox.execute(code) + assert result.status == "success" + assert "Directory exists: True" in result.stdout + assert "File exists: True" in result.stdout + assert "Content: Content in nested directory" in result.stdout + + +def test_sync_file_operations(pyodide_package: None) -> None: + """Test synchronous file operations.""" + sandbox = get_default_sync_sandbox() + + sandbox.attach_files({ + "data.txt": "Text file content", + "config.json": '{"enabled": true}' + }) + + code = """ +import json +from pathlib import Path + +text_content = Path("/sandbox/data.txt").read_text() +json_content = json.loads(Path("/sandbox/config.json").read_text()) + +print(f"Text content: {text_content}") +print(f"JSON enabled: {json_content['enabled']}") +""" + + result = sandbox.execute(code) + assert result.status == "success" + assert "Text content: Text file content" in result.stdout + assert "JSON enabled: True" in result.stdout + + +async def test_attach_files_with_explicit_binary_flag(pyodide_package: None) -> None: + """Test attaching files with explicit binary flag in dictionary format.""" + sandbox = get_default_sandbox() + + text_content = "Hello world" + binary_content = b"\x00\x01\x02\x03" + + sandbox.attach_files({ + "text_file.txt": {"content": text_content, "binary": False}, + "binary_file.bin": {"content": binary_content, "binary": True} + }) + + code = """ +from pathlib import Path +import os + +# Check text file +text_path = "/sandbox/text_file.txt" +if os.path.exists(text_path): + with open(text_path, "r") as f: + text_content = f.read() + print(f"Text content: {text_content}") + +# Check binary file +bin_path = "/sandbox/binary_file.bin" +if os.path.exists(bin_path): + with open(bin_path, "rb") as f: + bin_content = f.read() + print(f"Binary exists: True") + print(f"Binary length: {len(bin_content)}") + print(f"Binary bytes: {', '.join(str(b) for b in bin_content)}") +""" + + result = await sandbox.execute(code) + assert result.status == "success" + assert "Text content: Hello world" in result.stdout + assert "Binary exists: True" in result.stdout + assert "Binary length: 4" in result.stdout + assert "Binary bytes: 0, 1, 2, 3" in result.stdout From c7d18966fa54eeae36ffdd74a77e6e34e38d1f81 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 26 May 2025 12:09:17 -0300 Subject: [PATCH 03/27] fix: improve file system implementation and file handling --- libs/pyodide-sandbox-js/main.ts | 325 ++++++++++++--------------- 
libs/pyodide-sandbox-js/main_test.ts | 258 ++++++++------------- 2 files changed, 239 insertions(+), 344 deletions(-) diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts index 637bd30..cb0668d 100644 --- a/libs/pyodide-sandbox-js/main.ts +++ b/libs/pyodide-sandbox-js/main.ts @@ -15,6 +15,7 @@ import json import sys import os import base64 +from pathlib import Path from typing import Union, TypedDict, List, Any, Callable, Literal try: @@ -30,30 +31,56 @@ class InstallEntry(TypedDict): module: str package: str -class FileSystemOperation(TypedDict): - operation: Literal["read", "write", "list", "mkdir", "exists", "remove", "copy"] - path: str - content: Union[str, bytes, None] - encoding: str - destination: Union[str, None] +class SandboxPath: + """Enhanced Path operations for sandbox environment. -def perform_fs_operation(op) -> dict: - """Performs filesystem operations safely within the sandbox environment. - - Supports the following operations: - - read: Reads file content with text or binary encoding - - write: Writes content to a file, creating parent directories if needed - - list: Lists directory contents with metadata (name, type, size, etc) - - mkdir: Creates directories recursively - - exists: Checks if a file or directory exists - - remove: Deletes files or directories (recursive) - - copy: Copies files or directories to a destination path - - Returns: - A dictionary with operation result ('success' boolean and data or 'error' message) + Provides intuitive file operations with automatic handling of common use cases. """ + + @staticmethod + def sandbox(path: str = "") -> Path: + """Get a Path object pointing to the sandbox directory.""" + base = Path("/sandbox") + if path: + return base / path.lstrip("/") + return base + + @staticmethod + def write_json(path: Union[str, Path], data: Any, indent: int = 2) -> None: + """Write JSON data to a file.""" + path_obj = Path(path) if isinstance(path, str) else path + path_obj.parent.mkdir(parents=True, exist_ok=True) + path_obj.write_text(json.dumps(data, indent=indent, ensure_ascii=False)) + + @staticmethod + def read_json(path: Union[str, Path]) -> Any: + """Read JSON data from a file.""" + path_obj = Path(path) if isinstance(path, str) else path + return json.loads(path_obj.read_text()) + + @staticmethod + def write_bytes_b64(path: Union[str, Path], data: bytes) -> None: + """Write binary data to a file.""" + path_obj = Path(path) if isinstance(path, str) else path + path_obj.parent.mkdir(parents=True, exist_ok=True) + encoded = base64.b64encode(data).decode('ascii') + path_obj.with_suffix(path_obj.suffix + '.b64').write_text(encoded) + + @staticmethod + def read_bytes_b64(path: Union[str, Path]) -> bytes: + """Read binary data from a file.""" + path_obj = Path(path) if isinstance(path, str) else path + b64_file = path_obj.with_suffix(path_obj.suffix + '.b64') + if b64_file.exists(): + encoded = b64_file.read_text() + return base64.b64decode(encoded) + raise FileNotFoundError(f"Binary file {path} not found") + +sandbox_path = SandboxPath() + +def perform_fs_operation(op) -> dict: + """Filesystem operation function for file operations.""" try: - # Convert JsProxy to Python dict if needed if hasattr(op, 'to_py'): op = op.to_py() @@ -77,7 +104,6 @@ def perform_fs_operation(op) -> dict: return {"success": False, "error": "File not found"} elif operation == "write": - # Ensure parent directory exists parent_dir = os.path.dirname(path) if parent_dir and not os.path.exists(parent_dir): os.makedirs(parent_dir, 
exist_ok=True) @@ -144,52 +170,6 @@ def perform_fs_operation(op) -> dict: except Exception as e: return {"success": False, "error": str(e)} -def create_document_store(base_path: str = "/sandbox/documents") -> dict: - """Creates a document store structure for LangChain. - - Sets up a directory structure suitable for LangChain document processing: - - raw: For storing original documents - - processed: For storing processed documents - - embeddings: For storing document embeddings - - metadata: For storing document metadata - - Also creates an index.json file to track documents and collections. - - Args: - base_path: Root directory for the document store - - Returns: - Dictionary with creation status and structure information - """ - try: - os.makedirs(base_path, exist_ok=True) - os.makedirs(f"{base_path}/raw", exist_ok=True) - os.makedirs(f"{base_path}/processed", exist_ok=True) - os.makedirs(f"{base_path}/embeddings", exist_ok=True) - os.makedirs(f"{base_path}/metadata", exist_ok=True) - - # Create index file - index_file = f"{base_path}/index.json" - initial_index = { - "created": datetime.datetime.now().isoformat(), - "version": "1.0", - "documents": {}, - "collections": {}, - "last_updated": datetime.datetime.now().isoformat() - } - - with open(index_file, 'w') as f: - json.dump(initial_index, f, indent=2) - - return { - "success": True, - "base_path": base_path, - "structure": ["raw", "processed", "embeddings", "metadata"], - "index_file": index_file - } - except Exception as e: - return {"success": False, "error": str(e)} - def find_imports_to_install(imports: list[str]) -> list[InstallEntry]: """ Given a list of module names being imported, return a list of dicts @@ -217,84 +197,68 @@ def find_imports_to_install(imports: list[str]) -> list[InstallEntry]: ) return to_install + async def install_imports( source_code_or_imports: Union[str, list[str]], additional_packages: list[str] = [], - message_callback: Callable = lambda event_type, data: None, + message_callback: Callable[ + [ + Literal[ + "failed", + ], + Union[InstallEntry, list[InstallEntry]], + ], + None, + ] = lambda event_type, data: None, ) -> List[InstallEntry]: - """Installs Python packages required for the provided code or import list. - - Takes either: - - Python source code: Analyzes imports using Pyodide's find_imports - - A list of import names: Uses the list directly - - Additionally installs any packages specified in additional_packages. 
- - Args: - source_code_or_imports: Python code string or list of import names - additional_packages: Extra packages to install regardless of imports - message_callback: Function called with status updates during installation - - Returns: - List of package entries that were installed - """ if isinstance(source_code_or_imports, str): try: imports: list[str] = find_imports(source_code_or_imports) except SyntaxError: - return [] + return else: imports: list[str] = source_code_or_imports to_install = find_imports_to_install(imports) + # Merge with additional packages for package in additional_packages: - if package not in [entry["package"] for entry in to_install]: + if package not in to_install: to_install.append(dict(module=package, package=package)) if to_install: try: - import micropip + import micropip # noqa except ModuleNotFoundError: await pyodide_js.loadPackage("micropip") - import micropip + import micropip # noqa for entry in to_install: try: await micropip.install(entry["package"]) except Exception as e: message_callback("failed", entry["package"]) - break + break # Fail fast return to_install -def load_session_bytes(session_bytes: bytes): - """Loads a serialized session state from bytes. - - Uses dill to restore a previously serialized Python session state, - including all variables, functions and class definitions. - - Args: - session_bytes: Bytes object containing the serialized session - """ +def load_session_bytes(session_bytes: bytes) -> list[str]: + """Load the session module.""" import dill import io + buffer = io.BytesIO(session_bytes.to_py()) dill.session.load_session(filename=buffer) + def dump_session_bytes() -> bytes: - """Serializes the current session state to bytes. - - Uses dill to capture the current Python session state, - including all variables, functions and class definitions. - - Returns: - Bytes object containing the serialized session - """ + """Dump the session module.""" import dill import io + buffer = io.BytesIO() dill.session.dump_session(filename=buffer) return buffer.getvalue() + def robust_serialize(obj): """Recursively converts an arbitrary Python object into a JSON-serializable structure. @@ -307,23 +271,34 @@ def robust_serialize(obj): - For unsupported/unknown objects, a dictionary containing a 'type' indicator and the object's repr is returned. """ + # Base case: primitives that are already JSON-serializable if isinstance(obj, (str, int, float, bool, type(None))): return obj + + # Process lists or tuples recursively. if isinstance(obj, (list, tuple)): return [robust_serialize(item) for item in obj] + + # Process dictionaries. if isinstance(obj, dict): + # Convert keys to strings if necessary and process values recursively. return {str(key): robust_serialize(value) for key, value in obj.items()} + + # Process sets by converting them to lists. if isinstance(obj, (set, frozenset)): return [robust_serialize(item) for item in obj] + + # Process known datetime objects. if isinstance(obj, (datetime.date, datetime.datetime)): return obj.isoformat() + + # Fallback: for objects that are not directly serializable, + # return a dictionary with type indicator and repr. return {"type": "not_serializable", "repr": repr(obj)} + def dumps(result: Any) -> str: - """Serializes a Python object to a JSON string. - - Uses robust_serialize to handle complex Python objects before JSON serialization. 
- """ + """Get the result of the session.""" result = robust_serialize(result) return json.dumps(result) `; @@ -363,24 +338,6 @@ interface FileSystemOperation { destination?: string; } -interface ExecutionResult { - success: boolean; - stdout: string | null; - stderr: string | null; - result: any; - session?: { - metadata?: SessionMetadata; - bytes?: Uint8Array; - }; - filesystem?: { - info?: { - type: string; - mountPoint: string; - }; - operations?: any[]; - }; -} - async function initPyodide(pyodide: any, options: FileSystemOptions = {}): Promise { const sys = pyodide.pyimport("sys"); const pathlib = pyodide.pyimport("pathlib"); @@ -390,16 +347,14 @@ async function initPyodide(pyodide: any, options: FileSystemOptions = {}): Promi pathlib.Path(dirPath).mkdir(); pathlib.Path(dirPath + "prepare_env.py").write_text(prepareEnvCode); - if (options.enableFileSystem) { - const mountPoint = options.mountPoint || "/sandbox"; - - try { - pyodide.FS.mkdirTree(mountPoint); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - if (!errorMessage.includes("exists")) { - console.warn(`⚠️ Failed to create mount point ${mountPoint}:`, error); - } + const mountPoint = options.mountPoint || "/sandbox"; + + try { + pyodide.FS.mkdirTree(mountPoint); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + if (!errorMessage.includes("exists")) { + console.warn(`⚠️ Failed to create mount point ${mountPoint}:`, error); } } } @@ -452,8 +407,12 @@ async function runPython( }, }); - await initPyodide(pyodide, options.fileSystemOptions); + await initPyodide(pyodide, { + enableFileSystem: true, + mountPoint: options.fileSystemOptions?.mountPoint || "/sandbox" + }); + // Determine session directory let sessionMetadata: SessionMetadata; if (options.sessionMetadata) { sessionMetadata = JSON.parse(options.sessionMetadata); @@ -471,7 +430,10 @@ async function runPython( console.error("sessionMetadata is required when providing sessionBytes"); return { success: false, error: "sessionMetadata is required when providing sessionBytes" }; } + + // Import our prepared environment module const prepare_env = pyodide.pyimport("prepare_env"); + // Prepare additional packages to install (include dill) const defaultPackages = options.stateful ? ["dill"] : []; const additionalPackagesToInstall = options.sessionBytes ? [...new Set([...defaultPackages, ...sessionMetadata.packages])] @@ -490,15 +452,21 @@ async function runPython( ); if (installErrors.length > 0) { + // Restore the original console.log function console.log = originalLog; return { success: false, - error: `Failed to install required Python packages: ${installErrors.join(", ")}.` + error: `Failed to install required Python packages: ${installErrors.join(", ")}. ` + + `This is likely because these packages are not available in the Pyodide environment. ` + + `Pyodide is a Python runtime that runs in the browser and has a limited set of ` + + `pre-built packages. 
You may need to use alternative packages that are compatible ` + + `with Pyodide.` }; } if (options.sessionBytes) { sessionData = Uint8Array.from(JSON.parse(options.sessionBytes)); + // Run session preamble await prepare_env.load_session_bytes(sessionData); } @@ -509,21 +477,24 @@ async function runPython( const packages = installedPackages.map((pkg: any) => pkg.get("package")); + // Restore the original console.log function console.log = originalLog; - + // Run the Python code const rawValue = await pyodide.runPythonAsync(pythonCode); - + // Dump result to string const jsonValue = await prepare_env.dumps(rawValue); + // Update session metadata with installed packages sessionMetadata.packages = [ ...new Set([...sessionMetadata.packages, ...packages]), ]; sessionMetadata.lastModified = new Date().toISOString(); if (options.stateful) { + // Save session state to sessionBytes sessionData = await prepare_env.dump_session_bytes() as Uint8Array; - } - + }; + // Return the result with stdout and stderr output const result: PyodideResult = { success: true, result: rawValue, @@ -538,19 +509,17 @@ async function runPython( result["sessionBytes"] = sessionData; } - if (options.fileSystemOptions?.enableFileSystem) { - result["fileSystemInfo"] = { - type: "memfs", - mountPoint: options.fileSystemOptions.mountPoint || "/sandbox", - }; - } + result["fileSystemInfo"] = { + type: "memfs", + mountPoint: options.fileSystemOptions?.mountPoint || "/sandbox", + }; return result; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); return { success: false, - error: errorMessage, + error: errorMessage, // No errorMessage conversion needed stdout: output, stderr: err_output }; @@ -568,15 +537,14 @@ async function main(): Promise { s: "stateful", b: "session-bytes", m: "session-metadata", - "fs": "fs-operations", - "mp": "mount-point", + fs: "fs-operations", + mp: "mount-point", }, - boolean: ["help", "version", "stateful", "enable-filesystem"], + boolean: ["help", "version", "stateful"], default: { help: false, version: false, stateful: false, - "enable-filesystem": false, "mount-point": "/sandbox" }, }); @@ -589,15 +557,14 @@ Run Python code in a sandboxed environment using Pyodide OPTIONS: -c, --code Python code to execute -f, --file Path to Python file to execute - -s, --stateful Use a stateful session + -s, --stateful Use a stateful session -b, --session-bytes Session bytes -m, --session-metadata Session metadata - --enable-filesystem Enable filesystem operations (MEMFS) - --fs-operations JSON array of filesystem operations - --mount-point Mount point path (default: /sandbox) + -fs, --fs-operations JSON array of filesystem operations + -mp, --mount-point Mount point path (default: /sandbox) -h, --help Display help -V, --version Display version -`); +`); return; } @@ -612,7 +579,6 @@ OPTIONS: stateful: flags.stateful, sessionBytes: flags["session-bytes"], sessionMetadata: flags["session-metadata"], - enableFileSystem: flags["enable-filesystem"], fsOperations: flags["fs-operations"], mountPoint: flags["mount-point"], }; @@ -622,10 +588,12 @@ OPTIONS: Deno.exit(1); } + // Get Python code from file or command line argument let pythonCode = ""; if (options.file) { try { + // Resolve relative or absolute file path const filePath = options.file.startsWith("/") ? 
options.file : join(Deno.cwd(), options.file); @@ -636,6 +604,7 @@ OPTIONS: Deno.exit(1); } } else { + // Process code from command line (replacing escaped newlines) pythonCode = options.code?.replace(/\\n/g, "\n") ?? ""; } @@ -654,44 +623,34 @@ OPTIONS: sessionBytes: options.sessionBytes, sessionMetadata: options.sessionMetadata, fileSystemOptions: { - enableFileSystem: options.enableFileSystem, + enableFileSystem: true, // Always enabled mountPoint: options.mountPoint, }, fileSystemOperations: fileSystemOperations, }); - const executionResult: ExecutionResult = { - success: result.success, + const outputJson = { stdout: result.stdout?.join('') || null, stderr: result.success ? (result.stderr?.join('') || null) : result.error || null, result: result.success ? JSON.parse(result.jsonResult || 'null') : null, + success: result.success, + sessionBytes: result.sessionBytes, + sessionMetadata: result.sessionMetadata, + fileSystemInfo: result.fileSystemInfo, + fileSystemOperations: result.fileSystemOperations, }; - // Only include session data if stateful execution was used - if (result.sessionBytes || result.sessionMetadata) { - executionResult.session = { - metadata: result.sessionMetadata, - ...(result.sessionBytes ? { bytes: result.sessionBytes } : {}) - }; - } - - // Only include filesystem information if filesystem was enabled - if (options.enableFileSystem) { - executionResult.filesystem = { - info: result.fileSystemInfo, - // Only include operations results if operations were actually performed - ...(fileSystemOperations.length > 0 ? { operations: result.fileSystemOperations } : {}) - }; - } - - console.log(JSON.stringify(executionResult)); + console.log(JSON.stringify(outputJson)); if (!result.success) { Deno.exit(1); } } +// If this module is run directly if (import.meta.main) { + // Override the global environment variables that Deno's permission prompts look for + // to suppress color-related permission prompts main().catch((err) => { console.error("Unhandled error:", err); Deno.exit(1); diff --git a/libs/pyodide-sandbox-js/main_test.ts b/libs/pyodide-sandbox-js/main_test.ts index c9d8cb4..04d3fd3 100644 --- a/libs/pyodide-sandbox-js/main_test.ts +++ b/libs/pyodide-sandbox-js/main_test.ts @@ -1,7 +1,6 @@ -import { assert, assertEquals, assertNotEquals, assertExists } from "@std/assert"; +import { assertEquals, assertNotEquals, assertExists } from "@std/assert"; import { runPython, type FileSystemOperation } from "./main.ts"; - Deno.test("runPython simple test", async () => { const result = await runPython("x = 2 + 3; x", {}); assertEquals(result.success, true); @@ -39,198 +38,135 @@ Deno.test("runPython with error - name error", async () => { assertEquals(result.error?.includes("NameError"), true); }); - -Deno.test("filesystem - basic operations", async () => { +Deno.test("filesystem - write and read text file", async () => { const operations: FileSystemOperation[] = [ - { operation: "mkdir", path: "/sandbox/test", encoding: "utf-8" }, - { operation: "write", path: "/sandbox/test/hello.txt", content: "Hello, MEMFS!", encoding: "utf-8" }, - { operation: "read", path: "/sandbox/test/hello.txt", encoding: "utf-8" }, - { operation: "list", path: "/sandbox/test", encoding: "utf-8" } + { + operation: "write", + path: "/sandbox/test.txt", + content: "Hello, World!", + } ]; - + const result = await runPython(` -import os - -# Test file and directory operations -if os.path.exists("/sandbox/test/hello.txt"): - with open("/sandbox/test/hello.txt", "r") as f: - content = f.read() - -# Return 
file info -{ - "file_exists": os.path.exists("/sandbox/test/hello.txt"), - "dir_exists": os.path.exists("/sandbox/test"), - "content": content if 'content' in locals() else None -} +with open("/sandbox/test.txt", "r") as f: + content = f.read() +content `, { - fileSystemOptions: { enableFileSystem: true }, fileSystemOperations: operations }); - - assertEquals(result.success, true); - assertEquals(result.fileSystemOperations?.length, 4); - // Check operations results - assertEquals(result.fileSystemOperations?.[0].success, true); // mkdir - assertEquals(result.fileSystemOperations?.[1].success, true); // write - assertEquals(result.fileSystemOperations?.[2].success, true); // read - assertEquals(result.fileSystemOperations?.[2].content, "Hello, MEMFS!"); // read content - - // Check list operation - const listResult = result.fileSystemOperations?.[3]; - assertEquals(listResult.success, true); - assertEquals(listResult.items.length, 1); - assertEquals(listResult.items[0].name, "hello.txt"); - - // Verify Python code could access the files - const resultData = JSON.parse(result.jsonResult || "null"); - assertEquals(resultData.file_exists, true); - assertEquals(resultData.dir_exists, true); - assertEquals(resultData.content, "Hello, MEMFS!"); + assertEquals(result.success, true); + assertEquals(JSON.parse(result.jsonResult || "null"), "Hello, World!"); }); -Deno.test("filesystem - document store creation", async () => { +Deno.test("filesystem - directory operations", async () => { + const operations: FileSystemOperation[] = [ + { + operation: "mkdir", + path: "/sandbox/testdir", + }, + { + operation: "write", + path: "/sandbox/testdir/file.txt", + content: "File in directory", + } + ]; + const result = await runPython(` import os -from prepare_env import create_document_store - -store_result = create_document_store("/sandbox/docs") - -# Verify structure was created -expected_dirs = ["raw", "processed", "embeddings", "metadata"] -all_exist = True -for dir_name in expected_dirs: - path = f"/sandbox/docs/{dir_name}" - if not os.path.exists(path): - all_exist = False - break - -{ - "success": store_result["success"], - "base_path": store_result["base_path"], - "dirs_exist": all_exist, - "index_exists": os.path.exists("/sandbox/docs/index.json") -} +dir_exists = os.path.isdir("/sandbox/testdir") +file_path = "/sandbox/testdir/file.txt" +file_exists = os.path.exists(file_path) +content = open(file_path).read() if file_exists else "" +{"dir_exists": dir_exists, "file_exists": file_exists, "content": content} `, { - fileSystemOptions: { enableFileSystem: true } + fileSystemOperations: operations }); - + assertEquals(result.success, true); - const storeResult = JSON.parse(result.jsonResult || "null"); - assertEquals(storeResult.success, true); - assertEquals(storeResult.base_path, "/sandbox/docs"); - assertEquals(storeResult.dirs_exist, true); - assertEquals(storeResult.index_exists, true); + const resultObj = JSON.parse(result.jsonResult || "null"); + assertEquals(resultObj.dir_exists, true); + assertEquals(resultObj.file_exists, true); + assertEquals(resultObj.content, "File in directory"); }); -Deno.test("filesystem - file manipulation workflow", async () => { - const setupOps: FileSystemOperation[] = [ - { operation: "mkdir", path: "/sandbox/workflow", encoding: "utf-8" }, - { operation: "write", path: "/sandbox/workflow/doc1.txt", content: "Document one content", encoding: "utf-8" }, - { operation: "write", path: "/sandbox/workflow/doc2.txt", content: "Document two content", encoding: "utf-8" }, 
+Deno.test("filesystem - list directory contents", async () => { + const operations: FileSystemOperation[] = [ + { + operation: "mkdir", + path: "/sandbox/listdir", + }, + { + operation: "write", + path: "/sandbox/listdir/file1.txt", + content: "File 1", + }, + { + operation: "write", + path: "/sandbox/listdir/file2.txt", + content: "File 2", + } ]; - + const result = await runPython(` import os - -processed_dir = "/sandbox/workflow/processed" -os.makedirs(processed_dir, exist_ok=True) - -documents = [] -for filename in os.listdir("/sandbox/workflow"): - if filename.endswith('.txt'): - filepath = os.path.join("/sandbox/workflow", filename) - - with open(filepath, 'r') as f: - content = f.read() - - word_count = len(content.split()) - processed_content = content.lower().strip() - - processed_path = os.path.join(processed_dir, f"processed_{filename}") - with open(processed_path, 'w') as f: - f.write(processed_content) - - backup_path = os.path.join("/sandbox/workflow", f"backup_{filename}") - - with open(backup_path, 'w') as f: - f.write(content) - - documents.append({ - "source": filename, - "word_count": word_count, - "processed_path": processed_path, - "backup_path": backup_path - }) - -{ - "total_documents": len(documents), - "documents": documents, - "processed_files": os.listdir(processed_dir) -} +files = os.listdir("/sandbox/listdir") +sorted(files) `, { - fileSystemOptions: { enableFileSystem: true }, - fileSystemOperations: setupOps + fileSystemOperations: operations }); - - assertEquals(result.success, true); - const summary = JSON.parse(result.jsonResult || "null"); - assertEquals(summary.total_documents, 2); - assertEquals(summary.documents.length, 2); - assertEquals(summary.processed_files.length, 2); - for (const doc of summary.documents) { - assert(doc.word_count > 0); - assert(doc.processed_path.includes("processed_")); - assert(doc.backup_path.includes("backup_")); - } + assertEquals(result.success, true); + assertEquals(JSON.parse(result.jsonResult || "null"), ["file1.txt", "file2.txt"]); }); -Deno.test("filesystem - copy operations", async () => { +Deno.test("filesystem - custom mount point", async () => { const operations: FileSystemOperation[] = [ - { operation: "mkdir", path: "/sandbox/copy_test", encoding: "utf-8" }, - { operation: "write", path: "/sandbox/copy_test/original.txt", content: "Original content", encoding: "utf-8" }, - { operation: "copy", path: "/sandbox/copy_test/original.txt", destination: "/sandbox/copy_test/copy.txt", encoding: "utf-8" }, - { operation: "read", path: "/sandbox/copy_test/copy.txt", encoding: "utf-8" }, - { operation: "list", path: "/sandbox/copy_test", encoding: "utf-8" } + { + operation: "write", + path: "/customdir/test.txt", + content: "Custom mount point", + } ]; - + const result = await runPython(` import os -print("Testing copy operations") -print("Files in copy_test:", os.listdir("/sandbox/copy_test")) - -with open("/sandbox/copy_test/original.txt", "r") as f_orig: - original_content = f_orig.read() - -with open("/sandbox/copy_test/copy.txt", "r") as f_copy: - copy_content = f_copy.read() - -{ - "files": os.listdir("/sandbox/copy_test"), - "original_content": original_content, - "copy_content": copy_content, - "match": original_content == copy_content -} +path = "/customdir/test.txt" +exists = os.path.exists(path) +content = open(path).read() if exists else "" +{"exists": exists, "content": content} `, { - fileSystemOptions: { enableFileSystem: true }, + fileSystemOptions: { mountPoint: "/customdir" }, fileSystemOperations: 
operations
  });
-  
+  
  assertEquals(result.success, true);
-  assertEquals(result.fileSystemOperations?.length, 5);
+  const resultObj = JSON.parse(result.jsonResult || "null");
+  assertEquals(resultObj.exists, true);
+  assertEquals(resultObj.content, "Custom mount point");
+});
+
+Deno.test("filesystem - binary file operations with explicit encoding", async () => {
+  // Create binary data as base64 string
+  const binaryContent = "QmluYXJ5IGRhdGE="; // base64 for "Binary data"
 
-  // Check copy operation success
-  assertEquals(result.fileSystemOperations?.[2].success, true);
+  const operations: FileSystemOperation[] = [
+    {
+      operation: "write",
+      path: "/sandbox/explicit.bin",
+      content: binaryContent,
+      encoding: "binary" // Explicitly set binary encoding
+    }
+  ];
 
-  // Verify copied file has the same content
-  const readResult = result.fileSystemOperations?.[3];
-  assertEquals(readResult.success, true);
-  assertEquals(readResult.content, "Original content");
+  const result = await runPython(`
+with open("/sandbox/explicit.bin", "rb") as f:
+    content = f.read()
+content.decode('utf-8') # Should be "Binary data"
+  `, {
+    fileSystemOperations: operations
+  });
 
-  // Check verification from Python side
-  const resultData = JSON.parse(result.jsonResult || "null");
-  assertEquals(resultData.files.length, 2);
-  assertEquals(resultData.match, true);
-  assertEquals(resultData.original_content, resultData.copy_content);
+  assertEquals(result.success, true);
+  assertEquals(JSON.parse(result.jsonResult || "null"), "Binary data");
 });

From b5038cb4a06e5a597ed7230c763e03f3feeee3d7 Mon Sep 17 00:00:00 2001
From: fullzer4
Date: Mon, 26 May 2025 12:35:25 -0300
Subject: [PATCH 04/27] fix: conventions

---
 libs/sandbox-py/langchain_sandbox/pyodide.py | 63 +++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py
index 431f764..cab7a12 100644
--- a/libs/sandbox-py/langchain_sandbox/pyodide.py
+++ b/libs/sandbox-py/langchain_sandbox/pyodide.py
@@ -520,7 +520,68 @@ def execute(
 
 
 class PyodideSandboxTool(BaseTool):
-    """Tool for running python code in a PyodideSandbox."""
+    """Tool for running Python code in a PyodideSandbox.
+
+    If you use a stateful sandbox (PyodideSandboxTool(stateful=True)),
+    the session state (variables, imports, definitions, etc.) is
+    persisted between code executions using the LangGraph checkpointer.
+
+    !!! important
+        When you use a stateful sandbox, this tool can only be used
+        inside a LangGraph graph with a checkpointer, and
+        has to be used with the prebuilt `create_react_agent` or `ToolNode`.
+ + Example: stateless sandbox usage + + ```python + from langgraph.prebuilt import create_react_agent + from langchain_sandbox import PyodideSandboxTool + + tool = PyodideSandboxTool(allow_net=True) + agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", + tools=[tool], + ) + result = await agent.ainvoke( + {"messages": [{"role": "user", "content": "what's 5 + 7?"}]}, + ) + ``` + + Example: stateful sandbox usage + + ```python + from langgraph.prebuilt import create_react_agent + from langgraph.prebuilt.chat_agent_executor import AgentState + from langgraph.checkpoint.memory import InMemorySaver + from langchain_sandbox import PyodideSandboxTool, PyodideSandbox + + class State(AgentState): + session_bytes: bytes + session_metadata: dict + + tool = PyodideSandboxTool(stateful=True, allow_net=True) + agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", + tools=[tool], + checkpointer=InMemorySaver(), + state_schema=State + ) + result = await agent.ainvoke( + { + "messages": [ + {"role": "user", "content": "what's 5 + 7? save result as 'a'"} + ], + "session_bytes": None, + "session_metadata": None + }, + config={"configurable": {"thread_id": "123"}}, + ) + second_result = await agent.ainvoke( + {"messages": [{"role": "user", "content": "what's the sine of 'a'?"}]}, + config={"configurable": {"thread_id": "123"}}, + ) + ``` + """ name: str = "python_code_sandbox" description: str = ( From bb115d3d349474d443d9007bf224612553872fe4 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 26 May 2025 12:39:27 -0300 Subject: [PATCH 05/27] fix: _build_command place convention --- libs/sandbox-py/langchain_sandbox/pyodide.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index cab7a12..aafdcb2 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -353,14 +353,15 @@ async def execute( result = None status: Literal["success", "error"] = "success" - try: - cmd = self._build_command( + cmd = self._build_command( code, session_bytes=session_bytes, session_metadata=session_metadata, memory_limit_mb=memory_limit_mb, ) + try: + # Create and run the subprocess process = await asyncio.create_subprocess_exec( *cmd, From f4584dc4b784e6cb9a07da0363956a5acb752ca0 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Tue, 27 May 2025 18:08:15 -0300 Subject: [PATCH 06/27] fix: better filesystem operations and file handling --- libs/pyodide-sandbox-js/main.ts | 370 +++++---- libs/pyodide-sandbox-js/main_test.ts | 263 ++++-- libs/sandbox-py/langchain_sandbox/pyodide.py | 809 +++++++++---------- 3 files changed, 803 insertions(+), 639 deletions(-) diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts index cb0668d..9b0b5de 100644 --- a/libs/pyodide-sandbox-js/main.ts +++ b/libs/pyodide-sandbox-js/main.ts @@ -31,53 +31,6 @@ class InstallEntry(TypedDict): module: str package: str -class SandboxPath: - """Enhanced Path operations for sandbox environment. - - Provides intuitive file operations with automatic handling of common use cases. 
- """ - - @staticmethod - def sandbox(path: str = "") -> Path: - """Get a Path object pointing to the sandbox directory.""" - base = Path("/sandbox") - if path: - return base / path.lstrip("/") - return base - - @staticmethod - def write_json(path: Union[str, Path], data: Any, indent: int = 2) -> None: - """Write JSON data to a file.""" - path_obj = Path(path) if isinstance(path, str) else path - path_obj.parent.mkdir(parents=True, exist_ok=True) - path_obj.write_text(json.dumps(data, indent=indent, ensure_ascii=False)) - - @staticmethod - def read_json(path: Union[str, Path]) -> Any: - """Read JSON data from a file.""" - path_obj = Path(path) if isinstance(path, str) else path - return json.loads(path_obj.read_text()) - - @staticmethod - def write_bytes_b64(path: Union[str, Path], data: bytes) -> None: - """Write binary data to a file.""" - path_obj = Path(path) if isinstance(path, str) else path - path_obj.parent.mkdir(parents=True, exist_ok=True) - encoded = base64.b64encode(data).decode('ascii') - path_obj.with_suffix(path_obj.suffix + '.b64').write_text(encoded) - - @staticmethod - def read_bytes_b64(path: Union[str, Path]) -> bytes: - """Read binary data from a file.""" - path_obj = Path(path) if isinstance(path, str) else path - b64_file = path_obj.with_suffix(path_obj.suffix + '.b64') - if b64_file.exists(): - encoded = b64_file.read_text() - return base64.b64decode(encoded) - raise FileNotFoundError(f"Binary file {path} not found") - -sandbox_path = SandboxPath() - def perform_fs_operation(op) -> dict: """Filesystem operation function for file operations.""" try: @@ -101,7 +54,7 @@ def perform_fs_operation(op) -> dict: content = f.read() return {"success": True, "content": content, "is_binary": False} else: - return {"success": False, "error": "File not found"} + return {"success": False, "error": f"File not found: {path}"} elif operation == "write": parent_dir = os.path.dirname(path) @@ -115,7 +68,12 @@ def perform_fs_operation(op) -> dict: else: with open(path, "w", encoding=encoding) as f: f.write(content) - return {"success": True} + + exists = os.path.exists(path) + if exists: + return {"success": True} + else: + return {"success": False, "error": f"Failed to create file at {path}"} elif operation == "list": if os.path.exists(path): @@ -132,14 +90,19 @@ def perform_fs_operation(op) -> dict: }) return {"success": True, "items": items} else: - return {"success": False, "error": "Directory not found"} + return {"success": False, "error": f"Directory not found: {path}"} elif operation == "mkdir": - os.makedirs(path, exist_ok=True) - return {"success": True} + try: + os.makedirs(path, exist_ok=True) + exists = os.path.exists(path) + return {"success": exists, "error": None if exists else "Failed to create directory"} + except Exception as e: + return {"success": False, "error": f"Error creating directory: {str(e)}"} elif operation == "exists": - return {"success": True, "exists": os.path.exists(path)} + exists = os.path.exists(path) + return {"success": True, "exists": exists} elif operation == "remove": if os.path.exists(path): @@ -150,7 +113,7 @@ def perform_fs_operation(op) -> dict: shutil.rmtree(path) return {"success": True} else: - return {"success": False, "error": "Path not found"} + return {"success": False, "error": f"Path not found for removal: {path}"} elif operation == "copy": if not destination: @@ -163,7 +126,7 @@ def perform_fs_operation(op) -> dict: shutil.copytree(path, destination, dirs_exist_ok=True) return {"success": True} else: - return {"success": False, 
"error": "Source path not found"} + return {"success": False, "error": f"Source path not found for copy: {path}"} else: return {"success": False, "error": f"Unknown operation: {operation}"} @@ -197,7 +160,6 @@ def find_imports_to_install(imports: list[str]) -> list[InstallEntry]: ) return to_install - async def install_imports( source_code_or_imports: Union[str, list[str]], additional_packages: list[str] = [], @@ -215,14 +177,14 @@ async def install_imports( try: imports: list[str] = find_imports(source_code_or_imports) except SyntaxError: - return + return [] else: imports: list[str] = source_code_or_imports to_install = find_imports_to_install(imports) # Merge with additional packages for package in additional_packages: - if package not in to_install: + if package not in [entry["package"] for entry in to_install]: to_install.append(dict(module=package, package=package)) if to_install: @@ -248,7 +210,6 @@ def load_session_bytes(session_bytes: bytes) -> list[str]: buffer = io.BytesIO(session_bytes.to_py()) dill.session.load_session(filename=buffer) - def dump_session_bytes() -> bytes: """Dump the session module.""" import dill @@ -258,7 +219,6 @@ def dump_session_bytes() -> bytes: dill.session.dump_session(filename=buffer) return buffer.getvalue() - def robust_serialize(obj): """Recursively converts an arbitrary Python object into a JSON-serializable structure. @@ -296,7 +256,6 @@ def robust_serialize(obj): # return a dictionary with type indicator and repr. return {"type": "not_serializable", "repr": repr(obj)} - def dumps(result: Any) -> str: """Get the result of the session.""" result = robust_serialize(result) @@ -311,7 +270,6 @@ interface SessionMetadata { interface FileSystemOptions { enableFileSystem?: boolean; - mountPoint?: string; } interface PyodideResult { @@ -327,6 +285,8 @@ interface PyodideResult { fileSystemInfo?: { type: "memfs"; mountPoint: string; + workingDirectory: string; + mounted: boolean; }; } @@ -338,6 +298,60 @@ interface FileSystemOperation { destination?: string; } + +function resolvePathInSandbox( + inputPath: string, + mountPoint: string = "/sandbox" +): string { + // Se já é absoluto, retorna como está + if (inputPath.startsWith("/")) { + return inputPath; + } + + // Resolve direto no mount point + if (inputPath.startsWith("./")) { + const cleanPath = inputPath.substring(2); + return `${mountPoint}/${cleanPath}`.replace(/\/+/g, "/"); + } else if (inputPath.startsWith("../")) { + return `${mountPoint}/${inputPath}`.replace(/\/+/g, "/"); + } else { + return `${mountPoint}/${inputPath}`.replace(/\/+/g, "/"); + } +} + +/** + * Setup memory filesystem environment in Python + */ +function setupFileSystem(pyodide: any): void { + const mountPoint = "/sandbox"; + + pyodide.runPython(` +import os +import sys + +# Setup memory filesystem environment +MOUNT_POINT = "${mountPoint}" + +# Ensure directory exists +os.makedirs(MOUNT_POINT, exist_ok=True) + +# Change to mount point +os.chdir(MOUNT_POINT) + +# Make variables available globally +sys.modules['__main__'].MOUNT_POINT = MOUNT_POINT + +# Add helper function for path resolution +def resolve_path(path): + """Resolve a path relative to the sandbox""" + if path.startswith("/"): + return path + return os.path.join(MOUNT_POINT, path) + +sys.modules['__main__'].resolve_path = resolve_path + `); +} + async function initPyodide(pyodide: any, options: FileSystemOptions = {}): Promise { const sys = pyodide.pyimport("sys"); const pathlib = pyodide.pyimport("pathlib"); @@ -347,33 +361,86 @@ async function initPyodide(pyodide: 
any, options: FileSystemOptions = {}): Promi pathlib.Path(dirPath).mkdir(); pathlib.Path(dirPath + "prepare_env.py").write_text(prepareEnvCode); - const mountPoint = options.mountPoint || "/sandbox"; - - try { - pyodide.FS.mkdirTree(mountPoint); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - if (!errorMessage.includes("exists")) { - console.warn(`⚠️ Failed to create mount point ${mountPoint}:`, error); + // Initialize filesystem if enabled + if (options.enableFileSystem) { + // Ensure sandbox mount point exists + try { + pyodide.FS.mkdirTree("/sandbox"); + } catch (e) { + // Directory might already exist, which is fine } + + setupFileSystem(pyodide); } } async function performFileSystemOperations( pyodide: any, - operations: FileSystemOperation[] + operations: FileSystemOperation[], + options: FileSystemOptions = {} ): Promise { const results: any[] = []; + + // Ensure sandbox mount point exists + try { + pyodide.FS.mkdirTree("/sandbox"); + } catch (e) { + // Directory might already exist, which is fine + } + const prepare_env = pyodide.pyimport("prepare_env"); for (const op of operations) { try { - const result = prepare_env.perform_fs_operation(op); - const jsResult = result.toJs({ dict_converter: Object.fromEntries }); - results.push(jsResult); + // Resolve paths using sandbox resolution + const resolvedPath = resolvePathInSandbox(op.path, "/sandbox"); + let resolvedDestination: string | undefined; + + if (op.operation === "copy" && op.destination) { + resolvedDestination = resolvePathInSandbox(op.destination, "/sandbox"); + } + + // Create resolved operation + const resolvedOp = { + ...op, + path: resolvedPath, + ...(resolvedDestination && { destination: resolvedDestination }) + }; + + // Handle binary write operations + if (op.operation === "write" && typeof op.content === "string") { + if (op.encoding === "binary") { + const result = await prepare_env.perform_fs_operation(resolvedOp); + results.push(result.toJs()); + continue; + } + + // Use pyodide.FS for text writes (better performance) + try { + const parentDir = resolvedPath.substring(0, resolvedPath.lastIndexOf("/")); + if (parentDir) { + pyodide.FS.mkdirTree(parentDir); + } + pyodide.FS.writeFile(resolvedPath, op.content, { encoding: op.encoding || "utf8" }); + results.push({ success: true, operation: op.operation, path: resolvedPath }); + continue; + } catch { + // Fallback to Python method if pyodide.FS fails + } + } + + // Use Python method for other operations + const result = await prepare_env.perform_fs_operation(resolvedOp); + results.push(result.toJs()); + } catch (error: unknown) { const errorMessage = error instanceof Error ? 
error.message : String(error); - results.push({ success: false, error: errorMessage }); + results.push({ + success: false, + error: errorMessage, + operation: op.operation, + path: op.path, + }); } } @@ -399,20 +466,34 @@ async function runPython( const pyodide = await loadPyodide({ stdout: (msg) => output.push(msg), stderr: (msg) => err_output.push(msg), - }) + }); + await pyodide.loadPackage(["micropip"], { messageCallback: () => {}, errorCallback: (msg: string) => { output.push(`install error: ${msg}`) }, }); - - await initPyodide(pyodide, { - enableFileSystem: true, - mountPoint: options.fileSystemOptions?.mountPoint || "/sandbox" - }); - // Determine session directory + // Auto-enable filesystem if operations are provided or explicitly enabled + const shouldEnableFileSystem = + options.fileSystemOperations?.length > 0 || + options.fileSystemOptions?.enableFileSystem || + // Detect file operations in Python code + (pythonCode.includes("open(") || + pythonCode.includes("with open") || + pythonCode.includes("os.") || + pythonCode.includes("pathlib") || + pythonCode.includes("Path(")); + + const fsOptions: FileSystemOptions = { + enableFileSystem: shouldEnableFileSystem, + ...options.fileSystemOptions + }; + + await initPyodide(pyodide, fsOptions); + + // Determine session metadata let sessionMetadata: SessionMetadata; if (options.sessionMetadata) { sessionMetadata = JSON.parse(options.sessionMetadata); @@ -423,17 +504,18 @@ async function runPython( packages: [], }; } - let sessionData: Uint8Array | null = null; - if (options.sessionBytes && !options.sessionMetadata) { - console.error("sessionMetadata is required when providing sessionBytes"); - return { success: false, error: "sessionMetadata is required when providing sessionBytes" }; - } - - // Import our prepared environment module + // Import prepared environment module const prepare_env = pyodide.pyimport("prepare_env"); - // Prepare additional packages to install (include dill) + + // Execute filesystem operations before Python code + let fileSystemResults: any[] = []; + if (options.fileSystemOperations && options.fileSystemOperations.length > 0) { + fileSystemResults = await performFileSystemOperations(pyodide, options.fileSystemOperations, fsOptions); + } + + // Prepare packages to install const defaultPackages = options.stateful ? ["dill"] : []; const additionalPackagesToInstall = options.sessionBytes ? 
[...new Set([...defaultPackages, ...sessionMetadata.packages])] @@ -452,7 +534,6 @@ async function runPython( ); if (installErrors.length > 0) { - // Restore the original console.log function console.log = originalLog; return { success: false, @@ -466,35 +547,28 @@ async function runPython( if (options.sessionBytes) { sessionData = Uint8Array.from(JSON.parse(options.sessionBytes)); - // Run session preamble await prepare_env.load_session_bytes(sessionData); } - let fileSystemResults: any[] = []; - if (options.fileSystemOperations) { - fileSystemResults = await performFileSystemOperations(pyodide, options.fileSystemOperations); - } - const packages = installedPackages.map((pkg: any) => pkg.get("package")); - // Restore the original console.log function console.log = originalLog; - // Run the Python code + + // Execute Python code const rawValue = await pyodide.runPythonAsync(pythonCode); - // Dump result to string const jsonValue = await prepare_env.dumps(rawValue); - // Update session metadata with installed packages + // Update session metadata sessionMetadata.packages = [ ...new Set([...sessionMetadata.packages, ...packages]), ]; sessionMetadata.lastModified = new Date().toISOString(); if (options.stateful) { - // Save session state to sessionBytes sessionData = await prepare_env.dump_session_bytes() as Uint8Array; - }; - // Return the result with stdout and stderr output + } + + // Build result const result: PyodideResult = { success: true, result: rawValue, @@ -502,24 +576,28 @@ async function runPython( stdout: output, stderr: err_output, sessionMetadata: sessionMetadata, - fileSystemOperations: fileSystemResults, }; - + if (options.stateful && sessionData) { result["sessionBytes"] = sessionData; } - result["fileSystemInfo"] = { - type: "memfs", - mountPoint: options.fileSystemOptions?.mountPoint || "/sandbox", - }; - + // Add filesystem info if enabled + if (fsOptions.enableFileSystem) { + result["fileSystemOperations"] = fileSystemResults; + result["fileSystemInfo"] = { + type: "memfs", + mountPoint: "/sandbox", + workingDirectory: "", + mounted: true + }; + } + return result; - } catch (error: unknown) { - const errorMessage = error instanceof Error ? 
error.message : String(error); + } catch (error: any) { return { success: false, - error: errorMessage, // No errorMessage conversion needed + error: error.message, stdout: output, stderr: err_output }; @@ -528,7 +606,7 @@ async function runPython( async function main(): Promise { const flags = parseArgs(Deno.args, { - string: ["code", "file", "session-bytes", "session-metadata", "fs-operations", "mount-point"], + string: ["code", "file", "session-bytes", "session-metadata", "fs-operations"], alias: { c: "code", f: "file", @@ -538,14 +616,12 @@ async function main(): Promise { b: "session-bytes", m: "session-metadata", fs: "fs-operations", - mp: "mount-point", }, boolean: ["help", "version", "stateful"], default: { help: false, version: false, - stateful: false, - "mount-point": "/sandbox" + stateful: false }, }); @@ -555,15 +631,14 @@ pyodide-sandbox ${pkgVersion} Run Python code in a sandboxed environment using Pyodide OPTIONS: - -c, --code Python code to execute - -f, --file Path to Python file to execute - -s, --stateful Use a stateful session - -b, --session-bytes Session bytes - -m, --session-metadata Session metadata - -fs, --fs-operations JSON array of filesystem operations - -mp, --mount-point Mount point path (default: /sandbox) - -h, --help Display help - -V, --version Display version + -c, --code Python code to execute + -f, --file Path to Python file to execute + -s, --stateful Use a stateful session + -b, --session-bytes Session bytes + -m, --session-metadata Session metadata + -fs, --fs-operations JSON array of filesystem operations + -h, --help Display help + -V, --version Display version `); return; } @@ -580,7 +655,6 @@ OPTIONS: sessionBytes: flags["session-bytes"], sessionMetadata: flags["session-metadata"], fsOperations: flags["fs-operations"], - mountPoint: flags["mount-point"], }; if (!options.code && !options.file) { @@ -588,12 +662,10 @@ OPTIONS: Deno.exit(1); } - // Get Python code from file or command line argument let pythonCode = ""; if (options.file) { try { - // Resolve relative or absolute file path const filePath = options.file.startsWith("/") ? options.file : join(Deno.cwd(), options.file); @@ -604,7 +676,6 @@ OPTIONS: Deno.exit(1); } } else { - // Process code from command line (replacing escaped newlines) pythonCode = options.code?.replace(/\\n/g, "\n") ?? ""; } @@ -618,28 +689,40 @@ OPTIONS: } } - const result = await runPython(pythonCode, { + const runOptions: any = { stateful: options.stateful, sessionBytes: options.sessionBytes, sessionMetadata: options.sessionMetadata, - fileSystemOptions: { - enableFileSystem: true, // Always enabled - mountPoint: options.mountPoint, - }, - fileSystemOperations: fileSystemOperations, - }); + }; - const outputJson = { + // Enable filesystem if operations are provided + if (fileSystemOperations.length > 0) { + runOptions.fileSystemOptions = { + enableFileSystem: true, + }; + runOptions.fileSystemOperations = fileSystemOperations; + } + + const result = await runPython(pythonCode, runOptions); + + // Output result + const outputJson: any = { stdout: result.stdout?.join('') || null, stderr: result.success ? (result.stderr?.join('') || null) : result.error || null, result: result.success ? 
JSON.parse(result.jsonResult || 'null') : null,
     success: result.success,
     sessionBytes: result.sessionBytes,
     sessionMetadata: result.sessionMetadata,
-    fileSystemInfo: result.fileSystemInfo,
-    fileSystemOperations: result.fileSystemOperations,
   };
 
+  // Include filesystem info if used
+  if (result.fileSystemInfo) {
+    outputJson.fileSystemInfo = result.fileSystemInfo;
+  }
+  if (result.fileSystemOperations) {
+    outputJson.fileSystemOperations = result.fileSystemOperations;
+  }
+
   console.log(JSON.stringify(outputJson));
 
   if (!result.success) {
@@ -647,14 +730,11 @@ OPTIONS:
   }
 }
 
-// If this module is run directly
 if (import.meta.main) {
-  // Override the global environment variables that Deno's permission prompts look for
-  // to suppress color-related permission prompts
  main().catch((err) => {
    console.error("Unhandled error:", err);
    Deno.exit(1);
  });
}

-export { runPython, type FileSystemOperation, type FileSystemOptions };
\ No newline at end of file
+export { runPython, resolvePathInSandbox, type FileSystemOperation, type FileSystemOptions };
\ No newline at end of file
diff --git a/libs/pyodide-sandbox-js/main_test.ts b/libs/pyodide-sandbox-js/main_test.ts
index 04d3fd3..4a53cb8 100644
--- a/libs/pyodide-sandbox-js/main_test.ts
+++ b/libs/pyodide-sandbox-js/main_test.ts
@@ -1,5 +1,5 @@
 import { assertEquals, assertNotEquals, assertExists } from "@std/assert";
-import { runPython, type FileSystemOperation } from "./main.ts";
+import { runPython, resolvePathInSandbox, type FileSystemOperation } from "./main.ts";
 
 Deno.test("runPython simple test", async () => {
   const result = await runPython("x = 2 + 3; x", {});
@@ -26,147 +26,254 @@ Deno.test("runPython with error - syntax error", async () => {
   const result = await runPython("x = 5; y = x +", {});
   assertEquals(result.success, false);
   assertNotEquals(result.error?.length, 0);
-  // Check that error contains SyntaxError
   assertEquals(result.error?.includes("SyntaxError"), true);
 });
 
-Deno.test("runPython with error - name error", async () => {
-  const result = await runPython("undefined_variable", {});
-  assertEquals(result.success, false);
-  assertExists(result.error);
-  // Check that error contains NameError
-  assertEquals(result.error?.includes("NameError"), true);
+Deno.test("resolvePathInSandbox - basic resolution", () => {
+  assertEquals(resolvePathInSandbox("config.json"), "/sandbox/config.json");
+  assertEquals(resolvePathInSandbox("./logs/app.log"), "/sandbox/logs/app.log");
+  assertEquals(resolvePathInSandbox("../shared/data.txt"), "/sandbox/../shared/data.txt");
+  assertEquals(resolvePathInSandbox("/tmp/absolute.txt"), "/tmp/absolute.txt");
 });
 
-Deno.test("filesystem - write and read text file", async () => {
+// REMOVED: "resolvePathInSandbox - with working directory" test, since working-directory support was removed
+
+Deno.test("FileSystem - basic operations", async () => {
   const operations: FileSystemOperation[] = [
     {
       operation: "write",
-      path: "/sandbox/test.txt",
-      content: "Hello, World!",
-    }
-  ];
-  
-  const result = await runPython(`
-with open("/sandbox/test.txt", "r") as f:
-    content = f.read()
-content
-  `, {
-    fileSystemOperations: operations
-  });
-  
-  assertEquals(result.success, true);
-  assertEquals(JSON.parse(result.jsonResult || "null"), "Hello, World!");
-});
-
-Deno.test("filesystem - directory operations", async () => {
-  const operations: FileSystemOperation[] = [
+      path: "config.json",
+      content: '{"app": "test", "version": "1.0"}',
+    },
     {
       operation: "mkdir",
-      path: "/sandbox/testdir",
+      path: "data",
     },
     {
      operation: "write",
-      path: "/sandbox/testdir/file.txt",
-      content: "File in directory",
+      path: "data/output.txt",
+      content: "Hello World\nLine 2",
     }
   ];
-  
+
   const result = await runPython(`
 import os
-dir_exists = os.path.isdir("/sandbox/testdir")
-file_path = "/sandbox/testdir/file.txt"
-file_exists = os.path.exists(file_path)
-content = open(file_path).read() if file_exists else ""
-{"dir_exists": dir_exists, "file_exists": file_exists, "content": content}
+import json
+
+# Read config file
+with open("config.json", "r") as f:
+    config = json.load(f)
+
+# Read data file
+with open("data/output.txt", "r") as f:
+    content = f.read()
+
+# List files
+root_files = os.listdir(".")
+data_files = os.listdir("data")
+
+result = {
+    "config": config,
+    "content": content.strip(),
+    "root_files": sorted(root_files),
+    "data_files": sorted(data_files),
+    "working_dir": os.getcwd()
+}
+
+result
   `, {
     fileSystemOperations: operations
   });
-  
+
   assertEquals(result.success, true);
   const resultObj = JSON.parse(result.jsonResult || "null");
-  assertEquals(resultObj.dir_exists, true);
-  assertEquals(resultObj.file_exists, true);
-  assertEquals(resultObj.content, "File in directory");
+
+  assertEquals(resultObj.config.app, "test");
+  assertEquals(resultObj.content, "Hello World\nLine 2");
+  assertEquals(resultObj.root_files, ["config.json", "data"]);
+  assertEquals(resultObj.data_files, ["output.txt"]);
+  assertEquals(resultObj.working_dir, "/sandbox");
 });
 
-Deno.test("filesystem - list directory contents", async () => {
+// REMOVED: "FileSystem - working directory" test, since working-directory support was removed
+
+Deno.test("FileSystem - complex workflow", async () => {
   const operations: FileSystemOperation[] = [
     {
       operation: "mkdir",
-      path: "/sandbox/listdir",
+      path: "workspace",
     },
     {
       operation: "write",
-      path: "/sandbox/listdir/file1.txt",
-      content: "File 1",
+      path: "workspace/input.txt",
+      content: "oldvalue=100\nother line",
     },
     {
       operation: "write",
-      path: "/sandbox/listdir/file2.txt",
-      content: "File 2",
+      path: "workspace/config.ini",
+      content: "[database]\nhost=localhost\nport=5432",
     }
   ];
-  
+
   const result = await runPython(`
 import os
-files = os.listdir("/sandbox/listdir")
-sorted(files)
+import configparser
+
+# Modify input file
+with open("workspace/input.txt", "r") as f:
+    content = f.read()
+
+modified_content = content.replace("oldvalue=100", "newvalue=200")
+
+with open("workspace/input.txt", "w") as f:
+    f.write(modified_content)
+
+# Read config
+config = configparser.ConfigParser()
+config.read("workspace/config.ini")
+
+# Create report
+with open("workspace/report.txt", "w") as f:
+    f.write(f"Host: {config['database']['host']}\\n")
+    f.write("Modification successful\\n")
+
+workspace_files = os.listdir("workspace")
+
+result = {
+    "modification_success": "newvalue=200" in modified_content,
+    "db_host": config['database']['host'],
+    "workspace_files": sorted(workspace_files),
+    "working_dir": os.getcwd()
+}
+
+result
  `, {
    fileSystemOperations: operations
  });
-  
+
  assertEquals(result.success, true);
-  assertEquals(JSON.parse(result.jsonResult || "null"), ["file1.txt", "file2.txt"]);
+  const resultObj = JSON.parse(result.jsonResult || "null");
+
+  assertEquals(resultObj.modification_success, true);
+  assertEquals(resultObj.db_host, "localhost");
+  assertEquals(resultObj.workspace_files, ["config.ini", "input.txt", "report.txt"]);
+  assertEquals(resultObj.working_dir, "/sandbox");
 });
 
-Deno.test("filesystem - custom mount point", async () => {
+Deno.test("FileSystem - binary operations", async () => {
   const operations: FileSystemOperation[] = [
     {
       operation: "write",
-      path: "/customdir/test.txt",
-      content: "Custom mount point",
+      path: "test.bin",
+      content: "QmluYXJ5IGRhdGE=", // Base64 for "Binary data"
+      encoding: "binary"
     }
   ];
-  
+
   const result = await runPython(`
 import os
-path = "/customdir/test.txt"
-exists = os.path.exists(path)
-content = open(path).read() if exists else ""
-{"exists": exists, "content": content}
+import base64
+
+# Read binary file
+with open("test.bin", "rb") as f:
+    binary_content = f.read()
+
+# Decode content
+try:
+    decoded = binary_content.decode('utf-8')
+except UnicodeDecodeError:
+    decoded = base64.b64decode(binary_content).decode('utf-8')
+
+result = {
+    "file_exists": os.path.exists("test.bin"),
+    "decoded_content": decoded,
+    "is_binary_match": decoded == "Binary data",
+    "working_dir": os.getcwd()
+}
+
+result
   `, {
-    fileSystemOptions: { mountPoint: "/customdir" },
     fileSystemOperations: operations
   });
-  
+
   assertEquals(result.success, true);
   const resultObj = JSON.parse(result.jsonResult || "null");
-  assertEquals(resultObj.exists, true);
-  assertEquals(resultObj.content, "Custom mount point");
+  assertEquals(resultObj.file_exists, true);
+  assertEquals(resultObj.decoded_content, "Binary data");
+  assertEquals(resultObj.is_binary_match, true);
+  assertEquals(resultObj.working_dir, "/sandbox");
 });
 
-Deno.test("filesystem - binary file operations with explicit encoding", async () => {
-  // Create binary data as base64 string
-  const binaryContent = "QmluYXJ5IGRhdGE="; // base64 for "Binary data"
+// NEW: additional test verifying that memfs works with different directory structures
+Deno.test("FileSystem - memfs directory structure", async () => {
+  const operations: FileSystemOperation[] = [
+    {
+      operation: "mkdir",
+      path: "project",
+    },
+    {
+      operation: "mkdir",
+      path: "project/src",
+    },
+    {
+      operation: "write",
+      path: "project/src/main.py",
+      content: "print('Hello from memfs!')",
+    },
+    {
+      operation: "write",
+      path: "project/README.md",
+      content: "# My Project\nRunning in memfs",
+    }
+  ];
 
-  const operations: FileSystemOperation[] = [
-    {
-      operation: "write",
-      path: "/sandbox/explicit.bin",
-      content: binaryContent,
-      encoding: "binary" // Explicitly set binary encoding
-    }
-  ];
+  const result = await runPython(`
+import os
+
+# Navigate and check structure
+project_exists = os.path.exists("project")
+src_exists = os.path.exists("project/src")
+main_py_exists = os.path.exists("project/src/main.py")
+readme_exists = os.path.exists("project/README.md")
+
+# Read files
+with open("project/src/main.py", "r") as f:
+    main_content = f.read()
+
+with open("project/README.md", "r") as f:
+    readme_content = f.read()
+
+# List structure
+project_files = sorted(os.listdir("project"))
+src_files = sorted(os.listdir("project/src"))
+
+result = {
+    "project_exists": project_exists,
+    "src_exists": src_exists,
+    "main_py_exists": main_py_exists,
+    "readme_exists": readme_exists,
+    "main_content": main_content.strip(),
+    "readme_content": readme_content.strip(),
+    "project_files": project_files,
+    "src_files": src_files,
+    "working_dir": os.getcwd()
+}
+
+result
+  `, {
+    fileSystemOperations: operations
+  });
 
-  const result = await runPython(`
-with open("/sandbox/explicit.bin", "rb") as f:
-    content = f.read()
-content.decode('utf-8') # Should be "Binary data"
-  `, {
-    fileSystemOperations: operations
-  });
-
   assertEquals(result.success, true);
-  assertEquals(JSON.parse(result.jsonResult || "null"), "Binary data");
-});
+  const resultObj = JSON.parse(result.jsonResult || "null");
+
+  assertEquals(resultObj.project_exists, true);
+  assertEquals(resultObj.src_exists, true);
+  assertEquals(resultObj.main_py_exists, true);
+  assertEquals(resultObj.readme_exists, true);
+  assertEquals(resultObj.main_content, "print('Hello from memfs!')");
+  assertEquals(resultObj.readme_content, "# My Project\nRunning in memfs");
+  assertEquals(resultObj.project_files, ["README.md", "src"]);
+  assertEquals(resultObj.src_files, ["main.py"]);
+  assertEquals(resultObj.working_dir, "/sandbox");
+});
\ No newline at end of file
diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py
index aafdcb2..3103982 100644
--- a/libs/sandbox-py/langchain_sandbox/pyodide.py
+++ b/libs/sandbox-py/langchain_sandbox/pyodide.py
@@ -35,10 +35,23 @@ class CodeExecutionResult:
     execution_time: float
     session_metadata: dict | None = None
     session_bytes: bytes | None = None
+    filesystem_info: dict | None = None
+    filesystem_operations: list[dict] | None = None
+
+
+@dataclasses.dataclass(kw_only=True)
+class FileSystemOperation:
+    """Container for filesystem operations."""
+
+    operation: Literal["read", "write", "list", "mkdir", "exists", "remove", "copy"]
+    path: str
+    content: str | None = None
+    encoding: str | None = None
+    destination: str | None = None
 
 
 # Published package name
-PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4"
+PKG_NAME = "../pyodide-sandbox-js/main.ts"
 
 
 def build_permission_flag(
@@ -46,18 +59,7 @@ def build_permission_flag(
     *,
     value: bool | list[str],
 ) -> str | None:
-    """Build a permission flag string based on the provided setting.
-
-    Args:
-        flag: The base permission flag (e.g., "--allow-read").
-        value: Either a boolean (True for unrestricted access, False for no access)
-            or a list of allowed items.
-        default_values: Optional default items that should always be included.
-
-    Returns:
-        A string with the permission flag and items, or None if no permission should
-        be added.
-    """
+    """Build a permission flag string based on the provided setting."""
     if value is True:
         return flag
     if isinstance(value, list) and value:
@@ -66,28 +68,7 @@ def build_permission_flag(
 
 
 class BasePyodideSandbox:
-    """Base class for PyodideSandbox implementations.
-
-    This class provides the common initialization and configuration logic for both
-    synchronous and asynchronous PyodideSandbox implementations.
-
-    The sandbox leverages Deno's security model to create a secure runtime for
-    executing untrusted Python code. It works by spawning a Deno subprocess that loads
-    Pyodide (Python compiled to WebAssembly) and executes the provided code in an
-    isolated environment.
-
-    Security features:
-    - Configurable permissions for file system, network, and environment access
-    - Support for execution timeouts to prevent infinite loops
-    - Memory usage monitoring
-    - Process isolation via Deno's security sandbox
-
-    The sandbox supports fine-grained permission control through its initializer:
-    - Restrict network access to specific domains
-    - Limit file system access to specific directories
-    - Control environment variable access
-    - Prevent subprocess execution and FFI
-    """
+    """Base class for PyodideSandbox implementations."""
 
     def __init__(
         self,
@@ -101,70 +82,13 @@ def __init__(
         allow_ffi: list[str] | bool = False,
         node_modules_dir: str = "auto",
         skip_deno_check: bool = False,
+        enable_filesystem: bool = False,  # New: explicit filesystem control
     ) -> None:
-        """Initialize the sandbox with specific Deno permissions.
-
-        This method configures the security permissions for the Deno subprocess that
-        will execute Python code via Pyodide.
By default, all permissions are - disabled (False) for maximum security. Permissions can be enabled selectively - based on the needs of the code being executed. - - Args: - stateful: Whether to use a stateful session. If True, `sandbox.execute` - will include session metadata and the session bytes containing the - session state (variables, imports, etc.) in the execution result. - This allows saving and reusing the session state between executions. - - allow_env: Environment variable access configuration: - - False: No environment access (default, most secure) - - True: Unrestricted access to all environment variables - - List[str]: Access restricted to specific environment variables, e.g. - ["PATH", "PYTHONPATH"] - - allow_read: File system read access configuration: - - False: No file system read access (default, most secure) - - True: Unrestricted read access to the file system - - List[str]: Read access restricted to specific paths, e.g. - ["/tmp/sandbox", "./data"] - - By default allows read from node_modules - - allow_write: File system write access configuration: - - False: No file system write access (default, most secure) - - True: Unrestricted write access to the file system - - List[str]: Write access restricted to specific paths, e.g. - ["/tmp/sandbox/output"] - - By default allows write to node_modules - - allow_net: Network access configuration: - - False: No network access (default, most secure) - - True: Unrestricted network access - - List[str]: Network access restricted to specific domains/IPs, e.g. - ["api.example.com", "data.example.org:8080"] - - allow_run: Subprocess execution configuration: - - False: No subprocess execution allowed (default, most secure) - - True: Unrestricted subprocess execution - - List[str]: Subprocess execution restricted to specific commands, e.g. - ["python", "git"] - - allow_ffi: Foreign Function Interface access configuration: - - False: No FFI access (default, most secure) - - True: Unrestricted FFI access - - List[str]: FFI access restricted to specific libraries, e.g. - ["/usr/lib/libm.so"] - - node_modules_dir: Directory for Node.js modules. Set to "auto" to use - the default directory for Deno modules. - skip_deno_check: If True, skip the check for Deno installation. - """ + """Initialize the sandbox with specific Deno permissions.""" self.stateful = stateful - # Configure permissions - self.permissions = [] - - self.file_operations = [] - + self.enable_filesystem = enable_filesystem + self._filesystem_operations: list[FileSystemOperation] = [] + if not skip_deno_check: # Check if Deno is installed try: @@ -176,12 +100,9 @@ def __init__( msg = "Deno is not installed or not in PATH." raise RuntimeError(msg) from e - # Define permission configurations: - # each tuple contains (flag, setting, defaults) + # Define permission configurations perm_defs = [ ("--allow-env", allow_env, None), - # For file system permissions, if no permission is specified, - # force node_modules ("--allow-read", allow_read, ["node_modules"]), ("--allow-write", allow_write, ["node_modules"]), ("--allow-net", allow_net, None), @@ -200,124 +121,203 @@ def __init__( self.permissions.append(f"--node-modules-dir={node_modules_dir}") - def attach_file(self, path: str, content: str | bytes) -> None: + def attach_file( + self, + path: str, + content: str, + *, + encoding: str = "utf-8", + ) -> None: """Attach a file to the sandbox filesystem. - - The file will be created in the sandbox's memfs filesystem and will be - available to the Python code when executed. 
Binary content is automatically - detected based on content type. - + Args: - path: Path in the sandbox filesystem where the file should be created. - If not starting with '/sandbox/', it will be prefixed automatically. - content: The content of the file, either as a string or bytes. - If bytes are provided, it will be treated as binary data. + path: Path where the file should be created (relative to /sandbox) + content: Content of the file as a string + encoding: Text encoding for the file (default: utf-8) """ - binary = isinstance(content, bytes) - - if not path.startswith("/sandbox/"): - path = f"/sandbox/{path}" - - encoding = "binary" if binary else "utf-8" - - if binary: - content = base64.b64encode(content).decode("ascii") - - self.file_operations.append({ - "operation": "write", - "path": path, - "content": content, - "encoding": encoding - }) + # Auto-enable filesystem when files are attached + self.enable_filesystem = True + + operation = FileSystemOperation( + operation="write", + path=path, + content=content, + encoding=encoding, + ) + self._filesystem_operations.append(operation) + logger.debug(f"Attached file: {path} ({len(content)} chars)") - def attach_files( - self, files: dict[str, str | bytes | dict[str, str | bool]] + def attach_binary_file( + self, + path: str, + content: bytes, ) -> None: - """Attach multiple files to the sandbox filesystem. - + """Attach a binary file to the sandbox filesystem. + Args: - files: Dictionary mapping paths to file contents. - Each value can be: - - a string (treated as text content) - - bytes (treated as binary content) - - a dictionary with 'content' key (and optional 'binary' key - if explicit format control is needed) + path: Path where the file should be created (relative to /sandbox) + content: Binary content of the file """ - for path, content_info in files.items(): - if isinstance(content_info, (str, bytes)): - self.attach_file(path, content_info) - elif isinstance(content_info, dict): - content = content_info.get("content", "") + # Auto-enable filesystem when files are attached + self.enable_filesystem = True + + b64_content = base64.b64encode(content).decode("ascii") + operation = FileSystemOperation( + operation="write", + path=path, + content=b64_content, + encoding="binary", + ) + self._filesystem_operations.append(operation) + logger.debug(f"Attached binary file: {path} ({len(content)} bytes)") - if "binary" in content_info: - binary_flag = content_info.get("binary", False) - if isinstance(content, str) and binary_flag: - # Convert string to bytes when binary flag is True - content = content.encode("utf-8") + def create_directory(self, path: str) -> None: + """Create a directory in the sandbox filesystem. + + Args: + path: Directory path to create (relative to /sandbox) + """ + # Auto-enable filesystem when directories are created + self.enable_filesystem = True + + operation = FileSystemOperation( + operation="mkdir", + path=path, + ) + self._filesystem_operations.append(operation) + logger.debug(f"Created directory: {path}") - self.attach_file(path, content) + def read_file(self, path: str, *, encoding: str = "utf-8") -> None: + """Queue a file read operation. + + Args: + path: Path to read from (relative to /sandbox) + encoding: Text encoding for the file (default: utf-8) + + Note: This queues the operation but doesn't return content immediately. + Use this when you need to read files during code execution. 
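+
+        A minimal sketch of the intended flow (assuming, as the diffs above
+        suggest, that read results are surfaced in queue order on
+        `CodeExecutionResult.filesystem_operations` with the result shape
+        produced by `perform_fs_operation`):
+
+        ```python
+        sandbox.read_file("output/report.txt")
+        result = await sandbox.execute("pass")
+        read_result = (result.filesystem_operations or [])[-1]
+        content = read_result.get("content") if read_result.get("success") else None
+        ```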
+ """ + self.enable_filesystem = True + + operation = FileSystemOperation( + operation="read", + path=path, + encoding=encoding, + ) + self._filesystem_operations.append(operation) + logger.debug(f"Queued read operation: {path}") - def _build_command( - self, - code: str, - *, - session_bytes: bytes | None = None, - session_metadata: dict | None = None, - memory_limit_mb: int | None = None, - ) -> list[str]: - """Build the Deno command with all necessary arguments. + def list_directory(self, path: str = ".") -> None: + """Queue a directory listing operation. + + Args: + path: Directory path to list (relative to /sandbox, default: current) + """ + self.enable_filesystem = True + + operation = FileSystemOperation( + operation="list", + path=path, + ) + self._filesystem_operations.append(operation) + logger.debug(f"Queued list operation: {path}") + def remove_path(self, path: str) -> None: + """Queue a file or directory removal operation. + Args: - code: The Python code to execute - session_bytes: Optional session state bytes - session_metadata: Optional session metadata - memory_limit_mb: Optional memory limit in MB + path: Path to remove (relative to /sandbox) + """ + self.enable_filesystem = True + + operation = FileSystemOperation( + operation="remove", + path=path, + ) + self._filesystem_operations.append(operation) + logger.debug(f"Queued remove operation: {path}") - Returns: - List of command arguments for subprocess execution + def copy_path(self, source: str, destination: str) -> None: + """Queue a file or directory copy operation. + + Args: + source: Source path (relative to /sandbox) + destination: Destination path (relative to /sandbox) """ - cmd = [ - "deno", - "run", - ] + self.enable_filesystem = True + + operation = FileSystemOperation( + operation="copy", + path=source, + destination=destination, + ) + self._filesystem_operations.append(operation) + logger.debug(f"Queued copy operation: {source} -> {destination}") + def clear_filesystem_operations(self) -> None: + """Clear all queued filesystem operations.""" + self._filesystem_operations.clear() + logger.debug("Cleared filesystem operations") + + def _build_command(self, code: str, **kwargs) -> list[str]: + cmd = ["deno", "run"] + # Apply permissions cmd.extend(self.permissions) + + # Memory limit + if kwargs.get('memory_limit_mb'): + cmd.append(f"--v8-flags=--max-old-space-size={kwargs['memory_limit_mb']}") - # Deno uses the V8 flag --max-old-space-size to limit memory usage in MB - if memory_limit_mb is not None and memory_limit_mb > 0: - cmd.append(f"--v8-flags=--max-old-space-size={memory_limit_mb}") - - # Add the path to the JavaScript wrapper script cmd.append(PKG_NAME) - - # Add script path and code cmd.extend(["-c", code]) + # Stateful if self.stateful: cmd.extend(["-s"]) - if session_bytes: - # Convert bytes to list of integers and then to JSON string - bytes_array = list(session_bytes) + # Session data + if kwargs.get('session_bytes'): + bytes_array = list(kwargs['session_bytes']) cmd.extend(["-b", json.dumps(bytes_array)]) - if session_metadata: - cmd.extend(["-m", json.dumps(session_metadata)]) - - # Add filesystem operations if there are any - if self.file_operations: - cmd.extend(["--fs-operations", json.dumps(self.file_operations)]) + if kwargs.get('session_metadata'): + cmd.extend(["-m", json.dumps(kwargs['session_metadata'])]) + + # FILESYSTEM: Ativado se há operações ou foi explicitamente habilitado + if self._filesystem_operations or self.enable_filesystem: + # Construir operações filesystem se existem + 
if self._filesystem_operations: + fs_ops = [] + for op in self._filesystem_operations: + op_dict = { + "operation": op.operation, + "path": op.path, + } + if op.content is not None: + op_dict["content"] = op.content + if op.encoding is not None: + op_dict["encoding"] = op.encoding + if op.destination is not None: + op_dict["destination"] = op.destination + fs_ops.append(op_dict) + + cmd.extend(["-fs", json.dumps(fs_ops)]) + + logger.debug(f"Filesystem enabled with {len(fs_ops)} operations") + if len(fs_ops) <= 5: # Log detalhes se poucas operações + for i, op in enumerate(fs_ops): + logger.debug(f" Op {i+1}: {op['operation']} {op['path']}") + else: + # Filesystem habilitado mas sem operações iniciais + cmd.extend(["-fs", "[]"]) + logger.debug("Filesystem enabled with no initial operations") return cmd class PyodideSandbox(BasePyodideSandbox): - """Asynchronous implementation of PyodideSandbox. - - This class provides an asynchronous interface for executing Python code in a - sandboxed Deno environment using Pyodide. - """ + """Asynchronous implementation of PyodideSandbox.""" async def execute( self, @@ -327,26 +327,8 @@ async def execute( session_metadata: dict | None = None, timeout_seconds: float | None = None, memory_limit_mb: int | None = None, - clear_files: bool = False, ) -> CodeExecutionResult: - """Execute Python code asynchronously in a sandboxed Deno subprocess. - - This method spawns a Deno subprocess that loads Pyodide (Python compiled - to WebAssembly) and executes the provided code within that sandboxed - environment. The execution is subject to the permissions configured in the - sandbox's initialization and the resource constraints provided as arguments. - - Args: - code: The Python code to execute in the sandbox - session_bytes: Optional bytes containing session state - session_metadata: Optional metadata for session state - timeout_seconds: Maximum execution time in seconds - memory_limit_mb: Maximum memory usage in MB - clear_files: If True, clear the attached files after execution - - Returns: - CodeExecutionResult containing execution results and metadata - """ + """Execute Python code asynchronously in a sandboxed Deno subprocess.""" start_time = time.time() stdout = "" stderr = "" @@ -354,58 +336,73 @@ async def execute( status: Literal["success", "error"] = "success" cmd = self._build_command( - code, - session_bytes=session_bytes, - session_metadata=session_metadata, - memory_limit_mb=memory_limit_mb, - ) + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + memory_limit_mb=memory_limit_mb, + ) - try: + # Debug logging + logger.debug(f"Executing command: {' '.join(cmd[:8])}{'...' 
if len(cmd) > 8 else ''}") - # Create and run the subprocess - process = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, + # Create and run the subprocess + process = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + try: + # Wait for process with a timeout + stdout_bytes, stderr_bytes = await asyncio.wait_for( + process.communicate(), + timeout=timeout_seconds, ) + stdout = stdout_bytes.decode("utf-8", errors="replace") - try: - # Wait for process with a timeout - stdout_bytes, stderr_bytes = await asyncio.wait_for( - process.communicate(), - timeout=timeout_seconds, + if stdout: + # Extract JSON from output that may contain loading messages + full_result = json.loads(stdout) + stdout = full_result.get("stdout", None) + stderr = full_result.get("stderr", None) + result = full_result.get("result", None) + status = "success" if full_result.get("success", False) else "error" + session_metadata = full_result.get("sessionMetadata", None) + filesystem_info = full_result.get("fileSystemInfo", None) + filesystem_operations = full_result.get("fileSystemOperations", None) + + # Convert the Uint8Array to Python bytes + session_bytes_array = full_result.get("sessionBytes", None) + session_bytes = ( + bytes(session_bytes_array) if session_bytes_array else None ) - stdout = stdout_bytes.decode("utf-8", errors="replace") - - if stdout: - # stdout encodes the full result from the sandbox. - # including stdout, stderr, and the json result. - full_result = json.loads(stdout) - stdout = full_result.get("stdout", None) - stderr = full_result.get("stderr", None) - result = full_result.get("result", None) - status = "success" if full_result.get("success", False) else "error" - session_metadata = full_result.get("sessionMetadata", None) - # Convert the Uint8Array to Python bytes - session_bytes_array = full_result.get("sessionBytes", None) - session_bytes = ( - bytes(session_bytes_array) if session_bytes_array else None - ) - else: - stderr = stderr_bytes.decode("utf-8", errors="replace") - status = "error" - except asyncio.TimeoutError: - process.kill() - await process.wait() + + # Log filesystem info if available + if filesystem_info: + logger.debug(f"Filesystem: {filesystem_info['type']} at {filesystem_info['mountPoint']}") + if filesystem_operations: + logger.debug(f"Filesystem operations completed: {len(filesystem_operations)}") + else: + stderr = stderr_bytes.decode("utf-8", errors="replace") status = "error" - stderr = f"Execution timed out after {timeout_seconds} seconds" - except asyncio.CancelledError: - # Optionally: log cancellation if needed - pass - finally: - if clear_files: - self.file_operations = [] - + filesystem_info = None + filesystem_operations = None + except asyncio.TimeoutError: + process.kill() + await process.wait() + status = "error" + stderr = f"Execution timed out after {timeout_seconds} seconds" + filesystem_info = None + filesystem_operations = None + except json.JSONDecodeError as e: + status = "error" + stderr = f"Failed to parse output as JSON: {e}\nRaw output: {stdout}" + filesystem_info = None + filesystem_operations = None + except asyncio.CancelledError: + # Optionally: log cancellation if needed + pass + end_time = time.time() return CodeExecutionResult( @@ -416,14 +413,13 @@ async def execute( result=result, session_metadata=session_metadata, session_bytes=session_bytes, + filesystem_info=filesystem_info, + 
filesystem_operations=filesystem_operations, ) class SyncPyodideSandbox(BasePyodideSandbox): - """Synchronous version of PyodideSandbox. - - This class provides a synchronous interface to the PyodideSandbox functionality. - """ + """Synchronous version of PyodideSandbox.""" def execute( self, @@ -433,42 +429,26 @@ def execute( session_metadata: dict | None = None, timeout_seconds: float | None = None, memory_limit_mb: int | None = None, - clear_files: bool = False, ) -> CodeExecutionResult: - """Execute Python code synchronously in a sandboxed Deno subprocess. - - This method provides the same functionality as PyodideSandbox.execute() but - in a synchronous/blocking manner. - - Args: - code: The Python code to execute in the sandbox - session_bytes: Optional bytes containing session state - session_metadata: Optional metadata for session state - timeout_seconds: Maximum execution time in seconds - memory_limit_mb: Maximum memory usage in MB - clear_files: If True, clear the attached files after execution - - Returns: - CodeExecutionResult containing execution results and metadata - """ + """Execute Python code synchronously in a sandboxed Deno subprocess.""" start_time = time.time() stdout = "" result = None stderr: str status: Literal["success", "error"] - try: - cmd = self._build_command( - code, - session_bytes=session_bytes, - session_metadata=session_metadata, - memory_limit_mb=memory_limit_mb, - ) + cmd = self._build_command( + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + memory_limit_mb=memory_limit_mb, + ) + + # Debug logging + logger.debug(f"Executing command: {' '.join(cmd[:8])}{'...' if len(cmd) > 8 else ''}") + try: # Run the subprocess with timeout - # Ignoring S603 for subprocess.run as the cmd is built safely. - # Untrusted input comes from `code` parameter, which should be - # escaped properly as we are **not** using shell=True. 
process = subprocess.run( # noqa: S603 cmd, capture_output=True, @@ -483,29 +463,43 @@ def execute( stdout = stdout_bytes.decode("utf-8", errors="replace") if stdout: - # stdout encodes the full result from the sandbox - # including stdout, stderr, and the json result + # Extract JSON from output that may contain loading messages full_result = json.loads(stdout) stdout = full_result.get("stdout", None) stderr = full_result.get("stderr", None) result = full_result.get("result", None) status = "success" if full_result.get("success", False) else "error" session_metadata = full_result.get("sessionMetadata", None) + filesystem_info = full_result.get("fileSystemInfo", None) + filesystem_operations = full_result.get("fileSystemOperations", None) + # Convert the Uint8Array to Python bytes session_bytes_array = full_result.get("sessionBytes", None) session_bytes = ( bytes(session_bytes_array) if session_bytes_array else None ) + + # Log filesystem info if available + if filesystem_info: + logger.debug(f"Filesystem: {filesystem_info['type']} at {filesystem_info['mountPoint']}") + if filesystem_operations: + logger.debug(f"Filesystem operations completed: {len(filesystem_operations)}") else: stderr = stderr_bytes.decode("utf-8", errors="replace") status = "error" + filesystem_info = None + filesystem_operations = None except subprocess.TimeoutExpired: status = "error" stderr = f"Execution timed out after {timeout_seconds} seconds" - finally: - if clear_files: - self.file_operations = [] + filesystem_info = None + filesystem_operations = None + except json.JSONDecodeError as e: + status = "error" + stderr = f"Failed to parse output as JSON: {e}\nRaw output: {stdout}" + filesystem_info = None + filesystem_operations = None end_time = time.time() @@ -517,80 +511,23 @@ def execute( result=result, session_metadata=session_metadata, session_bytes=session_bytes, + filesystem_info=filesystem_info, + filesystem_operations=filesystem_operations, ) class PyodideSandboxTool(BaseTool): - """Tool for running python code in a PyodideSandbox. - - If you use a stateful sandbox (PyodideSandboxTool(stateful=True)), - the state between code executions (to variables, imports, - and definitions, etc.), will be persisted using LangGraph checkpointer. - - !!! important - When you use a stateful sandbox, this tool can only be used - inside a LangGraph graph with a checkpointer, and - has to be used with the prebuilt `create_react_agent` or `ToolNode`. - - Example: stateless sandbox usage - - ```python - from langgraph.prebuilt import create_react_agent - from langchain_sandbox import PyodideSandboxTool - - tool = PyodideSandboxTool(allow_net=True) - agent = create_react_agent( - "anthropic:claude-3-7-sonnet-latest", - tools=[tool], - ) - result = await agent.ainvoke( - {"messages": [{"role": "user", "content": "what's 5 + 7?"}]}, - ) - ``` - - Example: stateful sandbox usage - - ```python - from langgraph.prebuilt import create_react_agent - from langgraph.prebuilt.chat_agent_executor import AgentState - from langgraph.checkpoint.memory import InMemorySaver - from langchain_sandbox import PyodideSandboxTool, PyodideSandbox - - class State(AgentState): - session_bytes: bytes - session_metadata: dict - - tool = PyodideSandboxTool(stateful=True, allow_net=True) - agent = create_react_agent( - "anthropic:claude-3-7-sonnet-latest", - tools=[tool], - checkpointer=InMemorySaver(), - state_schema=State - ) - result = await agent.ainvoke( - { - "messages": [ - {"role": "user", "content": "what's 5 + 7? 
save result as 'a'"}
-            ],
-            "session_bytes": None,
-            "session_metadata": None
-        },
-        config={"configurable": {"thread_id": "123"}},
-    )
-    second_result = await agent.ainvoke(
-        {"messages": [{"role": "user", "content": "what's the sine of 'a'?"}]},
-        config={"configurable": {"thread_id": "123"}},
-    )
-    ```
-    """
+    """Tool for running python code in a PyodideSandbox."""
 
     name: str = "python_code_sandbox"
     description: str = (
-        "A secure Python code sandbox. Use this to execute python commands.\n"
+        "A secure Python code sandbox with filesystem support. Use this to execute python commands.\n"
         "- Input should be a valid python command.\n"
         "- To return output, you should print it out with `print(...)`.\n"
         "- Don't use f-strings when printing outputs.\n"
-        "- If you need to make web requests, use `httpx.AsyncClient`."
+        "- If you need to make web requests, use `httpx.AsyncClient`.\n"
+        "- Files can be read/written using standard Python file operations.\n"
+        "- All file operations work within a sandboxed memory filesystem."
     )
 
     # Mirror the PyodideSandbox constructor arguments
@@ -604,6 +541,7 @@ class State(AgentState):
     timeout_seconds: float | None
     """Timeout for code execution in seconds. By default set to 60 seconds."""
     node_modules_dir: str = "auto"
+    enable_filesystem: bool = False  # NEW: filesystem control
 
     _sandbox: PyodideSandbox
     _sync_sandbox: SyncPyodideSandbox
@@ -614,28 +552,24 @@ def __init__(
         stateful: bool = False,
         timeout_seconds: float | None = 60,
         allow_net: list[str] | bool = False,
+        enable_filesystem: bool = False,  # NEW: explicitly enable the filesystem
         **kwargs: dict[str, Any],
     ) -> None:
         """Initialize the tool.
 
         Args:
-            stateful: Whether to use a stateful sandbox. If True, `sandbox.execute`
-                will include session metadata and the session bytes containing the
-                session state (variables, imports, etc.) in the execution result.
-                This allows saving and reusing the session state between executions.
+            stateful: Whether to use a stateful sandbox.
             timeout_seconds: Timeout for code execution in seconds.
-            allow_net: configure network access. If setting to True, any network access
-                is allowed, including potentially internal network addresses that you
-                may not want to expose to a malicious actor.
-                Depending on your use case, you can restrict the network access to
-                only the URLs you need (e.g., required to set up micropip / pyodide).
-                Please refer to pyodide documentation for more details.
+            allow_net: configure network access.
+            enable_filesystem: Enable filesystem operations in the sandbox.
+                This is automatically enabled when files are attached.
             **kwargs: Other attributes will be passed to the PyodideSandbox
         """
         super().__init__(
             stateful=stateful,
             timeout_seconds=timeout_seconds,
             allow_net=allow_net,
+            enable_filesystem=enable_filesystem,
             **kwargs,
         )
 
@@ -675,6 +609,7 @@ class PyodideSandboxToolInput(BaseModel):
             allow_run=self.allow_run,
             allow_ffi=self.allow_ffi,
             node_modules_dir=self.node_modules_dir,
+            enable_filesystem=self.enable_filesystem,  # NEW
         )
         # Initialize sync sandbox with deno check skipped since async sandbox already
         # checked
@@ -687,41 +622,59 @@ class PyodideSandboxToolInput(BaseModel):
             allow_run=self.allow_run,
             allow_ffi=self.allow_ffi,
             node_modules_dir=self.node_modules_dir,
+            enable_filesystem=self.enable_filesystem,  # NEW
             skip_deno_check=True,  # Skip deno check since async sandbox already checked
         )
 
-    def attach_file(self, path: str, content: str | bytes) -> None:
-        """Attach a file to the sandbox filesystem.
-
-        This method delegates to both the async and sync sandboxes to ensure consistency
-        Binary content is automatically detected based on the content type.
-
-        Args:
-            path: Path in the sandbox filesystem where the file should be created.
-                If not starting with '/sandbox/', it will be prefixed automatically.
-            content: The content of the file, either as a string or bytes.
-                If bytes are provided, it will be treated as binary data.
-        """
-        self._sandbox.attach_file(path, content)
-        self._sync_sandbox.attach_file(path, content)
-
-    def attach_files(
-        self, files: dict[str, str | bytes | dict[str, str | bool]]
-    ) -> None:
-        """Attach multiple files to the sandbox filesystem.
-
-        This method delegates to both the async and sync sandboxes to ensure consistency
-
-        Args:
-            files: Dictionary mapping paths to file contents.
-                Each value can be:
-                - a string (treated as text content)
-                - bytes (treated as binary content)
-                - a dictionary with 'content' key (and optional 'binary' key
-                  if explicit format control is needed)
-        """
-        self._sandbox.attach_files(files)
-        self._sync_sandbox.attach_files(files)
+    def attach_file(
+        self,
+        path: str,
+        content: str,
+        *,
+        encoding: str = "utf-8",
+    ) -> None:
+        """Attach a file to the sandbox environment."""
+        self._sandbox.attach_file(path, content, encoding=encoding)
+        self._sync_sandbox.attach_file(path, content, encoding=encoding)
 
+    def attach_binary_file(
+        self,
+        path: str,
+        content: bytes,
+    ) -> None:
+        """Attach a binary file to the sandbox environment."""
+        self._sandbox.attach_binary_file(path, content)
+        self._sync_sandbox.attach_binary_file(path, content)
+
+    def create_directory(self, path: str) -> None:
+        """Create a directory in the sandbox environment."""
+        self._sandbox.create_directory(path)
+        self._sync_sandbox.create_directory(path)
+
+    def read_file(self, path: str, *, encoding: str = "utf-8") -> None:
+        """Queue a file read operation for the next execution."""
+        self._sandbox.read_file(path, encoding=encoding)
+        self._sync_sandbox.read_file(path, encoding=encoding)
+
+    def list_directory(self, path: str = ".") -> None:
+        """Queue a directory listing operation for the next execution."""
+        self._sandbox.list_directory(path)
+        self._sync_sandbox.list_directory(path)
+
+    def remove_path(self, path: str) -> None:
+        """Queue a file/directory removal operation for the next execution."""
+        self._sandbox.remove_path(path)
+        self._sync_sandbox.remove_path(path)
+
+    def copy_path(self, source: str, destination: str) -> None:
+        """Queue a file/directory copy operation for the next execution."""
+        self._sandbox.copy_path(source, destination)
+        self._sync_sandbox.copy_path(source, destination)
+
+    def clear_filesystem_operations(self) -> None:
+        """Clear all queued filesystem operations."""
+        self._sandbox.clear_filesystem_operations()
+        self._sync_sandbox.clear_filesystem_operations()
 
     def _run(
         self,
@@ -755,17 +708,12 @@ def _run(
             session_metadata=session_metadata,
             timeout_seconds=self.timeout_seconds,
         )
-        else:
-            result = self._sync_sandbox.execute(
-                code, timeout_seconds=self.timeout_seconds
-            )
-
-        if result.stderr:
-            tool_result = f"Error during execution: {result.stderr}"
-        else:
-            tool_result = result.stdout
+
+            if result.stderr:
+                tool_result = f"Error during execution: {result.stderr}"
+            else:
+                tool_result = result.stdout
 
-        if self.stateful:
             from langgraph.types import Command
 
             # if the tool is used with a stateful sandbox,
@@ -782,8 +730,25 @@ def _run(
                 ],
             }
         )
+        else:
+            # Non-stateful sandbox
+            result = self._sync_sandbox.execute(
+                code, timeout_seconds=self.timeout_seconds
+            )
 
-        return tool_result
+            # More robust error handling
+            if result.status == "error":
+                error_msg = result.stderr if result.stderr else "Execution failed with unknown error"
+                return f"Error during execution: {error_msg}"
+
+            # On success, return stdout or the result
+            if result.stdout:
+                return result.stdout
+
+            if result.result is not None:
+                return str(result.result)
+
+            return ""
 
     async def _arun(
         self,
@@ -817,17 +782,12 @@ async def _arun(
             session_metadata=session_metadata,
             timeout_seconds=self.timeout_seconds,
         )
-        else:
-            result = await self._sandbox.execute(
-                code, timeout_seconds=self.timeout_seconds
-            )
-
-        if result.stderr:
-            tool_result = f"Error during execution: {result.stderr}"
-        else:
-            tool_result = result.stdout
+
+            if result.stderr:
+                tool_result = f"Error during execution: {result.stderr}"
+            else:
+                tool_result = result.stdout
 
-        if self.stateful:
             from langgraph.types import Command
 
             # if the tool is used with a stateful sandbox,
@@ -844,5 +804,22 @@ async def _arun(
                 ],
             }
         )
+        else:
+            # Non-stateful sandbox
+            result = await self._sandbox.execute(
+                code, timeout_seconds=self.timeout_seconds
+            )
 
-    return tool_result
+            # More robust error handling
+            if result.status == "error":
+                error_msg = result.stderr if result.stderr else "Execution failed with unknown error"
+                return f"Error during execution: {error_msg}"
+
+            # On success, return stdout or the result
+            if result.stdout:
+                return result.stdout
+
+            if result.result is not None:
+                return str(result.result)
+
+            return ""
\ No newline at end of file

From 5b7b91deac6be97e18e25c52db833652e9d31c78 Mon Sep 17 00:00:00 2001
From: fullzer4 
Date: Wed, 28 May 2025 11:37:57 -0300
Subject: [PATCH 07/27] feat: implement dynamic filesystem support with
 flexible tool architecture

---
 libs/pyodide-sandbox-js/main.ts               |    8 +-
 libs/sandbox-py/langchain_sandbox/__init__.py |    2 +-
 libs/sandbox-py/langchain_sandbox/pyodide.py  |  628 ++++++++++--------
 .../tests/unit_tests/test_pyodide_sandbox.py  |  273 +++-----
 4 files changed, 436 insertions(+), 475 deletions(-)

diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts
index 9b0b5de..d5d7a8d 100644
--- a/libs/pyodide-sandbox-js/main.ts
+++ b/libs/pyodide-sandbox-js/main.ts
@@ -615,7 +615,7 @@ async function main(): Promise {
       s: "stateful",
       b: "session-bytes",
       m: "session-metadata",
-      fs: "fs-operations",
+      x: "fs-operations",
     },
     boolean: ["help", "version", "stateful"],
     default: {
@@ -636,7 +636,7 @@ OPTIONS:
   -s, --stateful           Use a stateful session
   -b, --session-bytes      Session bytes
   -m, --session-metadata   Session metadata
-  -fs, --fs-operations     JSON array of filesystem operations
+  -x, --fs-operations      JSON array of filesystem operations
   -h, --help               Display help
   -V, --version            Display version
  `);
@@ -707,8 +707,8 @@ OPTIONS:
     // Output result
     const outputJson: any = {
-      stdout: result.stdout?.join('') || null,
-      stderr: result.success ? (result.stderr?.join('') || null) : result.error || null,
+      stdout: result.stdout?.join('\n') || null, // <-- join lines with '\n'
+      stderr: result.success ? (result.stderr?.join('\n') || null) : result.error || null,
       result: result.success ?
JSON.parse(result.jsonResult || 'null') : null, success: result.success, sessionBytes: result.sessionBytes, diff --git a/libs/sandbox-py/langchain_sandbox/__init__.py b/libs/sandbox-py/langchain_sandbox/__init__.py index ab8c43a..9265d7e 100644 --- a/libs/sandbox-py/langchain_sandbox/__init__.py +++ b/libs/sandbox-py/langchain_sandbox/__init__.py @@ -9,5 +9,5 @@ __all__ = [ "PyodideSandbox", "PyodideSandboxTool", - "SyncPyodideSandbox", + "SyncPyodideSandbox" ] diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 3103982..63a76ea 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -15,8 +15,8 @@ ) from langchain_core.messages import ToolMessage from langchain_core.runnables import RunnableConfig -from langchain_core.tools import BaseTool, InjectedToolCallId -from pydantic import BaseModel, Field +from langchain_core.tools import BaseTool, StructuredTool, InjectedToolCallId +from pydantic import BaseModel, Field, PrivateAttr logger = logging.getLogger(__name__) @@ -48,6 +48,22 @@ class FileSystemOperation: content: str | None = None encoding: str | None = None destination: str | None = None + + def to_dict(self) -> dict[str, str]: + """Convert to dict for JSON serialization.""" + result = { + "operation": self.operation, + "path": self.path, + } + + if self.content is not None: + result["content"] = self.content + if self.encoding is not None: + result["encoding"] = self.encoding + if self.destination is not None: + result["destination"] = self.destination + + return result # Published package name @@ -82,7 +98,7 @@ def __init__( allow_ffi: list[str] | bool = False, node_modules_dir: str = "auto", skip_deno_check: bool = False, - enable_filesystem: bool = False, # Novo: controle explícito do filesystem + enable_filesystem: bool = False, ) -> None: """Initialize the sandbox with specific Deno permissions.""" self.stateful = stateful @@ -128,16 +144,12 @@ def attach_file( *, encoding: str = "utf-8", ) -> None: - """Attach a file to the sandbox filesystem. - - Args: - path: Path where the file should be created (relative to /sandbox) - content: Content of the file as a string - encoding: Text encoding for the file (default: utf-8) - """ - # Auto-enable filesystem when files are attached + """Attach a file to the sandbox filesystem.""" self.enable_filesystem = True + if not isinstance(content, str): + raise ValueError("Content must be a string for text files") + operation = FileSystemOperation( operation="write", path=path, @@ -145,22 +157,19 @@ def attach_file( encoding=encoding, ) self._filesystem_operations.append(operation) - logger.debug(f"Attached file: {path} ({len(content)} chars)") + logger.debug(f"Attached file: {path} ({len(content)} chars, encoding: {encoding})") def attach_binary_file( self, path: str, content: bytes, ) -> None: - """Attach a binary file to the sandbox filesystem. 
- - Args: - path: Path where the file should be created (relative to /sandbox) - content: Binary content of the file - """ - # Auto-enable filesystem when files are attached + """Attach a binary file to the sandbox filesystem.""" self.enable_filesystem = True + if not isinstance(content, bytes): + raise ValueError("Content must be bytes for binary files") + b64_content = base64.b64encode(content).decode("ascii") operation = FileSystemOperation( operation="write", @@ -169,15 +178,10 @@ def attach_binary_file( encoding="binary", ) self._filesystem_operations.append(operation) - logger.debug(f"Attached binary file: {path} ({len(content)} bytes)") + logger.debug(f"Attached binary file: {path} ({len(content)} bytes -> {len(b64_content)} b64 chars)") def create_directory(self, path: str) -> None: - """Create a directory in the sandbox filesystem. - - Args: - path: Directory path to create (relative to /sandbox) - """ - # Auto-enable filesystem when directories are created + """Create a directory in the sandbox filesystem.""" self.enable_filesystem = True operation = FileSystemOperation( @@ -187,72 +191,13 @@ def create_directory(self, path: str) -> None: self._filesystem_operations.append(operation) logger.debug(f"Created directory: {path}") - def read_file(self, path: str, *, encoding: str = "utf-8") -> None: - """Queue a file read operation. - - Args: - path: Path to read from (relative to /sandbox) - encoding: Text encoding for the file (default: utf-8) - - Note: This queues the operation but doesn't return content immediately. - Use this when you need to read files during code execution. - """ - self.enable_filesystem = True - - operation = FileSystemOperation( - operation="read", - path=path, - encoding=encoding, - ) - self._filesystem_operations.append(operation) - logger.debug(f"Queued read operation: {path}") - - def list_directory(self, path: str = ".") -> None: - """Queue a directory listing operation. - - Args: - path: Directory path to list (relative to /sandbox, default: current) - """ - self.enable_filesystem = True - - operation = FileSystemOperation( - operation="list", - path=path, - ) - self._filesystem_operations.append(operation) - logger.debug(f"Queued list operation: {path}") - - def remove_path(self, path: str) -> None: - """Queue a file or directory removal operation. - - Args: - path: Path to remove (relative to /sandbox) - """ - self.enable_filesystem = True - - operation = FileSystemOperation( - operation="remove", - path=path, - ) - self._filesystem_operations.append(operation) - logger.debug(f"Queued remove operation: {path}") - - def copy_path(self, source: str, destination: str) -> None: - """Queue a file or directory copy operation. 
- - Args: - source: Source path (relative to /sandbox) - destination: Destination path (relative to /sandbox) - """ - self.enable_filesystem = True - - operation = FileSystemOperation( - operation="copy", - path=source, - destination=destination, - ) - self._filesystem_operations.append(operation) - logger.debug(f"Queued copy operation: {source} -> {destination}") + def get_attached_files(self) -> list[str]: + """Get list of attached file paths.""" + files = [] + for op in self._filesystem_operations: + if op.operation in ["write"]: + files.append(op.path) + return files def clear_filesystem_operations(self) -> None: """Clear all queued filesystem operations.""" @@ -284,35 +229,19 @@ def _build_command(self, code: str, **kwargs) -> list[str]: if kwargs.get('session_metadata'): cmd.extend(["-m", json.dumps(kwargs['session_metadata'])]) - # FILESYSTEM: Ativado se há operações ou foi explicitamente habilitado + # FILESYSTEM if self._filesystem_operations or self.enable_filesystem: - # Construir operações filesystem se existem if self._filesystem_operations: - fs_ops = [] - for op in self._filesystem_operations: - op_dict = { - "operation": op.operation, - "path": op.path, - } - if op.content is not None: - op_dict["content"] = op.content - if op.encoding is not None: - op_dict["encoding"] = op.encoding - if op.destination is not None: - op_dict["destination"] = op.destination - fs_ops.append(op_dict) - - cmd.extend(["-fs", json.dumps(fs_ops)]) + fs_ops = [op.to_dict() for op in self._filesystem_operations] + fs_json = json.dumps(fs_ops, ensure_ascii=True, separators=(',', ':')) + cmd.extend(["-x", fs_json]) logger.debug(f"Filesystem enabled with {len(fs_ops)} operations") - if len(fs_ops) <= 5: # Log detalhes se poucas operações - for i, op in enumerate(fs_ops): - logger.debug(f" Op {i+1}: {op['operation']} {op['path']}") else: - # Filesystem habilitado mas sem operações iniciais - cmd.extend(["-fs", "[]"]) + cmd.extend(["-x", "[]"]) logger.debug("Filesystem enabled with no initial operations") + logger.debug(f"Full command: {' '.join(cmd)}") return cmd @@ -342,9 +271,6 @@ async def execute( memory_limit_mb=memory_limit_mb, ) - # Debug logging - logger.debug(f"Executing command: {' '.join(cmd[:8])}{'...' 
if len(cmd) > 8 else ''}")
 
         # Create and run the subprocess
         process = await asyncio.create_subprocess_exec(
             *cmd,
@@ -353,7 +279,6 @@ async def execute(
         )
 
         try:
-            # Wait for process with a timeout
             stdout_bytes, stderr_bytes = await asyncio.wait_for(
                 process.communicate(),
                 timeout=timeout_seconds,
@@ -361,7 +286,6 @@ async def execute(
             stdout = stdout_bytes.decode("utf-8", errors="replace")
 
             if stdout:
-                # Extract JSON from output that may contain loading messages
                 full_result = json.loads(stdout)
                 stdout = full_result.get("stdout", None)
                 stderr = full_result.get("stderr", None)
@@ -371,17 +295,10 @@ async def execute(
                 filesystem_info = full_result.get("fileSystemInfo", None)
                 filesystem_operations = full_result.get("fileSystemOperations", None)
 
-                # Convert the Uint8Array to Python bytes
                 session_bytes_array = full_result.get("sessionBytes", None)
                 session_bytes = (
                     bytes(session_bytes_array) if session_bytes_array else None
                 )
-
-                # Log filesystem info if available
-                if filesystem_info:
-                    logger.debug(f"Filesystem: {filesystem_info['type']} at {filesystem_info['mountPoint']}")
-                if filesystem_operations:
-                    logger.debug(f"Filesystem operations completed: {len(filesystem_operations)}")
             else:
                 stderr = stderr_bytes.decode("utf-8", errors="replace")
                 status = "error"
@@ -400,7 +317,6 @@ async def execute(
                 filesystem_info = None
                 filesystem_operations = None
             except asyncio.CancelledError:
-                # Optionally: log cancellation if needed
                 pass
 
         end_time = time.time()
@@ -444,17 +360,13 @@ def execute(
             memory_limit_mb=memory_limit_mb,
         )
 
-        # Debug logging
-        logger.debug(f"Executing command: {' '.join(cmd[:8])}{'...' if len(cmd) > 8 else ''}")
-
         try:
-            # Run the subprocess with timeout
             process = subprocess.run(  # noqa: S603
                 cmd,
                 capture_output=True,
-                text=False,  # Keep as bytes for proper decoding
+                text=False,
                 timeout=timeout_seconds,
-                check=False,  # Don't raise on non-zero exit
+                check=False,
             )
 
             stdout_bytes = process.stdout
@@ -463,7 +375,6 @@ def execute(
             stdout = stdout_bytes.decode("utf-8", errors="replace")
 
             if stdout:
-                # Extract JSON from output that may contain loading messages
                 full_result = json.loads(stdout)
                 stdout = full_result.get("stdout", None)
                 stderr = full_result.get("stderr", None)
@@ -473,17 +384,10 @@ def execute(
                 filesystem_info = full_result.get("fileSystemInfo", None)
                 filesystem_operations = full_result.get("fileSystemOperations", None)
 
-                # Convert the Uint8Array to Python bytes
                 session_bytes_array = full_result.get("sessionBytes", None)
                 session_bytes = (
                     bytes(session_bytes_array) if session_bytes_array else None
                 )
-
-                # Log filesystem info if available
-                if filesystem_info:
-                    logger.debug(f"Filesystem: {filesystem_info['type']} at {filesystem_info['mountPoint']}")
-                if filesystem_operations:
-                    logger.debug(f"Filesystem operations completed: {len(filesystem_operations)}")
             else:
                 stderr = stderr_bytes.decode("utf-8", errors="replace")
                 status = "error"
@@ -516,20 +420,39 @@ def execute(
         )
 
 
+# Input schema for tools
+class PyodideSandboxInput(BaseModel):
+    """Input schema for PyodideSandbox tool."""
+    code: str = Field(description="Python code to execute.")
+
+
+# =============================================================================
+# MAIN CLASS - Inherits from BaseTool but also exposes a StructuredTool view
+# =============================================================================
+
 class PyodideSandboxTool(BaseTool):
-    """Tool for running python code in a PyodideSandbox."""
+    """
+    Flexible PyodideSandbox tool that can be used as BaseTool or StructuredTool.
+
+    Usage examples:
+
+    # As BaseTool (direct inheritance):
+    tool = PyodideSandboxTool(enable_filesystem=True)
+    result = tool.invoke({"code": "print('Hello')"})
+
+    # As StructuredTool (via property):
+    tool = PyodideSandboxTool(enable_filesystem=True)
+    result = tool.as_structured_tool().invoke({"code": "print('Hello')"})
+
+    # For agents that require a StructuredTool:
+    agent = create_react_agent(llm, [tool.as_structured_tool()])
+
+    # For agents that accept a BaseTool:
+    agent = create_react_agent(llm, [tool])
+    """
 
     name: str = "python_code_sandbox"
-    description: str = (
-        "A secure Python code sandbox with filesystem support. Use this to execute python commands.\n"
-        "- Input should be a valid python command.\n"
-        "- To return output, you should print it out with `print(...)`.\n"
-        "- Don't use f-strings when printing outputs.\n"
-        "- If you need to make web requests, use `httpx.AsyncClient`.\n"
-        "- Files can be read/written using standard Python file operations.\n"
-        "- All file operations work within a sandboxed memory filesystem."
-    )
-
+
     # Mirror the PyodideSandbox constructor arguments
     stateful: bool = False
     allow_env: list[str] | bool = False
@@ -539,12 +462,39 @@ class PyodideSandboxTool(BaseTool):
     allow_run: list[str] | bool = False
     allow_ffi: list[str] | bool = False
     timeout_seconds: float | None
-    """Timeout for code execution in seconds. By default set to 60 seconds."""
     node_modules_dir: str = "auto"
-    enable_filesystem: bool = False  # NEW: filesystem control
+    enable_filesystem: bool = False
+
+    # FIX: use PrivateAttr for private attributes in Pydantic
+    _sandbox: PyodideSandbox = PrivateAttr()
+    _sync_sandbox: SyncPyodideSandbox = PrivateAttr()
+    _structured_tool: StructuredTool | None = PrivateAttr(default=None)
+    _stateful: bool = PrivateAttr()
+    _input_schema: type[BaseModel] = PrivateAttr()
+
+    def _build_description(self) -> str:
+        """Build the complete description string with attached files."""
+        base = (
+            "A secure Python code sandbox with filesystem support. "
+            "Use this to execute python commands.\n"
+            "- Input should be a valid python command.\n"
+            "- To return output, you should print it out with `print(...)`.\n"
+            "- Don't use f-strings when printing outputs.\n"
+            "- If you need to make web requests, use `httpx.AsyncClient`.\n"
+            "- Files can be read/written using standard Python file operations.\n"
+            "- All file operations work within a sandboxed memory filesystem.\n"
+            "- Check for attached files using: import os; print(os.listdir('.'))"
+        )
 
-    _sandbox: PyodideSandbox
-    _sync_sandbox: SyncPyodideSandbox
+        files = self._sandbox.get_attached_files()
+        if files:
+            base += "\n\n🗂️ ATTACHED FILES AVAILABLE:\n"
+            base += "\n".join(f"  • {p}" for p in files)
+            base += (
+                "\nThese files are already loaded and ready to use with pandas, "
+                "open(), etc."
+            )
+        return base
 
     def __init__(
         self,
@@ -552,28 +502,12 @@ def __init__(
         stateful: bool = False,
         timeout_seconds: float | None = 60,
         allow_net: list[str] | bool = False,
-        enable_filesystem: bool = False,  # NEW: explicitly enable the filesystem
+        enable_filesystem: bool = False,
         **kwargs: dict[str, Any],
     ) -> None:
-        """Initialize the tool.
-
-        Args:
-            stateful: Whether to use a stateful sandbox.
-            timeout_seconds: Timeout for code execution in seconds.
-            allow_net: configure network access.
-            enable_filesystem: Enable filesystem operations in the sandbox.
-                This is automatically enabled when files are attached.
-            **kwargs: Other attributes will be passed to the PyodideSandbox
-        """
-        super().__init__(
-            stateful=stateful,
-            timeout_seconds=timeout_seconds,
-            allow_net=allow_net,
-            enable_filesystem=enable_filesystem,
-            **kwargs,
-        )
-
-        if self.stateful:
+        """Initialize the tool."""
+
+        if stateful:
             try:
                 from langgraph.prebuilt import InjectedState
             except ImportError as e:
@@ -587,8 +521,6 @@ class PyodideSandboxToolInput(BaseModel):
                 """Python code to execute in the sandbox."""
 
                 code: str = Field(description="Code to execute.")
-                # these fields will be ignored by the LLM
-                # and automatically injected by LangGraph's ToolNode
                 state: Annotated[dict[str, Any] | BaseModel, InjectedState]
                 tool_call_id: Annotated[str, InjectedToolCallId]
 
@@ -599,33 +531,60 @@ class PyodideSandboxToolInput(BaseModel):
 
                 code: str = Field(description="Code to execute.")
 
-        self.args_schema: type[BaseModel] = PyodideSandboxToolInput
-        self._sandbox = PyodideSandbox(
-            stateful=self.stateful,
-            allow_env=self.allow_env,
-            allow_read=self.allow_read,
-            allow_write=self.allow_write,
-            allow_net=self.allow_net,
-            allow_run=self.allow_run,
-            allow_ffi=self.allow_ffi,
-            node_modules_dir=self.node_modules_dir,
-            enable_filesystem=self.enable_filesystem,  # NEW
+        # Create the sandboxes
+        sandbox = PyodideSandbox(
+            stateful=stateful,
+            allow_env=kwargs.get('allow_env', False),
+            allow_read=kwargs.get('allow_read', False),
+            allow_write=kwargs.get('allow_write', False),
+            allow_net=allow_net,
+            allow_run=kwargs.get('allow_run', False),
+            allow_ffi=kwargs.get('allow_ffi', False),
+            node_modules_dir=kwargs.get('node_modules_dir', 'auto'),
+            enable_filesystem=enable_filesystem,
+        )
+        sync_sandbox = SyncPyodideSandbox(
+            stateful=stateful,
+            allow_env=kwargs.get('allow_env', False),
+            allow_read=kwargs.get('allow_read', False),
+            allow_write=kwargs.get('allow_write', False),
+            allow_net=allow_net,
+            allow_run=kwargs.get('allow_run', False),
+            allow_ffi=kwargs.get('allow_ffi', False),
+            node_modules_dir=kwargs.get('node_modules_dir', 'auto'),
+            enable_filesystem=enable_filesystem,
+            skip_deno_check=True,
         )
-        # Initialize sync sandbox with deno check skipped since async sandbox already
-        # checked
-        self._sync_sandbox = SyncPyodideSandbox(
-            stateful=self.stateful,
-            allow_env=self.allow_env,
-            allow_read=self.allow_read,
-            allow_write=self.allow_write,
-            allow_net=self.allow_net,
-            allow_run=self.allow_run,
-            allow_ffi=self.allow_ffi,
-            node_modules_dir=self.node_modules_dir,
-            enable_filesystem=self.enable_filesystem,  # NEW
-            skip_deno_check=True,  # Skip deno check since async sandbox already checked
+
+        # Define the initial description
+        initial_description = (
+            "A secure Python code sandbox with filesystem support. "
" + "Use this to execute python commands.\n" + "- Input should be a valid python command.\n" + "- To return output, you should print it out with `print(...)`.\n" + "- Don't use f-strings when printing outputs.\n" + "- If you need to make web requests, use `httpx.AsyncClient`.\n" + "- Files can be read/written using standard Python file operations.\n" + ) + + # Chamar super().__init__() com a descrição calculada + super().__init__( + stateful=stateful, + timeout_seconds=timeout_seconds, + allow_net=allow_net, + enable_filesystem=enable_filesystem, + description=initial_description, + args_schema=PyodideSandboxToolInput, + **kwargs, ) + # IMPORTANTE: Definir atributos privados APÓS super().__init__() + self._sandbox = sandbox + self._sync_sandbox = sync_sandbox + self._stateful = stateful + self._input_schema = PyodideSandboxToolInput + self._structured_tool = None + def attach_file( self, path: str, @@ -636,6 +595,11 @@ def attach_file( """Attach a file to the sandbox environment.""" self._sandbox.attach_file(path, content, encoding=encoding) self._sync_sandbox.attach_file(path, content, encoding=encoding) + # Atualizar descrição em ambas as versões + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description def attach_binary_file( self, @@ -645,36 +609,128 @@ def attach_binary_file( """Attach a binary file to the sandbox environment.""" self._sandbox.attach_binary_file(path, content) self._sync_sandbox.attach_binary_file(path, content) + # Atualizar descrição em ambas as versões + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description def create_directory(self, path: str) -> None: """Create a directory in the sandbox environment.""" self._sandbox.create_directory(path) self._sync_sandbox.create_directory(path) + # Atualizar descrição em ambas as versões + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description - def read_file(self, path: str, *, encoding: str = "utf-8") -> None: - """Queue a file read operation for the next execution.""" - self._sandbox.read_file(path, encoding=encoding) - self._sync_sandbox.read_file(path, encoding=encoding) - - def list_directory(self, path: str = ".") -> None: - """Queue a directory listing operation for the next execution.""" - self._sandbox.list_directory(path) - self._sync_sandbox.list_directory(path) - - def remove_path(self, path: str) -> None: - """Queue a file/directory removal operation for the next execution.""" - self._sandbox.remove_path(path) - self._sync_sandbox.remove_path(path) - - def copy_path(self, source: str, destination: str) -> None: - """Queue a file/directory copy operation for the next execution.""" - self._sandbox.copy_path(source, destination) - self._sync_sandbox.copy_path(source, destination) + def get_attached_files(self) -> list[str]: + """Get list of attached file paths.""" + return self._sandbox.get_attached_files() def clear_filesystem_operations(self) -> None: - """Clear all queued filesystem operations.""" + """Clear all filesystem operations and update description.""" self._sandbox.clear_filesystem_operations() self._sync_sandbox.clear_filesystem_operations() + # Atualizar descrição em ambas as versões + new_description = self._build_description() + self.description = new_description + if 
self._structured_tool: + self._structured_tool.description = new_description + + def as_structured_tool(self) -> StructuredTool: + """ + Return a StructuredTool version of this tool. + + This allows users to access the tool as a StructuredTool when needed, + while maintaining the BaseTool interface as the primary one. + """ + if self._structured_tool is None: + self._structured_tool = StructuredTool.from_function( + name=self.name, + description=self.description, + func=self._run_sync if not self._stateful else self._run_stateful_sync, + args_schema=self._input_schema, + ) + return self._structured_tool + + @property + def tool(self) -> StructuredTool: + """ + Legacy property for backwards compatibility. + + DEPRECATED: Use as_structured_tool() instead. + """ + return self.as_structured_tool() + + def _run_sync(self, code: str) -> str: + """Synchronous execution function for non-stateful mode.""" + result = self._sync_sandbox.execute( + code, timeout_seconds=self.timeout_seconds + ) + + if result.status == "error": + error_msg = result.stderr if result.stderr else "Execution failed with unknown error" + return f"Error during execution: {error_msg}" + + if result.stdout: + return result.stdout + + if result.result is not None: + return str(result.result) + + return "" + + def _run_stateful_sync( + self, + code: str, + state: dict[str, Any] | BaseModel, + tool_call_id: str, + ) -> Any: + """Synchronous execution function for stateful mode.""" + required_keys = {"session_bytes", "session_metadata", "messages"} + actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) + if missing_keys := required_keys - actual_keys: + error_msg = ( + "Input state is missing " + f"the following required keys: {missing_keys}" + ) + raise ValueError(error_msg) + + if isinstance(state, dict): + session_bytes = state["session_bytes"] + session_metadata = state["session_metadata"] + else: + session_bytes = state.session_bytes + session_metadata = state.session_metadata + + result = self._sync_sandbox.execute( + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + timeout_seconds=self.timeout_seconds, + ) + + if result.stderr: + tool_result = f"Error during execution: {result.stderr}" + else: + tool_result = result.stdout + + from langgraph.types import Command + + return Command( + update={ + "session_bytes": result.session_bytes, + "session_metadata": result.session_metadata, + "messages": [ + ToolMessage( + content=tool_result, + tool_call_id=tool_call_id, + ) + ], + } + ) def _run( self, @@ -683,72 +739,12 @@ def _run( tool_call_id: str | None = None, config: RunnableConfig | None = None, run_manager: CallbackManagerForToolRun | None = None, - ) -> Any: # noqa: ANN401 - """Use the tool synchronously.""" + ) -> Any: + """Use the tool synchronously (BaseTool interface).""" if self.stateful: - required_keys = {"session_bytes", "session_metadata", "messages"} - actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) - if missing_keys := required_keys - actual_keys: - error_msg = ( - "Input state is missing " - f"the following required keys: {missing_keys}" - ) - raise ValueError(error_msg) - - if isinstance(state, dict): - session_bytes = state["session_bytes"] - session_metadata = state["session_metadata"] - else: - session_bytes = state.session_bytes - session_metadata = state.session_metadata - - result = self._sync_sandbox.execute( - code, - session_bytes=session_bytes, - session_metadata=session_metadata, - timeout_seconds=self.timeout_seconds, - 
-        )
-
-        if result.stderr:
-            tool_result = f"Error during execution: {result.stderr}"
-        else:
-            tool_result = result.stdout
-
-        from langgraph.types import Command
-
-        # if the tool is used with a stateful sandbox,
-        # we need to update the graph state with the new session bytes and metadata
-        return Command(
-            update={
-                "session_bytes": result.session_bytes,
-                "session_metadata": result.session_metadata,
-                "messages": [
-                    ToolMessage(
-                        content=tool_result,
-                        tool_call_id=tool_call_id,
-                    )
-                ],
-            }
-        )
+            return self._run_stateful_sync(code, state, tool_call_id)
         else:
-            # Non-stateful sandbox
-            result = self._sync_sandbox.execute(
-                code, timeout_seconds=self.timeout_seconds
-            )
-
-            # More robust error handling
-            if result.status == "error":
-                error_msg = result.stderr if result.stderr else "Execution failed with unknown error"
-                return f"Error during execution: {error_msg}"
-
-            # On success, return stdout or the result
-            if result.stdout:
-                return result.stdout
-
-            if result.result is not None:
-                return str(result.result)
-
-            return ""
+            return self._run_sync(code)
 
     async def _arun(
         self,
@@ -757,8 +753,8 @@ async def _arun(
         tool_call_id: str | None = None,
         config: RunnableConfig | None = None,
         run_manager: AsyncCallbackManagerForToolRun | None = None,
-    ) -> Any:  # noqa: ANN401
-        """Use the tool asynchronously."""
+    ) -> Any:
+        """Use the tool asynchronously (BaseTool interface)."""
         if self.stateful:
             required_keys = {"session_bytes", "session_metadata", "messages"}
             actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__)
@@ -790,8 +786,6 @@ async def _arun(
 
             from langgraph.types import Command
 
-            # if the tool is used with a stateful sandbox,
-            # we need to update the graph state with the new session bytes and metadata
             return Command(
                 update={
                     "session_bytes": result.session_bytes,
@@ -805,21 +799,67 @@ async def _arun(
                 }
             )
         else:
-            # Non-stateful sandbox
             result = await self._sandbox.execute(
                 code, timeout_seconds=self.timeout_seconds
             )
 
-            # More robust error handling
             if result.status == "error":
                 error_msg = result.stderr if result.stderr else "Execution failed with unknown error"
                 return f"Error during execution: {error_msg}"
 
-            # On success, return stdout or the result
             if result.stdout:
                 return result.stdout
 
             if result.result is not None:
                 return str(result.result)
 
-            return ""
\ No newline at end of file
+            return ""
+
+
+# =============================================================================
+# ALTERNATIVE WRAPPER - kept for compatibility with existing code
+# =============================================================================
+
+class PyodideSandboxDynamicTool:
+    """
+    Pure StructuredTool wrapper for PyodideSandbox (legacy compatibility).
+
+    DEPRECATED: Use PyodideSandboxTool instead.
+    """
+
+    def __init__(self, **kwargs):
+        """Initialize the wrapper - prefer PyodideSandboxTool instead."""
+        logger.warning(
+            "PyodideSandboxDynamicTool is deprecated. "
+            "Use PyodideSandboxTool instead."
+ ) + self._base_tool = PyodideSandboxTool(**kwargs) + self.tool = self._base_tool.as_structured_tool() + + def attach_file(self, path: str, content: str, *, encoding: str = "utf-8") -> None: + """Attach a file to the sandbox environment.""" + self._base_tool.attach_file(path, content, encoding=encoding) + + def attach_binary_file(self, path: str, content: bytes) -> None: + """Attach a binary file to the sandbox environment.""" + self._base_tool.attach_binary_file(path, content) + + def create_directory(self, path: str) -> None: + """Create a directory in the sandbox environment.""" + self._base_tool.create_directory(path) + + def get_attached_files(self) -> list[str]: + """Get list of attached file paths.""" + return self._base_tool.get_attached_files() + + def clear_filesystem_operations(self) -> None: + """Clear all filesystem operations and update description.""" + self._base_tool.clear_filesystem_operations() + + def invoke(self, input_data: dict[str, Any]) -> str: + """Direct invoke method for easier usage.""" + return self.tool.invoke(input_data) + + async def ainvoke(self, input_data: dict[str, Any]) -> str: + """Async direct invoke method for easier usage.""" + return await self.tool.ainvoke(input_data) \ No newline at end of file diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index ecf3422..b069a31 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -20,6 +20,17 @@ def pyodide_package(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr("langchain_sandbox.pyodide.PKG_NAME", local_script) +@pytest.fixture +def mock_csv_data(): + """Sample sales data for testing.""" + return """date,product_id,category,quantity,price,customer_id,region +2024-01-15,P001,Electronics,2,499.99,C123,North +2024-01-16,P002,Furniture,1,899.50,C124,South +2024-01-16,P003,Clothing,5,59.99,C125,East +2024-01-17,P001,Electronics,1,499.99,C126,West +2024-01-18,P004,Electronics,3,299.99,C127,North""" + + def get_default_sandbox(stateful: bool = False) -> PyodideSandbox: """Get default PyodideSandbox instance for testing.""" return PyodideSandbox( @@ -167,226 +178,136 @@ def test_sync_pyodide_sandbox_timeout(pyodide_package: None) -> None: def test_pyodide_sandbox_tool() -> None: """Test synchronous invocation of PyodideSandboxTool.""" - tool = PyodideSandboxTool(stateful=False, allow_net=True) + tool = PyodideSandboxTool(stateful=False, allow_net=True, allow_read=True, allow_write=True) result = tool.invoke("x = 5; print(x)") assert result == "5" result = tool.invoke("x = 5; print(1); print(2)") - assert result == "12" + assert result == "1\n2" def test_pyodide_timeout() -> None: """Test synchronous invocation of PyodideSandboxTool with timeout.""" - tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True) + tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True, allow_read=True, allow_write=True) result = tool.invoke("while True: pass") assert result == "Error during execution: Execution timed out after 0.1 seconds" async def test_async_pyodide_sandbox_tool() -> None: """Test synchronous invocation of PyodideSandboxTool.""" - tool = PyodideSandboxTool(stateful=False, allow_net=True) + tool = PyodideSandboxTool(stateful=False, allow_net=True, allow_read=True, allow_write=True) result = await tool.ainvoke("x = 5; print(x)") assert result == "5" result = await tool.ainvoke("x = 5; print(1); 
print(2)") # TODO: Need to preserve newlines in the output # noqa: FIX002, TD002 # https://github.com/langchain-ai/langchain-sandbox/issues/26 - assert result == "12" + assert result == "1\n2" async def test_async_pyodide_timeout() -> None: """Test synchronous invocation of PyodideSandboxTool with timeout.""" - tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True) + tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True, allow_read=True, allow_write=True) result = await tool.ainvoke("while True: pass") assert result == "Error during execution: Execution timed out after 0.1 seconds" - -async def test_attach_binary_file(pyodide_package: None) -> None: - """Test attaching and reading a binary file.""" - sandbox = PyodideSandbox( - allow_read=True, - allow_write=True, - ) - - simple_binary = bytes([0x01, 0x02, 0x03, 0x04, 0x05]) - - sandbox.attach_file("test_binary.bin", simple_binary) - +async def test_filesystem_basic_operations(): + """Test basic filesystem operations.""" + sandbox = PyodideSandbox(enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True) + + # Attach files + sandbox.attach_file("test.txt", "Hello, World!") + sandbox.attach_file("data.json", '{"key": "value"}') + sandbox.create_directory("output") + code = """ import os -import base64 - -file_path = "/sandbox/test_binary.bin" -if os.path.exists(file_path): - with open(file_path, "rb") as f: - content = f.read() - - print(f"File exists: True") - print(f"Content length: {len(content)}") - print(f"Content bytes: {', '.join(str(b) for b in content)}") -else: - print("File exists: False") -""" - - result = await sandbox.execute(code) - - assert result.status == "success", f"Error in execution: {result.stderr}" - assert "File exists: True" in result.stdout - assert "Content length: 5" in result.stdout - assert "Content bytes: 1, 2, 3, 4, 5" in result.stdout - - -async def test_clear_files_after_execution(pyodide_package: None) -> None: - """Test clearing files after execution.""" - sandbox = get_default_sandbox() - - sandbox.attach_file("temp.txt", "Temporary content") - - result1 = await sandbox.execute( - 'print(open("/sandbox/temp.txt").read())', - clear_files=True - ) - assert result1.status == "success" - assert "Temporary content" in result1.stdout - - assert len(sandbox.file_operations) == 0 - - result2 = await sandbox.execute(""" -import os -if os.path.exists("/sandbox/temp.txt"): - print("File still exists") -else: - print("File is gone") -""") - assert result2.status == "success" - assert "File is gone" in result2.stdout - - -async def test_tool_with_file_attachment(pyodide_package: None) -> None: - """Test using PyodideSandboxTool with file attachment.""" - tool = PyodideSandboxTool(allow_read=True, allow_write=True, allow_net=True) - - tool.attach_file("data.csv", "id,value\n1,100\n2,200\n3,300") - tool.attach_file("config.json", '{"max_value": 250, "min_value": 50}') - - code = """ -import csv import json -with open("/sandbox/data.csv", "r") as f: - reader = csv.DictReader(f) - rows = list(reader) - -with open("/sandbox/config.json", "r") as f: - config = json.load(f) - -# Filter data based on config -filtered = [] -for row in rows: - value = int(row["value"]) - if config["min_value"] <= value <= config["max_value"]: - filtered.append(row) +# Read files +with open("test.txt", "r") as f: + txt_content = f.read() -print(f"Filtered data:") -for row in filtered: - print(f"id: {row['id']}, value: {row['value']}") -""" +with open("data.json", "r") as f: + 
json_data = json.load(f)
 
-    result = await tool.ainvoke(code)
+# Create new file
+with open("output/result.txt", "w") as f:
+    f.write("Processing complete!")
 
-    assert "Filtered data:" in result
-    assert "id: 1, value: 100" in result
-    assert "id: 2, value: 200" in result
-    # Value 300 should be excluded by filter
-    assert "id: 3, value: 300" not in result
+# List files
+root_files = sorted(os.listdir("."))
+output_files = sorted(os.listdir("output"))
 
+print(f"Text: {txt_content}")
+print(f"JSON key: {json_data['key']}")
+print(f"Root files: {root_files}") 
+print(f"Output files: {output_files}")
 
-async def test_directory_operations(pyodide_package: None) -> None:
-    """Test directory creation and file operations within directories."""
-    sandbox = get_default_sandbox()
-    
-    sandbox.attach_file("nested/dir/file.txt", "Content in nested directory")
-    
-    code = """
-import os
-from pathlib import Path
-
-dir_exists = os.path.isdir("/sandbox/nested/dir")
-file_exists = os.path.exists("/sandbox/nested/dir/file.txt")
-content = Path("/sandbox/nested/dir/file.txt").read_text() if file_exists else ""
-
-print(f"Directory exists: {dir_exists}")
-print(f"File exists: {file_exists}")
-print(f"Content: {content}")
+# Read the created file to verify it was written
+with open("output/result.txt", "r") as f:
+    created_content = f.read()
+print(f"Created file content: {created_content}")
 """
-    
+    
     result = await sandbox.execute(code)
+    print(f"DEBUG - stdout: {repr(result.stdout)}")  # To inspect the line breaks
     assert result.status == "success"
-    assert "Directory exists: True" in result.stdout
-    assert "File exists: True" in result.stdout
-    assert "Content: Content in nested directory" in result.stdout
-
-
-def test_sync_file_operations(pyodide_package: None) -> None:
-    """Test synchronous file operations."""
-    sandbox = get_default_sync_sandbox()
-    
-    sandbox.attach_files({
-        "data.txt": "Text file content",
-        "config.json": '{"enabled": true}'
-    })
-    
+    assert "Hello, World!" in result.stdout
+    assert "value" in result.stdout
+    assert "Processing complete!" 
in result.stdout
+
+
+def test_filesystem_tool_usage():
+    """Test filesystem with PyodideSandboxTool."""
+    tool = PyodideSandboxTool(enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True)
+    
+    # Attach CSV data
+    csv_data = "name,age\nAlice,30\nBob,25"
+    tool.attach_file("users.csv", csv_data)
+    
 code = """
-import json
-from pathlib import Path
+import csv
 
-text_content = Path("/sandbox/data.txt").read_text()
-json_content = json.loads(Path("/sandbox/config.json").read_text())
+users = []
+with open("users.csv", "r") as f:
+    reader = csv.DictReader(f)
+    for row in reader:
+        users.append(row)
 
-print(f"Text content: {text_content}")
-print(f"JSON enabled: {json_content['enabled']}")
+for user in users:
+    print(f"{user['name']} is {user['age']} years old")
 """
+    
+    result = tool.invoke(code)
+    assert "Alice is 30 years old" in result
+    assert "Bob is 25 years old" in result
+
+
+async def test_binary_file_operations():
+    """Test binary file operations."""
+    sandbox = PyodideSandbox(enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True)
+    
+    # Create some binary data
+    binary_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01'
+    sandbox.attach_binary_file("image.png", binary_data)
+    
+    code = """
+import base64
 
-    result = sandbox.execute(code)
-    assert result.status == "success"
-    assert "Text content: Text file content" in result.stdout
-    assert "JSON enabled: True" in result.stdout
-
-
-async def test_attach_files_with_explicit_binary_flag(pyodide_package: None) -> None:
-    """Test attaching files with explicit binary flag in dictionary format."""
-    sandbox = get_default_sandbox()
-    
-    text_content = "Hello world"
-    binary_content = b"\x00\x01\x02\x03"
-    
-    sandbox.attach_files({
-        "text_file.txt": {"content": text_content, "binary": False},
-        "binary_file.bin": {"content": binary_content, "binary": True}
-    })
+# Read binary file
+with open("image.png", "rb") as f:
+    data = f.read()
 
-    code = """
-from pathlib import Path
-import os
+# Check if it's the PNG header
+is_png = data.startswith(b'\\x89PNG')
+size = len(data)
 
-# Check text file
-text_path = "/sandbox/text_file.txt"
-if os.path.exists(text_path):
-    with open(text_path, "r") as f:
-        text_content = f.read()
-    print(f"Text content: {text_content}")
-
-# Check binary file
-bin_path = "/sandbox/binary_file.bin"
-if os.path.exists(bin_path):
-    with open(bin_path, "rb") as f:
-        bin_content = f.read()
-    print(f"Binary exists: True")
-    print(f"Binary length: {len(bin_content)}")
-    print(f"Binary bytes: {', '.join(str(b) for b in bin_content)}")
+print(f"Is PNG: {is_png}")
+print(f"Size: {size} bytes")
+print(f"Original size: {len(data)}")  # Debug
 """
-    
+    
     result = await sandbox.execute(code)
     assert result.status == "success"
-    assert "Text content: Hello world" in result.stdout
-    assert "Binary exists: True" in result.stdout
-    assert "Binary length: 4" in result.stdout
-    assert "Binary bytes: 0, 1, 2, 3" in result.stdout
+    assert "Is PNG: True" in result.stdout
+    # Adjust to the actual size or check that it is >= 16
+    assert f"Size: {len(binary_data)} bytes" in result.stdout
\ No newline at end of file

From bb23978adeb285f7eb06bbeb845e7c27ace7d57c Mon Sep 17 00:00:00 2001
From: fullzer4 
Date: Wed, 28 May 2025 12:34:54 -0300
Subject: [PATCH 08/27] fix: follow python conventions

---
 examples/react_agent_with_csv.py             |   48 +
 libs/sandbox-py/langchain_sandbox/pyodide.py | 1013 ++++++++++++-----
 .../tests/unit_tests/test_pyodide_sandbox.py |   87 +-
 3 files changed, 808 insertions(+), 340 deletions(-)
 create 
mode 100644 examples/react_agent_with_csv.py diff --git a/examples/react_agent_with_csv.py b/examples/react_agent_with_csv.py new file mode 100644 index 0000000..f5cb913 --- /dev/null +++ b/examples/react_agent_with_csv.py @@ -0,0 +1,48 @@ +# pip install langgraph-codeact "langchain[anthropic]" +import asyncio + +from langchain_sandbox import PyodideSandboxTool +from langgraph.prebuilt import create_react_agent + + +# Define the sandbox tool with filesystem support +sandbox_tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, +) + +sales_data = """date,product,category,quantity,price,region +2024-01-15,Laptop,Electronics,2,1299.99,North +2024-01-16,Chair,Furniture,1,249.50,South +2024-01-16,T-shirt,Clothing,5,29.99,East +2024-01-17,Laptop,Electronics,1,1299.99,West +2024-01-18,Phone,Electronics,3,799.99,North +2024-01-19,Desk,Furniture,2,399.99,South +2024-01-20,Jeans,Clothing,4,79.99,East +2024-01-21,Tablet,Electronics,2,499.99,West +2024-01-22,Sofa,Furniture,1,899.99,North +2024-01-23,Shoes,Clothing,3,129.99,South""" + +sandbox_tool.attach_file("sales.csv", sales_data) + +# Create an agent with the sandbox tool +agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", [sandbox_tool] +) + +query = """Please analyze the sales data and tell me: +1. What is the total revenue by category? +2. Which region has the highest sales? +3. What are the top 3 best-selling products by revenue? + +Use pandas to read the CSV file and perform the analysis.""" + +async def run_agent(query: str): + # Stream agent outputs + async for chunk in agent.astream({"messages": query}): + print(chunk) + print("\n") + +if __name__ == "__main__": + # Run the agent + asyncio.run(run_agent(query)) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 63a76ea..99d7f3a 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -15,8 +15,8 @@ ) from langchain_core.messages import ToolMessage from langchain_core.runnables import RunnableConfig -from langchain_core.tools import BaseTool, StructuredTool, InjectedToolCallId -from pydantic import BaseModel, Field, PrivateAttr +from langchain_core.tools import BaseTool, InjectedToolCallId, StructuredTool +from pydantic import BaseModel, Field logger = logging.getLogger(__name__) @@ -41,33 +41,51 @@ class CodeExecutionResult: @dataclasses.dataclass(kw_only=True) class FileSystemOperation: - """Container for filesystem operations.""" - + """Container for filesystem operations. + + This class encapsulates a single filesystem operation that can be performed + within the sandboxed environment. Operations are serialized to JSON and + passed to the Deno subprocess for execution. + + Supported operations: + - write: Create or write a file + - read: Read file contents + - mkdir: Create a directory + - list: List directory contents + - exists: Check if file/directory exists + - remove: Delete file/directory + - copy: Copy file/directory + """ + operation: Literal["read", "write", "list", "mkdir", "exists", "remove", "copy"] path: str content: str | None = None encoding: str | None = None destination: str | None = None - + def to_dict(self) -> dict[str, str]: - """Convert to dict for JSON serialization.""" + """Convert to dict for JSON serialization. + + Returns: + Dictionary representation suitable for JSON serialization. 
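For orientation, the serialization contract described above boils down to a small JSON payload. A minimal sketch follows (illustrative only, not part of the patch; the field names come from the dataclass, and the compact separators match the `json.dumps` call used later when the command is assembled):

```python
# Sketch: how one queued text-file write travels to the Deno subprocess.
import json

op = {
    "operation": "write",
    "path": "data.csv",
    "content": "name,age\nAlice,30",
    "encoding": "utf-8",
}
payload = json.dumps([op], ensure_ascii=True, separators=(",", ":"))
print(payload)
# [{"operation":"write","path":"data.csv","content":"name,age\nAlice,30","encoding":"utf-8"}]
```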
+ """ result = { "operation": self.operation, "path": self.path, } - + if self.content is not None: result["content"] = self.content if self.encoding is not None: result["encoding"] = self.encoding if self.destination is not None: result["destination"] = self.destination - + return result # Published package name -PKG_NAME = "../pyodide-sandbox-js/main.ts" +PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" def build_permission_flag( @@ -75,7 +93,17 @@ def build_permission_flag( *, value: bool | list[str], ) -> str | None: - """Build a permission flag string based on the provided setting.""" + """Build a permission flag string based on the provided setting. + + Args: + flag: The base permission flag (e.g., "--allow-read"). + value: Either a boolean (True for unrestricted access, False for no access) + or a list of allowed items. + + Returns: + A string with the permission flag and items, or None if no permission should + be added. + """ if value is True: return flag if isinstance(value, list) and value: @@ -84,7 +112,30 @@ def build_permission_flag( class BasePyodideSandbox: - """Base class for PyodideSandbox implementations.""" + """Base class for PyodideSandbox implementations. + + This class provides the common initialization and configuration logic for both + synchronous and asynchronous PyodideSandbox implementations. + + The sandbox leverages Deno's security model to create a secure runtime for + executing untrusted Python code. It works by spawning a Deno subprocess that loads + Pyodide (Python compiled to WebAssembly) and executes the provided code in an + isolated environment. + + Security features: + - Configurable permissions for file system, network, and environment access + - Support for execution timeouts to prevent infinite loops + - Memory usage monitoring + - Process isolation via Deno's security sandbox + - In-memory filesystem with file attachment capabilities + + The sandbox supports fine-grained permission control through its initializer: + - Restrict network access to specific domains + - Limit file system access to specific directories + - Control environment variable access + - Prevent subprocess execution and FFI + - Attach files to in-memory filesystem before execution + """ def __init__( self, @@ -100,11 +151,69 @@ def __init__( skip_deno_check: bool = False, enable_filesystem: bool = False, ) -> None: - """Initialize the sandbox with specific Deno permissions.""" + """Initialize the sandbox with specific Deno permissions. + + This method configures the security permissions for the Deno subprocess that + will execute Python code via Pyodide. By default, all permissions are + disabled (False) for maximum security. Permissions can be enabled selectively + based on the needs of the code being executed. + + Args: + stateful: Whether to use a stateful session. If True, `sandbox.execute` + will include session metadata and the session bytes containing the + session state (variables, imports, etc.) in the execution result. + This allows saving and reusing the session state between executions. + + allow_env: Environment variable access configuration: + - False: No environment access (default, most secure) + - True: Unrestricted access to all environment variables + - List[str]: Access restricted to specific environment variables, e.g. 
+                    ["PATH", "PYTHONPATH"]
+
+            allow_read: File system read access configuration:
+                - False: No file system read access (default, most secure)
+                - True: Unrestricted read access to the file system
+                - List[str]: Read access restricted to specific paths, e.g.
+                    ["/tmp/sandbox", "./data"]
+
+                By default, read access to node_modules is allowed
+
+            allow_write: File system write access configuration:
+                - False: No file system write access (default, most secure)
+                - True: Unrestricted write access to the file system
+                - List[str]: Write access restricted to specific paths, e.g.
+                    ["/tmp/sandbox/output"]
+
+                By default, write access to node_modules is allowed
+
+            allow_net: Network access configuration:
+                - False: No network access (default, most secure)
+                - True: Unrestricted network access
+                - List[str]: Network access restricted to specific domains/IPs, e.g.
+                    ["api.example.com", "data.example.org:8080"]
+
+            allow_run: Subprocess execution configuration:
+                - False: No subprocess execution allowed (default, most secure)
+                - True: Unrestricted subprocess execution
+                - List[str]: Subprocess execution restricted to specific commands, e.g.
+                    ["python", "git"]
+
+            allow_ffi: Foreign Function Interface access configuration:
+                - False: No FFI access (default, most secure)
+                - True: Unrestricted FFI access
+                - List[str]: FFI access restricted to specific libraries, e.g.
+                    ["/usr/lib/libm.so"]
+
+            node_modules_dir: Directory for Node.js modules. Set to "auto" to use
+                the default directory for Deno modules.
+            skip_deno_check: If True, skip the check for Deno installation.
+            enable_filesystem: If True, enable in-memory filesystem support for
+                attaching files and directories to the sandbox environment.
+        """
         self.stateful = stateful
         self.enable_filesystem = enable_filesystem
         self._filesystem_operations: list[FileSystemOperation] = []
-        
+
         if not skip_deno_check:
             # Check if Deno is installed
             try:
@@ -116,9 +225,12 @@ def __init__(
             msg = "Deno is not installed or not in PATH."
             raise RuntimeError(msg) from e
 
-        # Define permission configurations
+        # Define permission configurations:
+        # each tuple contains (flag, setting, defaults)
         perm_defs = [
             ("--allow-env", allow_env, None),
+            # For file system permissions, if no permission is specified,
+            # force node_modules
            ("--allow-read", allow_read, ["node_modules"]),
             ("--allow-write", allow_write, ["node_modules"]),
             ("--allow-net", allow_net, None),
@@ -144,12 +256,26 @@ def attach_file(
         *,
         encoding: str = "utf-8",
     ) -> None:
-        """Attach a file to the sandbox filesystem."""
+        """Attach a text file to the sandbox filesystem.
+
+        This method queues a file to be created in the sandbox's in-memory
+        filesystem when code is executed. The file will be available for
+        reading and manipulation within the Python environment. 
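To make the permission mapping above concrete, here is a small sketch mirroring `build_permission_flag` from this diff (the domain names are placeholders):

```python
# True -> bare flag; non-empty list -> comma-joined allow-list; else omitted.
def permission_flag(flag: str, value: bool | list[str]) -> str | None:
    if value is True:
        return flag
    if isinstance(value, list) and value:
        return f"{flag}={','.join(value)}"
    return None

print(permission_flag("--allow-net", ["api.example.com", "data.example.org:8080"]))
# --allow-net=api.example.com,data.example.org:8080
print(permission_flag("--allow-read", True))   # --allow-read
print(permission_flag("--allow-ffi", False))   # None
```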
+ + Args: + path: File path within the sandbox filesystem + content: Text content of the file + encoding: Text encoding (default: utf-8) + + Raises: + TypeError: If content is not a string + """ self.enable_filesystem = True - + if not isinstance(content, str): - raise ValueError("Content must be a string for text files") - + msg = "Content must be a string for text files" + raise TypeError(msg) + operation = FileSystemOperation( operation="write", path=path, @@ -157,19 +283,37 @@ def attach_file( encoding=encoding, ) self._filesystem_operations.append(operation) - logger.debug(f"Attached file: {path} ({len(content)} chars, encoding: {encoding})") + logger.debug( + "Attached file: %s (%d chars, encoding: %s)", + path, + len(content), + encoding, + ) def attach_binary_file( self, path: str, content: bytes, ) -> None: - """Attach a binary file to the sandbox filesystem.""" + """Attach a binary file to the sandbox filesystem. + + This method queues a binary file to be created in the sandbox's in-memory + filesystem when code is executed. The content is base64-encoded for + transport to the sandbox environment. + + Args: + path: File path within the sandbox filesystem + content: Binary content of the file + + Raises: + TypeError: If content is not bytes + """ self.enable_filesystem = True - + if not isinstance(content, bytes): - raise ValueError("Content must be bytes for binary files") - + msg = "Content must be bytes for binary files" + raise TypeError(msg) + b64_content = base64.b64encode(content).decode("ascii") operation = FileSystemOperation( operation="write", @@ -178,75 +322,123 @@ def attach_binary_file( encoding="binary", ) self._filesystem_operations.append(operation) - logger.debug(f"Attached binary file: {path} ({len(content)} bytes -> {len(b64_content)} b64 chars)") + logger.debug( + "Attached binary file: %s (%d bytes -> %d b64 chars)", + path, + len(content), + len(b64_content), + ) def create_directory(self, path: str) -> None: - """Create a directory in the sandbox filesystem.""" + """Create a directory in the sandbox filesystem. + + This method queues a directory to be created in the sandbox's in-memory + filesystem when code is executed. + + Args: + path: Directory path within the sandbox filesystem + """ self.enable_filesystem = True - + operation = FileSystemOperation( operation="mkdir", path=path, ) self._filesystem_operations.append(operation) - logger.debug(f"Created directory: {path}") + logger.debug("Created directory: %s", path) def get_attached_files(self) -> list[str]: - """Get list of attached file paths.""" - files = [] - for op in self._filesystem_operations: - if op.operation in ["write"]: - files.append(op.path) - return files + """Get list of attached file paths. + + Returns: + List of file paths that will be available in the sandbox filesystem + """ + return [ + op.path + for op in self._filesystem_operations + if op.operation == "write" + ] def clear_filesystem_operations(self) -> None: - """Clear all queued filesystem operations.""" + """Clear all queued filesystem operations. + + This removes all files and directories that were queued to be created + in the sandbox filesystem. 
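The binary path relies on a plain base64 round trip. A minimal sketch of that transport, using arbitrary bytes and nothing beyond the standard library:

```python
# Binary content is base64-encoded into an ASCII string for the JSON
# payload and decoded back to bytes inside the sandbox before writing.
import base64

payload = b"\x89PNG"                             # raw bytes to attach
b64 = base64.b64encode(payload).decode("ascii")  # what travels over JSON
assert base64.b64decode(b64) == payload          # what the sandbox writes
print(b64)  # iVBORw==
```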
+ """ self._filesystem_operations.clear() logger.debug("Cleared filesystem operations") - def _build_command(self, code: str, **kwargs) -> list[str]: - cmd = ["deno", "run"] - + def _build_command( + self, + code: str, + *, + session_bytes: bytes | None = None, + session_metadata: dict | None = None, + memory_limit_mb: int | None = None, + ) -> list[str]: + """Build the Deno command with all necessary arguments. + + Args: + code: The Python code to execute + session_bytes: Optional session state bytes + session_metadata: Optional session metadata + memory_limit_mb: Optional memory limit in MB + + Returns: + List of command arguments for subprocess execution + """ + cmd = [ + "deno", + "run", + ] + # Apply permissions cmd.extend(self.permissions) - - # Memory limit - if kwargs.get('memory_limit_mb'): - cmd.append(f"--v8-flags=--max-old-space-size={kwargs['memory_limit_mb']}") + # Deno uses the V8 flag --max-old-space-size to limit memory usage in MB + if memory_limit_mb is not None and memory_limit_mb > 0: + cmd.append(f"--v8-flags=--max-old-space-size={memory_limit_mb}") + + # Add the path to the JavaScript wrapper script cmd.append(PKG_NAME) + + # Add script path and code cmd.extend(["-c", code]) - # Stateful if self.stateful: cmd.extend(["-s"]) - # Session data - if kwargs.get('session_bytes'): - bytes_array = list(kwargs['session_bytes']) + if session_bytes: + # Convert bytes to list of integers and then to JSON string + bytes_array = list(session_bytes) cmd.extend(["-b", json.dumps(bytes_array)]) - if kwargs.get('session_metadata'): - cmd.extend(["-m", json.dumps(kwargs['session_metadata'])]) + if session_metadata: + cmd.extend(["-m", json.dumps(session_metadata)]) - # FILESYSTEM + # Add filesystem operations if any are queued if self._filesystem_operations or self.enable_filesystem: if self._filesystem_operations: fs_ops = [op.to_dict() for op in self._filesystem_operations] - fs_json = json.dumps(fs_ops, ensure_ascii=True, separators=(',', ':')) + fs_json = json.dumps( + fs_ops, ensure_ascii=True, separators=(",", ":") + ) cmd.extend(["-x", fs_json]) - - logger.debug(f"Filesystem enabled with {len(fs_ops)} operations") + logger.debug("Filesystem enabled with %d operations", len(fs_ops)) else: cmd.extend(["-x", "[]"]) logger.debug("Filesystem enabled with no initial operations") - logger.debug(f"Full command: {' '.join(cmd)}") return cmd class PyodideSandbox(BasePyodideSandbox): - """Asynchronous implementation of PyodideSandbox.""" + """Asynchronous implementation of PyodideSandbox. + + This class provides an asynchronous interface for executing Python code in a + sandboxed Deno environment using Pyodide. It supports file attachment and + in-memory filesystem operations. + """ async def execute( self, @@ -257,7 +449,26 @@ async def execute( timeout_seconds: float | None = None, memory_limit_mb: int | None = None, ) -> CodeExecutionResult: - """Execute Python code asynchronously in a sandboxed Deno subprocess.""" + """Execute Python code asynchronously in a sandboxed Deno subprocess. + + This method spawns a Deno subprocess that loads Pyodide (Python compiled + to WebAssembly) and executes the provided code within that sandboxed + environment. The execution is subject to the permissions configured in the + sandbox's initialization and the resource constraints provided as arguments. + + Any attached files will be made available in the sandbox's in-memory + filesystem before code execution begins. 
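For readers following `_build_command`, the assembled argv for a stateless sandbox with one attached file looks roughly like this (a hand-written illustration, not captured output; the flag values are examples):

```python
# Flag order follows _build_command: permissions, optional V8 memory flag,
# the published package, the code, then the serialized filesystem ops.
cmd = [
    "deno", "run",
    "--allow-read=node_modules",             # default read permission
    "--allow-write=node_modules",            # default write permission
    "--v8-flags=--max-old-space-size=256",   # memory_limit_mb=256
    "jsr:@langchain/pyodide-sandbox@0.0.4",  # PKG_NAME
    "-c", "print('hello')",                  # code to execute
    "-x", '[{"operation":"write","path":"a.txt","content":"hi","encoding":"utf-8"}]',
]
print(" ".join(cmd))
```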
+ + Args: + code: The Python code to execute in the sandbox + session_bytes: Optional bytes containing session state + session_metadata: Optional metadata for session state + timeout_seconds: Maximum execution time in seconds + memory_limit_mb: Maximum memory usage in MB + + Returns: + CodeExecutionResult containing execution results and metadata + """ start_time = time.time() stdout = "" stderr = "" @@ -279,6 +490,7 @@ async def execute( ) try: + # Wait for process with a timeout stdout_bytes, stderr_bytes = await asyncio.wait_for( process.communicate(), timeout=timeout_seconds, @@ -286,6 +498,8 @@ async def execute( stdout = stdout_bytes.decode("utf-8", errors="replace") if stdout: + # stdout encodes the full result from the sandbox. + # including stdout, stderr, and the json result. full_result = json.loads(stdout) stdout = full_result.get("stdout", None) stderr = full_result.get("stderr", None) @@ -294,7 +508,7 @@ async def execute( session_metadata = full_result.get("sessionMetadata", None) filesystem_info = full_result.get("fileSystemInfo", None) filesystem_operations = full_result.get("fileSystemOperations", None) - + # Convert the Uint8Array to Python bytes session_bytes_array = full_result.get("sessionBytes", None) session_bytes = ( bytes(session_bytes_array) if session_bytes_array else None @@ -317,8 +531,9 @@ async def execute( filesystem_info = None filesystem_operations = None except asyncio.CancelledError: + # Optionally: log cancellation if needed pass - + end_time = time.time() return CodeExecutionResult( @@ -335,7 +550,11 @@ async def execute( class SyncPyodideSandbox(BasePyodideSandbox): - """Synchronous version of PyodideSandbox.""" + """Synchronous version of PyodideSandbox. + + This class provides a synchronous interface to the PyodideSandbox functionality, + including file attachment and in-memory filesystem operations. + """ def execute( self, @@ -346,7 +565,22 @@ def execute( timeout_seconds: float | None = None, memory_limit_mb: int | None = None, ) -> CodeExecutionResult: - """Execute Python code synchronously in a sandboxed Deno subprocess.""" + """Execute Python code synchronously in a sandboxed Deno subprocess. + + This method provides the same functionality as PyodideSandbox.execute() but + in a synchronous/blocking manner. Any attached files will be made available + in the sandbox's in-memory filesystem before code execution begins. + + Args: + code: The Python code to execute in the sandbox + session_bytes: Optional bytes containing session state + session_metadata: Optional metadata for session state + timeout_seconds: Maximum execution time in seconds + memory_limit_mb: Maximum memory usage in MB + + Returns: + CodeExecutionResult containing execution results and metadata + """ start_time = time.time() stdout = "" result = None @@ -361,12 +595,16 @@ def execute( ) try: + # Run the subprocess with timeout + # Ignoring S603 for subprocess.run as the cmd is built safely. + # Untrusted input comes from `code` parameter, which should be + # escaped properly as we are **not** using shell=True. 
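Both `execute()` variants decode the same single-line JSON envelope from the subprocess's stdout. A stripped-down sketch of that unpacking (the sample JSON is made up; the keys are the ones read in this diff):

```python
# sessionBytes arrives as a list of ints when present; everything else
# is read with .get() so missing keys degrade to None.
import json

raw = '{"stdout": "5", "stderr": null, "result": 5, "success": true, "sessionBytes": null}'
full_result = json.loads(raw)
stdout = full_result.get("stdout")
stderr = full_result.get("stderr")
session_bytes_array = full_result.get("sessionBytes")
session_bytes = bytes(session_bytes_array) if session_bytes_array else None
print(stdout, stderr, session_bytes)  # 5 None None
```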
process = subprocess.run(  # noqa: S603
                 cmd,
                 capture_output=True,
-                text=False,
+                text=False,  # Keep as bytes for proper decoding
                 timeout=timeout_seconds,
-                check=False,
+                check=False,  # Don't raise on non-zero exit
             )
 
             stdout_bytes = process.stdout
@@ -375,6 +613,8 @@ def execute(
             stdout = stdout_bytes.decode("utf-8", errors="replace")
 
             if stdout:
+                # stdout encodes the full result from the sandbox
+                # including stdout, stderr, and the json result
                 full_result = json.loads(stdout)
                 stdout = full_result.get("stdout", None)
                 stderr = full_result.get("stderr", None)
@@ -383,7 +623,7 @@ def execute(
                 session_metadata = full_result.get("sessionMetadata", None)
                 filesystem_info = full_result.get("fileSystemInfo", None)
                 filesystem_operations = full_result.get("fileSystemOperations", None)
-                
+                # Convert the Uint8Array to Python bytes
                 session_bytes_array = full_result.get("sessionBytes", None)
                 session_bytes = (
                     bytes(session_bytes_array) if session_bytes_array else None
@@ -420,39 +660,91 @@ def execute(
         )
 
 
-# Input schema for tools
-class PyodideSandboxInput(BaseModel):
-    """Input schema for PyodideSandbox tool."""
-    code: str = Field(description="Python code to execute.")
+class PyodideSandboxTool(BaseTool):
+    r"""Tool for running python code in a PyodideSandbox.
 
+    This tool extends the base PyodideSandbox functionality with support for
+    attaching files and creating an in-memory filesystem. Files attached to
+    the tool will be available within the Python execution environment.
 
-# =============================================================================
-# MAIN CLASS - Inherits from BaseTool but also exposes a StructuredTool view
-# =============================================================================
+    If you use a stateful sandbox (PyodideSandboxTool(stateful=True)),
+    the state between code executions (variables, imports,
+    definitions, etc.) will be persisted using a LangGraph checkpointer.
 
-class PyodideSandboxTool(BaseTool):
-    """
-    Flexible PyodideSandbox tool that can be used as BaseTool or StructuredTool.
-    
-    Usage examples:
-    
-    # As BaseTool (direct inheritance):
-    tool = PyodideSandboxTool(enable_filesystem=True)
-    result = tool.invoke({"code": "print('Hello')"})
-    
-    # As StructuredTool (via property):
-    tool = PyodideSandboxTool(enable_filesystem=True)
-    result = tool.as_structured_tool().invoke({"code": "print('Hello')"})
-    
-    # For agents that need a StructuredTool:
-    agent = create_react_agent(llm, [tool.as_structured_tool()])
-    
-    # For agents that accept a BaseTool:
-    agent = create_react_agent(llm, [tool])
+    !!! important
+        When you use a stateful sandbox, this tool can only be used
+        inside a LangGraph graph with a checkpointer, and
+        has to be used with the prebuilt `create_react_agent` or `ToolNode`. 
+ + Example: stateless sandbox usage + + ```python + from langgraph.prebuilt import create_react_agent + from langchain_sandbox import PyodideSandboxTool + + tool = PyodideSandboxTool(enable_filesystem=True, allow_net=True) + + # Attach data files + tool.attach_file("data.csv", "name,age\\nJohn,25\\nMary,30") + + agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", + tools=[tool], + ) + result = await agent.ainvoke( + {"messages": [{"role": "user", "content": "analyze the data.csv file"}]}, + ) + ``` + + Example: stateful sandbox usage + + ```python + from langgraph.prebuilt import create_react_agent + from langgraph.prebuilt.chat_agent_executor import AgentState + from langgraph.checkpoint.memory import InMemorySaver + from langchain_sandbox import PyodideSandboxTool, PyodideSandbox + + class State(AgentState): + session_bytes: bytes + session_metadata: dict + + tool = PyodideSandboxTool(stateful=True, enable_filesystem=True, allow_net=True) + agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", + tools=[tool], + checkpointer=InMemorySaver(), + state_schema=State + ) + result = await agent.ainvoke( + { + "messages": [ + {"role": "user", "content": "what's 5 + 7? save result as 'a'"} + ], + "session_bytes": None, + "session_metadata": None + }, + config={"configurable": {"thread_id": "123"}}, + ) + second_result = await agent.ainvoke( + {"messages": [{"role": "user", "content": "what's the sine of 'a'?"}]}, + config={"configurable": {"thread_id": "123"}}, + ) + ``` """ name: str = "python_code_sandbox" - + description: str = ( + "A secure Python code sandbox with filesystem support. " + "Use this to execute python commands.\n" + "- Input should be a valid python command.\n" + "- To return output, you should print it out with `print(...)`.\n" + "- Don't use f-strings when printing outputs.\n" + "- If you need to make web requests, use `httpx.AsyncClient`.\n" + "- Files can be read/written using standard Python file operations.\n" + "- All file operations work within a sandboxed memory filesystem.\n" + "- Check for attached files using: import os; print(os.listdir('.'))" + ) + # Mirror the PyodideSandbox constructor arguments stateful: bool = False allow_env: list[str] | bool = False @@ -462,39 +754,13 @@ class PyodideSandboxTool(BaseTool): allow_run: list[str] | bool = False allow_ffi: list[str] | bool = False timeout_seconds: float | None + """Timeout for code execution in seconds. By default set to 60 seconds.""" node_modules_dir: str = "auto" enable_filesystem: bool = False - # CORREÇÃO: Usar PrivateAttr para atributos privados no Pydantic - _sandbox: PyodideSandbox = PrivateAttr() - _sync_sandbox: SyncPyodideSandbox = PrivateAttr() - _structured_tool: StructuredTool | None = PrivateAttr(default=None) - _stateful: bool = PrivateAttr() - _input_schema: type[BaseModel] = PrivateAttr() - - def _build_description(self) -> str: - """Build the complete description string with attached files.""" - base = ( - "A secure Python code sandbox with filesystem support. 
" - "Use this to execute python commands.\n" - "- Input should be a valid python command.\n" - "- To return output, you should print it out with `print(...)`.\n" - "- Don't use f-strings when printing outputs.\n" - "- If you need to make web requests, use `httpx.AsyncClient`.\n" - "- Files can be read/written using standard Python file operations.\n" - "- All file operations work within a sandboxed memory filesystem.\n" - "- Check for attached files using: import os; print(os.listdir('.'))" - ) - - files = self._sandbox.get_attached_files() - if files: - base += "\n\n🗂️ ATTACHED FILES AVAILABLE:\n" - base += "\n".join(f" • {p}" for p in files) - base += ( - "\nThese files are already loaded and ready to use with pandas, " - "open(), etc." - ) - return base + _sandbox: PyodideSandbox + _sync_sandbox: SyncPyodideSandbox + _structured_tool: StructuredTool | None def __init__( self, @@ -505,8 +771,23 @@ def __init__( enable_filesystem: bool = False, **kwargs: dict[str, Any], ) -> None: - """Initialize the tool.""" - + """Initialize the tool. + + Args: + stateful: Whether to use a stateful sandbox. If True, `sandbox.execute` + will include session metadata and the session bytes containing the + session state (variables, imports, etc.) in the execution result. + This allows saving and reusing the session state between executions. + timeout_seconds: Timeout for code execution in seconds. + enable_filesystem: Enable in-memory filesystem support for attaching files. + allow_net: configure network access. If setting to True, any network access + is allowed, including potentially internal network addresses that you + may not want to expose to a malicious actor. + Depending on your use case, you can restrict the network access to + only the URLs you need (e.g., required to set up micropip / pyodide). + Please refer to pyodide documentation for more details. 
+            **kwargs: Other attributes will be passed to the PyodideSandbox
+        """
         if stateful:
             try:
                 from langgraph.prebuilt import InjectedState
@@ -521,6 +802,8 @@ class PyodideSandboxToolInput(BaseModel):
                 """Python code to execute in the sandbox."""
 
                 code: str = Field(description="Code to execute.")
+                # these fields will be ignored by the LLM
+                # and automatically injected by LangGraph's ToolNode
                 state: Annotated[dict[str, Any] | BaseModel, InjectedState]
                 tool_call_id: Annotated[str, InjectedToolCallId]
 
@@ -531,136 +814,100 @@ class PyodideSandboxToolInput(BaseModel):
 
                 code: str = Field(description="Code to execute.")
 
-        # Create the sandboxes
-        sandbox = PyodideSandbox(
+        # FIX: Follow exactly the same pattern as main_lib
+        super().__init__(
             stateful=stateful,
-            allow_env=kwargs.get('allow_env', False),
-            allow_read=kwargs.get('allow_read', False),
-            allow_write=kwargs.get('allow_write', False),
+            timeout_seconds=timeout_seconds,
             allow_net=allow_net,
-            allow_run=kwargs.get('allow_run', False),
-            allow_ffi=kwargs.get('allow_ffi', False),
-            node_modules_dir=kwargs.get('node_modules_dir', 'auto'),
             enable_filesystem=enable_filesystem,
+            **kwargs,
         )
-        sync_sandbox = SyncPyodideSandbox(
-            stateful=stateful,
-            allow_env=kwargs.get('allow_env', False),
-            allow_read=kwargs.get('allow_read', False),
-            allow_write=kwargs.get('allow_write', False),
-            allow_net=allow_net,
-            allow_run=kwargs.get('allow_run', False),
-            allow_ffi=kwargs.get('allow_ffi', False),
-            node_modules_dir=kwargs.get('node_modules_dir', 'auto'),
-            enable_filesystem=enable_filesystem,
-            skip_deno_check=True,
+
+        self.args_schema: type[BaseModel] = PyodideSandboxToolInput
+        self._structured_tool = None  # Initialize as None
+        self._sandbox = PyodideSandbox(
+            stateful=self.stateful,
+            allow_env=self.allow_env,
+            allow_read=self.allow_read,
+            allow_write=self.allow_write,
+            allow_net=self.allow_net,
+            allow_run=self.allow_run,
+            allow_ffi=self.allow_ffi,
+            node_modules_dir=self.node_modules_dir,
+            enable_filesystem=self.enable_filesystem,
+        )
+        # Initialize sync sandbox with deno check skipped since async sandbox already
+        # checked
+        self._sync_sandbox = SyncPyodideSandbox(
+            stateful=self.stateful,
+            allow_env=self.allow_env,
+            allow_read=self.allow_read,
+            allow_write=self.allow_write,
+            allow_net=self.allow_net,
+            allow_run=self.allow_run,
+            allow_ffi=self.allow_ffi,
+            node_modules_dir=self.node_modules_dir,
+            enable_filesystem=self.enable_filesystem,
+            skip_deno_check=True,  # Skip deno check since async sandbox already checked
         )
 
-        # Define the initial description
-        initial_description = (
+    def _build_description(self) -> str:
+        """Build the complete description string with attached files.
+
+        Returns:
+            Tool description including information about attached files
+        """
+        base = (
             "A secure Python code sandbox with filesystem support. "
             "Use this to execute python commands.\n"
             "- Input should be a valid python command.\n"
             "- To return output, you should print it out with `print(...)`.\n"
             "- Don't use f-strings when printing outputs.\n"
             "- If you need to make web requests, use `httpx.AsyncClient`.\n"
-            "- Files can be read/written using standard Python file operations.\n"
-        )
-        
-        # Call super().__init__() with the computed description
-        super().__init__(
-            stateful=stateful,
-            timeout_seconds=timeout_seconds,
-            allow_net=allow_net,
-            enable_filesystem=enable_filesystem,
-            description=initial_description,
-            args_schema=PyodideSandboxToolInput,
-            **kwargs,
+            "- Files can be read/written using standard Python file operations." 
)
 
-        # IMPORTANT: Set private attributes AFTER super().__init__()
-        self._sandbox = sandbox
-        self._sync_sandbox = sync_sandbox
-        self._stateful = stateful
-        self._input_schema = PyodideSandboxToolInput
-        self._structured_tool = None
-
-    def attach_file(
-        self,
-        path: str,
-        content: str,
-        *,
-        encoding: str = "utf-8",
-    ) -> None:
-        """Attach a file to the sandbox environment."""
-        self._sandbox.attach_file(path, content, encoding=encoding)
-        self._sync_sandbox.attach_file(path, content, encoding=encoding)
-        # Update the description in both versions
-        new_description = self._build_description()
-        self.description = new_description
-        if self._structured_tool:
-            self._structured_tool.description = new_description
-
-    def attach_binary_file(
-        self,
-        path: str,
-        content: bytes,
-    ) -> None:
-        """Attach a binary file to the sandbox environment."""
-        self._sandbox.attach_binary_file(path, content)
-        self._sync_sandbox.attach_binary_file(path, content)
-        # Update the description in both versions
-        new_description = self._build_description()
-        self.description = new_description
-        if self._structured_tool:
-            self._structured_tool.description = new_description
-
-    def create_directory(self, path: str) -> None:
-        """Create a directory in the sandbox environment."""
-        self._sandbox.create_directory(path)
-        self._sync_sandbox.create_directory(path)
-        # Update the description in both versions
-        new_description = self._build_description()
-        self.description = new_description
-        if self._structured_tool:
-            self._structured_tool.description = new_description
+        files = self._sandbox.get_attached_files()
+        if files:
+            base += "\n\nATTACHED FILES AVAILABLE:\n"
+            base += "\n".join(f"  • {p}" for p in files)
+            base += (
+                "\nThese files are already loaded and ready to use with pandas, "
+                "open(), etc."
+            )
+        return base
 
-    def get_attached_files(self) -> list[str]:
-        """Get list of attached file paths."""
-        return self._sandbox.get_attached_files()
+    def as_structured_tool(self) -> StructuredTool:
+        """Return a StructuredTool version of this tool.
 
-    def clear_filesystem_operations(self) -> None:
-        """Clear all filesystem operations and update description."""
-        self._sandbox.clear_filesystem_operations()
-        self._sync_sandbox.clear_filesystem_operations()
-        # Update the description in both versions
-        new_description = self._build_description()
-        self.description = new_description
-        if self._structured_tool:
-            self._structured_tool.description = new_description
+        This method provides access to a StructuredTool interface while maintaining
+        the BaseTool as the primary interface. The StructuredTool's description
+        is kept in sync with attached files.
 
-    def as_structured_tool(self) -> StructuredTool:
-        """
-        Return a StructuredTool version of this tool.
-        
-        This allows users to access the tool as a StructuredTool when needed,
-        while maintaining the BaseTool interface as the primary one.
+        Returns:
+            StructuredTool instance with dynamic description updates
         """
         if self._structured_tool is None:
             self._structured_tool = StructuredTool.from_function(
                 name=self.name,
-                description=self.description,
-                func=self._run_sync if not self._stateful else self._run_stateful_sync,
-                args_schema=self._input_schema,
+                description=self._build_description(),
+                func=(
                    self._run_sync
+                    if not self.stateful
+                    else self._run_stateful_sync
+                ),
+                args_schema=self.args_schema,
             )
         return self._structured_tool
 
     @property
     def tool(self) -> StructuredTool:
-        """
-        Legacy property for backwards compatibility. 
- + """Legacy property for backwards compatibility. + DEPRECATED: Use as_structured_tool() instead. + + Returns: + StructuredTool instance """ return self.as_structured_tool() @@ -671,23 +918,27 @@ def _run_sync(self, code: str) -> str: ) if result.status == "error": - error_msg = result.stderr if result.stderr else "Execution failed with unknown error" + error_msg = ( + result.stderr + if result.stderr + else "Execution failed with unknown error" + ) return f"Error during execution: {error_msg}" - + if result.stdout: return result.stdout - + if result.result is not None: return str(result.result) - + return "" - + def _run_stateful_sync( self, code: str, state: dict[str, Any] | BaseModel, tool_call_id: str, - ) -> Any: + ) -> Any: # noqa: ANN401 """Synchronous execution function for stateful mode.""" required_keys = {"session_bytes", "session_metadata", "messages"} actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) @@ -711,7 +962,7 @@ def _run_stateful_sync( session_metadata=session_metadata, timeout_seconds=self.timeout_seconds, ) - + if result.stderr: tool_result = f"Error during execution: {result.stderr}" else: @@ -732,6 +983,94 @@ def _run_stateful_sync( } ) + def attach_file( + self, + path: str, + content: str, + *, + encoding: str = "utf-8", + ) -> None: + """Attach a text file to the sandbox environment. + + This file will be available in the sandbox's in-memory filesystem + when code is executed. The tool's description will be automatically + updated to reflect the attached files. + + Args: + path: File path within the sandbox filesystem + content: Text content of the file + encoding: Text encoding (default: utf-8) + """ + self._sandbox.attach_file(path, content, encoding=encoding) + self._sync_sandbox.attach_file(path, content, encoding=encoding) + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + + def attach_binary_file( + self, + path: str, + content: bytes, + ) -> None: + """Attach a binary file to the sandbox environment. + + This file will be available in the sandbox's in-memory filesystem + when code is executed. The tool's description will be automatically + updated to reflect the attached files. + + Args: + path: File path within the sandbox filesystem + content: Binary content of the file + """ + self._sandbox.attach_binary_file(path, content) + self._sync_sandbox.attach_binary_file(path, content) + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + + def create_directory(self, path: str) -> None: + """Create a directory in the sandbox environment. + + This directory will be available in the sandbox's in-memory filesystem + when code is executed. + + Args: + path: Directory path within the sandbox filesystem + """ + self._sandbox.create_directory(path) + self._sync_sandbox.create_directory(path) + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + + def get_attached_files(self) -> list[str]: + """Get list of attached file paths. 
+ + Returns: + List of file paths that will be available in the sandbox filesystem + """ + return self._sandbox.get_attached_files() + + def clear_filesystem_operations(self) -> None: + """Clear all attached files and directories. + + This removes all files and directories that were queued to be created + in the sandbox filesystem and updates the tool description. + """ + self._sandbox.clear_filesystem_operations() + self._sync_sandbox.clear_filesystem_operations() + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + def _run( self, code: str, @@ -739,12 +1078,60 @@ def _run( tool_call_id: str | None = None, config: RunnableConfig | None = None, run_manager: CallbackManagerForToolRun | None = None, - ) -> Any: - """Use the tool synchronously (BaseTool interface).""" + ) -> Any: # noqa: ANN401 + """Use the tool synchronously.""" if self.stateful: - return self._run_stateful_sync(code, state, tool_call_id) + required_keys = {"session_bytes", "session_metadata", "messages"} + actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) + if missing_keys := required_keys - actual_keys: + error_msg = ( + "Input state is missing " + f"the following required keys: {missing_keys}" + ) + raise ValueError(error_msg) + + if isinstance(state, dict): + session_bytes = state["session_bytes"] + session_metadata = state["session_metadata"] + else: + session_bytes = state.session_bytes + session_metadata = state.session_metadata + + result = self._sync_sandbox.execute( + code, + session_bytes=session_bytes, + session_metadata=session_metadata, + timeout_seconds=self.timeout_seconds, + ) + else: + result = self._sync_sandbox.execute( + code, timeout_seconds=self.timeout_seconds + ) + + if result.stderr: + tool_result = f"Error during execution: {result.stderr}" else: - return self._run_sync(code) + tool_result = result.stdout + + if self.stateful: + from langgraph.types import Command + + # if the tool is used with a stateful sandbox, + # we need to update the graph state with the new session bytes and metadata + return Command( + update={ + "session_bytes": result.session_bytes, + "session_metadata": result.session_metadata, + "messages": [ + ToolMessage( + content=tool_result, + tool_call_id=tool_call_id, + ) + ], + } + ) + + return tool_result async def _arun( self, @@ -753,8 +1140,8 @@ async def _arun( tool_call_id: str | None = None, config: RunnableConfig | None = None, run_manager: AsyncCallbackManagerForToolRun | None = None, - ) -> Any: - """Use the tool asynchronously (BaseTool interface).""" + ) -> Any: # noqa: ANN401 + """Use the tool asynchronously.""" if self.stateful: required_keys = {"session_bytes", "session_metadata", "messages"} actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) @@ -778,14 +1165,21 @@ async def _arun( session_metadata=session_metadata, timeout_seconds=self.timeout_seconds, ) - - if result.stderr: - tool_result = f"Error during execution: {result.stderr}" - else: - tool_result = result.stdout + else: + result = await self._sandbox.execute( + code, timeout_seconds=self.timeout_seconds + ) + + if result.stderr: + tool_result = f"Error during execution: {result.stderr}" + else: + tool_result = result.stdout + if self.stateful: from langgraph.types import Command + # if the tool is used with a stateful sandbox, + # we need to update the graph state with the new 
session bytes and metadata
         return Command(
             update={
                 "session_bytes": result.session_bytes,
                 "session_metadata": result.session_metadata,
@@ -798,68 +1192,123 @@ async def _arun(
                 ],
             }
         )
-        else:
-            result = await self._sandbox.execute(
-                code, timeout_seconds=self.timeout_seconds
-            )
 
-            if result.status == "error":
-                error_msg = result.stderr if result.stderr else "Execution failed with unknown error"
-                return f"Error during execution: {error_msg}"
-            
-            if result.stdout:
-                return result.stdout
-            
-            if result.result is not None:
-                return str(result.result)
-            
-            return ""
+        return tool_result
 
 
-# =============================================================================
-# ALTERNATIVE WRAPPER - Kept for compatibility with existing code
-# =============================================================================
+class PyodideSandboxStructuredTool:
+    r"""Pure StructuredTool wrapper for PyodideSandbox with dynamic description updates.
 
-class PyodideSandboxDynamicTool:
-    """
-    Pure StructuredTool wrapper for PyodideSandbox (legacy compatibility).
-    
-    DEPRECATED: Use PyodideSandboxTool instead.
-    """
-    
-    def __init__(self, **kwargs):
-        """Initialize the wrapper - prefer PyodideSandboxTool instead."""
-        logger.warning(
-            "PyodideSandboxDynamicTool is deprecated. "
-            "Use PyodideSandboxTool instead."
+    This class provides a standalone StructuredTool interface for users who prefer
+    to work exclusively with StructuredTool rather than BaseTool. It maintains all
+    the filesystem functionality and dynamic description updates.
+
+    Example usage:
+        ```python
+        from langchain_sandbox import PyodideSandboxStructuredTool
+        from langgraph.prebuilt import create_react_agent
+        from langchain_openai import ChatOpenAI
+
+        # Create tool
+        sandbox_tool = PyodideSandboxStructuredTool(
+            enable_filesystem=True,
+            allow_net=True,
         )
+
+        # Attach files
+        sandbox_tool.attach_file("data.csv", "name,age\\nJohn,25")
+
+        # Use in agent
+        agent = create_react_agent(llm, [sandbox_tool.tool])
+        ```
+    """
+
+    def __init__(self, **kwargs: Any) -> None:  # noqa: ANN401
+        """Initialize the StructuredTool wrapper.
+
+        Args:
+            **kwargs: All arguments are passed to PyodideSandboxTool
+        """
         self._base_tool = PyodideSandboxTool(**kwargs)
-        self.tool = self._base_tool.as_structured_tool()
+        # Force creation of the StructuredTool
+        self._tool = self._base_tool.as_structured_tool()
+
+    @property
+    def tool(self) -> StructuredTool:
+        """Access to the underlying StructuredTool.
+
+        Returns:
+            StructuredTool instance with current description
+        """
+        return self._tool
+
+    def attach_file(
+        self,
+        path: str,
+        content: str,
+        *,
+        encoding: str = "utf-8",
+    ) -> None:
+        """Attach a text file to the sandbox environment.
 
-    def attach_file(self, path: str, content: str, *, encoding: str = "utf-8") -> None:
-        """Attach a file to the sandbox environment."""
+        Args:
+            path: File path within the sandbox filesystem
+            content: Text content of the file
+            encoding: Text encoding (default: utf-8)
+        """
         self._base_tool.attach_file(path, content, encoding=encoding)
 
-    def attach_binary_file(self, path: str, content: bytes) -> None:
-        """Attach a binary file to the sandbox environment."""
+    def attach_binary_file(
+        self,
+        path: str,
+        content: bytes,
+    ) -> None:
+        """Attach a binary file to the sandbox environment. 
+ + Args: + path: File path within the sandbox filesystem + content: Binary content of the file + """ self._base_tool.attach_binary_file(path, content) def create_directory(self, path: str) -> None: - """Create a directory in the sandbox environment.""" + """Create a directory in the sandbox environment. + + Args: + path: Directory path within the sandbox filesystem + """ self._base_tool.create_directory(path) def get_attached_files(self) -> list[str]: - """Get list of attached file paths.""" + """Get list of attached file paths. + + Returns: + List of file paths that will be available in the sandbox filesystem + """ return self._base_tool.get_attached_files() def clear_filesystem_operations(self) -> None: - """Clear all filesystem operations and update description.""" + """Clear all attached files and directories.""" self._base_tool.clear_filesystem_operations() def invoke(self, input_data: dict[str, Any]) -> str: - """Direct invoke method for easier usage.""" + """Direct invoke method for easier usage. + + Args: + input_data: Input data containing 'code' key + + Returns: + Execution result as string + """ return self.tool.invoke(input_data) async def ainvoke(self, input_data: dict[str, Any]) -> str: - """Async direct invoke method for easier usage.""" - return await self.tool.ainvoke(input_data) \ No newline at end of file + """Async direct invoke method for easier usage. + + Args: + input_data: Input data containing 'code' key + + Returns: + Execution result as string + """ + return await self.tool.ainvoke(input_data) diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index b069a31..82bb427 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -21,7 +21,7 @@ def pyodide_package(monkeypatch: pytest.MonkeyPatch) -> None: @pytest.fixture -def mock_csv_data(): +def mock_csv_data() -> str: """Sample sales data for testing.""" return """date,product_id,category,quantity,price,customer_id,region 2024-01-15,P001,Electronics,2,499.99,C123,North @@ -35,8 +35,6 @@ def get_default_sandbox(stateful: bool = False) -> PyodideSandbox: """Get default PyodideSandbox instance for testing.""" return PyodideSandbox( stateful=stateful, - allow_read=True, - allow_write=True, allow_net=True, allow_env=False, allow_run=False, @@ -48,8 +46,6 @@ def get_default_sync_sandbox(stateful: bool = False) -> SyncPyodideSandbox: """Get default SyncPyodideSandbox instance for testing.""" return SyncPyodideSandbox( stateful=stateful, - allow_read=True, - allow_write=True, allow_net=True, allow_env=False, allow_run=False, @@ -176,48 +172,18 @@ def test_sync_pyodide_sandbox_timeout(pyodide_package: None) -> None: assert "timed out" in result.stderr.lower() -def test_pyodide_sandbox_tool() -> None: - """Test synchronous invocation of PyodideSandboxTool.""" - tool = PyodideSandboxTool(stateful=False, allow_net=True, allow_read=True, allow_write=True) - result = tool.invoke("x = 5; print(x)") - assert result == "5" - result = tool.invoke("x = 5; print(1); print(2)") - assert result == "1\n2" - - -def test_pyodide_timeout() -> None: - """Test synchronous invocation of PyodideSandboxTool with timeout.""" - tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True, allow_read=True, allow_write=True) - result = tool.invoke("while True: pass") - assert result == "Error during execution: Execution timed out after 0.1 seconds" - - -async def 
test_async_pyodide_sandbox_tool() -> None:
-    """Test asynchronous invocation of PyodideSandboxTool."""
-    tool = PyodideSandboxTool(stateful=False, allow_net=True, allow_read=True, allow_write=True)
-    result = await tool.ainvoke("x = 5; print(x)")
-    assert result == "5"
-    result = await tool.ainvoke("x = 5; print(1); print(2)")
-    # TODO: Need to preserve newlines in the output # noqa: FIX002, TD002
-    # https://github.com/langchain-ai/langchain-sandbox/issues/26
-    assert result == "1\n2"
-
-
-async def test_async_pyodide_timeout() -> None:
-    """Test asynchronous invocation of PyodideSandboxTool with timeout."""
-    tool = PyodideSandboxTool(stateful=False, timeout_seconds=0.1, allow_net=True, allow_read=True, allow_write=True)
-    result = await tool.ainvoke("while True: pass")
-    assert result == "Error during execution: Execution timed out after 0.1 seconds"
-
-async def test_filesystem_basic_operations():
+async def test_filesystem_basic_operations() -> None:
     """Test basic filesystem operations."""
-    sandbox = PyodideSandbox(enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True)
-    
+    sandbox = PyodideSandbox(
+        enable_filesystem=True,
+        allow_net=True,
+    )
+
     # Attach files
     sandbox.attach_file("test.txt", "Hello, World!")
     sandbox.attach_file("data.json", '{"key": "value"}')
     sandbox.create_directory("output")
-    
+
     code = """
 import os
 import json
@@ -239,7 +205,7 @@ async def test_filesystem_basic_operations():
 
 print(f"Text: {txt_content}")
 print(f"JSON key: {json_data['key']}")
-print(f"Root files: {root_files}") 
+print(f"Root files: {root_files}")
 print(f"Output files: {output_files}")
 
 # Read the created file to verify it was written
@@ -247,23 +213,25 @@ async def test_filesystem_basic_operations():
     created_content = f.read()
 print(f"Created file content: {created_content}")
 """
-    
+
     result = await sandbox.execute(code)
-    print(f"DEBUG - stdout: {repr(result.stdout)}")  # To inspect the line breaks
     assert result.status == "success"
     assert "Hello, World!" in result.stdout
     assert "value" in result.stdout
     assert "Processing complete!" 
in result.stdout
 
 
-def test_filesystem_tool_usage():
+def test_filesystem_tool_usage() -> None:
     """Test filesystem with PyodideSandboxTool."""
-    tool = PyodideSandboxTool(enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True)
-    
+    tool = PyodideSandboxTool(
+        enable_filesystem=True,
+        allow_net=True,
+    )
+
     # Attach CSV data
     csv_data = "name,age\nAlice,30\nBob,25"
     tool.attach_file("users.csv", csv_data)
-    
+
 code = """
 import csv
 
@@ -276,20 +244,23 @@ def test_filesystem_tool_usage():
 for user in users:
     print(f"{user['name']} is {user['age']} years old")
 """
-    
+
     result = tool.invoke(code)
     assert "Alice is 30 years old" in result
     assert "Bob is 25 years old" in result
 
 
-async def test_binary_file_operations():
+async def test_binary_file_operations() -> None:
     """Test binary file operations."""
-    sandbox = PyodideSandbox(enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True)
-    
+    sandbox = PyodideSandbox(
+        enable_filesystem=True,
+        allow_net=True,
+    )
+
     # Create some binary data
-    binary_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01'
+    binary_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01"
     sandbox.attach_binary_file("image.png", binary_data)
-    
+
     code = """
 import base64
 
@@ -305,9 +276,9 @@ async def test_binary_file_operations():
 print(f"Size: {size} bytes")
 print(f"Original size: {len(data)}")  # Debug
 """
-    
+
     result = await sandbox.execute(code)
     assert result.status == "success"
     assert "Is PNG: True" in result.stdout
-    # Adjust to the actual size or check that it is >= 16
-    assert f"Size: {len(binary_data)} bytes" in result.stdout
\ No newline at end of file
+    # Verify the size matches the binary data size
+    assert f"Size: {len(binary_data)} bytes" in result.stdout

From 867b6745b572b938dfea631ee3b4e4517a691906 Mon Sep 17 00:00:00 2001
From: fullzer4 
Date: Wed, 28 May 2025 13:12:23 -0300
Subject: [PATCH 09/27] fix: typescript conventions

---
 libs/pyodide-sandbox-js/main.ts      | 50 ++++++++++++++-----
 libs/pyodide-sandbox-js/main_test.ts | 72 +---------------------------
 2 files changed, 40 insertions(+), 82 deletions(-)

diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts
index d5d7a8d..56f1b2a 100644
--- a/libs/pyodide-sandbox-js/main.ts
+++ b/libs/pyodide-sandbox-js/main.ts
@@ -298,17 +298,23 @@ interface FileSystemOperation {
   destination?: string;
 }
 
-
+/**
+ * Resolves a relative path within the sandbox environment.
+ *
+ * @param inputPath - The input path to resolve
+ * @param mountPoint - The sandbox mount point (default: "/sandbox")
+ * @returns The resolved absolute path within the sandbox
+ */
 function resolvePathInSandbox(
   inputPath: string,
   mountPoint: string = "/sandbox"
 ): string {
-  // Se já é absoluto, retorna como está
+  // If already absolute, return as is
   if (inputPath.startsWith("/")) {
     return inputPath;
   }
 
-  // Resolve direto no mount point
+  // Resolve directly in mount point
   if (inputPath.startsWith("./")) {
     const cleanPath = inputPath.substring(2);
     return `${mountPoint}/${cleanPath}`.replace(/\/+/g, "/");
@@ -320,7 +326,7 @@ function resolvePathInSandbox(
 }
 
 /**
- * Setup memory filesystem environment in Python
+ * Setup memory filesystem environment in Python. 
*/ function setupFileSystem(pyodide: any): void { const mountPoint = "/sandbox"; @@ -506,6 +512,11 @@ async function runPython( } let sessionData: Uint8Array | null = null; + if (options.sessionBytes && !options.sessionMetadata) { + console.error("sessionMetadata is required when providing sessionBytes"); + return { success: false, error: "sessionMetadata is required when providing sessionBytes" }; + } + // Import prepared environment module const prepare_env = pyodide.pyimport("prepare_env"); @@ -515,7 +526,7 @@ async function runPython( fileSystemResults = await performFileSystemOperations(pyodide, options.fileSystemOperations, fsOptions); } - // Prepare packages to install + // Prepare packages to install (include dill) const defaultPackages = options.stateful ? ["dill"] : []; const additionalPackagesToInstall = options.sessionBytes ? [...new Set([...defaultPackages, ...sessionMetadata.packages])] @@ -534,6 +545,7 @@ async function runPython( ); if (installErrors.length > 0) { + // Restore the original console.log function console.log = originalLog; return { success: false, @@ -547,28 +559,32 @@ async function runPython( if (options.sessionBytes) { sessionData = Uint8Array.from(JSON.parse(options.sessionBytes)); + // Run session preamble await prepare_env.load_session_bytes(sessionData); } const packages = installedPackages.map((pkg: any) => pkg.get("package")); + // Restore the original console.log function console.log = originalLog; - // Execute Python code + // Run the Python code const rawValue = await pyodide.runPythonAsync(pythonCode); + // Dump result to string const jsonValue = await prepare_env.dumps(rawValue); - // Update session metadata + // Update session metadata with installed packages sessionMetadata.packages = [ ...new Set([...sessionMetadata.packages, ...packages]), ]; sessionMetadata.lastModified = new Date().toISOString(); if (options.stateful) { + // Save session state to sessionBytes sessionData = await prepare_env.dump_session_bytes() as Uint8Array; } - // Build result + // Return the result with stdout and stderr output const result: PyodideResult = { success: true, result: rawValue, @@ -658,14 +674,18 @@ OPTIONS: }; if (!options.code && !options.file) { - console.error("Error: You must provide Python code using either -c/--code or -f/--file option."); + console.error( + "Error: You must provide Python code using either -c/--code or -f/--file option.\nUse --help for usage information." + ); Deno.exit(1); } + // Get Python code from file or command line argument let pythonCode = ""; if (options.file) { try { + // Resolve relative or absolute file path const filePath = options.file.startsWith("/") ? options.file : join(Deno.cwd(), options.file); @@ -676,6 +696,7 @@ OPTIONS: Deno.exit(1); } } else { + // Process code from command line (replacing escaped newlines) pythonCode = options.code?.replace(/\\n/g, "\n") ?? ""; } @@ -705,9 +726,9 @@ OPTIONS: const result = await runPython(pythonCode, runOptions); - // Output result + // Create output JSON with stdout, stderr, and result const outputJson: any = { - stdout: result.stdout?.join('\n') || null, // <-- ADICIONAR '\n' + stdout: result.stdout?.join('\n') || null, stderr: result.success ? (result.stderr?.join('\n') || null) : result.error || null, result: result.success ? 
JSON.parse(result.jsonResult || 'null') : null, success: result.success, @@ -723,18 +744,23 @@ OPTIONS: outputJson.fileSystemOperations = result.fileSystemOperations; } + // Output as JSON to stdout console.log(JSON.stringify(outputJson)); + // Exit with error code if Python execution failed if (!result.success) { Deno.exit(1); } } +// If this module is run directly if (import.meta.main) { + // Override the global environment variables that Deno's permission prompts look for + // to suppress color-related permission prompts main().catch((err) => { console.error("Unhandled error:", err); Deno.exit(1); }); } -export { runPython, resolvePathInSandbox, type FileSystemOperation, type FileSystemOptions }; \ No newline at end of file +export { runPython, resolvePathInSandbox, type FileSystemOperation, type FileSystemOptions }; diff --git a/libs/pyodide-sandbox-js/main_test.ts b/libs/pyodide-sandbox-js/main_test.ts index 4a53cb8..36a0dc1 100644 --- a/libs/pyodide-sandbox-js/main_test.ts +++ b/libs/pyodide-sandbox-js/main_test.ts @@ -1,4 +1,4 @@ -import { assertEquals, assertNotEquals, assertExists } from "@std/assert"; +import { assertEquals, assertNotEquals } from "@std/assert"; import { runPython, resolvePathInSandbox, type FileSystemOperation } from "./main.ts"; Deno.test("runPython simple test", async () => { @@ -36,9 +36,7 @@ Deno.test("resolvePathInSandbox - basic resolution", () => { assertEquals(resolvePathInSandbox("/tmp/absolute.txt"), "/tmp/absolute.txt"); }); -// REMOVIDO: teste "resolvePathInSandbox - with working directory" pois working directory foi removido - -Deno.test("FileSystem - basic operations", async () => { +Deno.test("FileSystem - operations", async () => { const operations: FileSystemOperation[] = [ { operation: "write", @@ -95,71 +93,6 @@ result assertEquals(resultObj.working_dir, "/sandbox"); }); -// REMOVIDO: teste "FileSystem - working directory" pois working directory foi removido - -Deno.test("FileSystem - complex workflow", async () => { - const operations: FileSystemOperation[] = [ - { - operation: "mkdir", - path: "workspace", - }, - { - operation: "write", - path: "workspace/input.txt", - content: "oldvalue=100\nother line", - }, - { - operation: "write", - path: "workspace/config.ini", - content: "[database]\nhost=localhost\nport=5432", - } - ]; - - const result = await runPython(` -import os -import configparser - -# Modify input file -with open("workspace/input.txt", "r") as f: - content = f.read() - -modified_content = content.replace("oldvalue=100", "newvalue=200") - -with open("workspace/input.txt", "w") as f: - f.write(modified_content) - -# Read config -config = configparser.ConfigParser() -config.read("workspace/config.ini") - -# Create report -with open("workspace/report.txt", "w") as f: - f.write(f"Host: {config['database']['host']}\\n") - f.write("Modification successful\\n") - -workspace_files = os.listdir("workspace") - -result = { - "modification_success": "newvalue=200" in modified_content, - "db_host": config['database']['host'], - "workspace_files": sorted(workspace_files), - "working_dir": os.getcwd() -} - -result - `, { - fileSystemOperations: operations - }); - - assertEquals(result.success, true); - const resultObj = JSON.parse(result.jsonResult || "null"); - - assertEquals(resultObj.modification_success, true); - assertEquals(resultObj.db_host, "localhost"); - assertEquals(resultObj.workspace_files, ["config.ini", "input.txt", "report.txt"]); - assertEquals(resultObj.working_dir, "/sandbox"); -}); - Deno.test("FileSystem - binary 
operations", async () => { const operations: FileSystemOperation[] = [ { @@ -204,7 +137,6 @@ result assertEquals(resultObj.working_dir, "/sandbox"); }); -// NOVO: Teste adicional para verificar o memfs funcionando com diferentes estruturas de diretórios Deno.test("FileSystem - memfs directory structure", async () => { const operations: FileSystemOperation[] = [ { From 0bac09aff48724130b4542c63d3ea893c2077dff Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 2025 13:23:20 -0300 Subject: [PATCH 10/27] fix: rollback some tests --- libs/sandbox-py/langchain_sandbox/pyodide.py | 3 +- .../tests/unit_tests/test_pyodide_sandbox.py | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 99d7f3a..20eff4a 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -86,7 +86,7 @@ def to_dict(self) -> dict[str, str]: # Published package name PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" - +#PKG_NAME = "../pyodide-sandbox-js/main.ts" # noqa: ERA001 def build_permission_flag( flag: str, @@ -814,7 +814,6 @@ class PyodideSandboxToolInput(BaseModel): code: str = Field(description="Code to execute.") - # CORREÇÃO: Seguir exatamente o padrão da main_lib super().__init__( stateful=stateful, timeout_seconds=timeout_seconds, diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index 82bb427..4ba6c60 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -52,6 +52,49 @@ def get_default_sync_sandbox(stateful: bool = False) -> SyncPyodideSandbox: allow_ffi=False, ) +def test_pyodide_sandbox_tool() -> None: + """Test synchronous invocation of PyodideSandboxTool.""" + tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, + ) + result = tool.invoke("x = 5; print(x)") + assert result == "5" + result = tool.invoke("x = 5; print(1); print(2)") + assert result == "1\n2" + + +def test_pyodide_timeout() -> None: + """Test synchronous invocation of PyodideSandboxTool with timeout.""" + tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, + ) + result = tool.invoke("while True: pass") + assert result == "Error during execution: Execution timed out after 0.1 seconds" + + +async def test_async_pyodide_sandbox_tool() -> None: + """Test synchronous invocation of PyodideSandboxTool.""" + tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, + ) + result = await tool.ainvoke("x = 5; print(x)") + assert result == "5" + result = await tool.ainvoke("x = 5; print(1); print(2)") + assert result == "1\n2" + + +async def test_async_pyodide_timeout() -> None: + """Test synchronous invocation of PyodideSandboxTool with timeout.""" + tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, + ) + result = await tool.ainvoke("while True: pass") + assert result == "Error during execution: Execution timed out after 0.1 seconds" + async def test_stdout_sessionless(pyodide_package: None) -> None: """Test without a session ID.""" From 251111991928becfade00ad793c6f880b5f66982 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 2025 13:25:46 -0300 Subject: [PATCH 11/27] fix: permissions --- libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index 4ba6c60..9a96a07 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -35,6 +35,8 @@ def get_default_sandbox(stateful: bool = False) -> PyodideSandbox: """Get default PyodideSandbox instance for testing.""" return PyodideSandbox( stateful=stateful, + allow_read=True, + allow_write=True, allow_net=True, allow_env=False, allow_run=False, @@ -46,6 +48,8 @@ def get_default_sync_sandbox(stateful: bool = False) -> SyncPyodideSandbox: """Get default SyncPyodideSandbox instance for testing.""" return SyncPyodideSandbox( stateful=stateful, + allow_read=True, + allow_write=True, allow_net=True, allow_env=False, allow_run=False, From 6e9c10432d6df358887f9b4301e0f2f6d38edca4 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 2025 13:34:08 -0300 Subject: [PATCH 12/27] feat: README changes --- README.md | 59 +++++++++++++++++++++++++++++++++--- libs/sandbox-py/README.md | 64 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 115 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e72e9d1..55c4a2a 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## Limitations - **Latency**: There is a few seconds of latency when starting the sandbox per run -- **File access**: Currently not supported. You will not be able to access the files written by the sandbox. - **Network requests**: If you need to make network requests please use `httpx.AsyncClient` instead of `requests`. ## 🚀 Quick Install @@ -141,6 +140,59 @@ result = await agent.ainvoke( ) ``` +### File System Support + +You can now attach files to the sandbox environment and perform data analysis: + +```python +import asyncio + +from langchain_sandbox import PyodideSandboxTool +from langgraph.prebuilt import create_react_agent + +# Define the sandbox tool with filesystem support +sandbox_tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, +) + +sales_data = """date,product,category,quantity,price,region +2024-01-15,Laptop,Electronics,2,1299.99,North +2024-01-16,Chair,Furniture,1,249.50,South +2024-01-16,T-shirt,Clothing,5,29.99,East +2024-01-17,Laptop,Electronics,1,1299.99,West +2024-01-18,Phone,Electronics,3,799.99,North +2024-01-19,Desk,Furniture,2,399.99,South +2024-01-20,Jeans,Clothing,4,79.99,East +2024-01-21,Tablet,Electronics,2,499.99,West +2024-01-22,Sofa,Furniture,1,899.99,North +2024-01-23,Shoes,Clothing,3,129.99,South""" + +sandbox_tool.attach_file("sales.csv", sales_data) + +# Create an agent with the sandbox tool +agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", [sandbox_tool] +) + +query = """Please analyze the sales data and tell me: +1. What is the total revenue by category? +2. Which region has the highest sales? +3. What are the top 3 best-selling products by revenue? 
+ +Use pandas to read the CSV file and perform the analysis.""" + +async def run_agent(query: str): + # Stream agent outputs + async for chunk in agent.astream({"messages": query}): + print(chunk) + print("\n") + +if __name__ == "__main__": + # Run the agent + asyncio.run(run_agent(query)) +``` + #### Stateful Tool > [!important] @@ -192,16 +244,15 @@ second_result = await agent.ainvoke( ) ``` - - See full examples here: * [ReAct agent](examples/react_agent.py) * [CodeAct agent](examples/codeact_agent.py) +* [ReAct agent with csv](examples/react_agent_with_csv.py) ## 🧩 Components The sandbox consists of two main components: - **`pyodide-sandbox-js`**: JavaScript/TypeScript module using Deno to provide the core sandboxing functionality. -- **`sandbox-py`**: Contains `PyodideSandbox` which just wraps the JavaScript/TypeScript module and executes it as a subprocess. +- **`sandbox-py`**: Contains `PyodideSandbox` which just wraps the JavaScript/TypeScript module and executes it as a subprocess. \ No newline at end of file diff --git a/libs/sandbox-py/README.md b/libs/sandbox-py/README.md index e37ba58..55c4a2a 100644 --- a/libs/sandbox-py/README.md +++ b/libs/sandbox-py/README.md @@ -19,7 +19,6 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## Limitations - **Latency**: There is a few seconds of latency when starting the sandbox per run -- **File access**: Currently not supported. You will not be able to access the files written by the sandbox. - **Network requests**: If you need to make network requests please use `httpx.AsyncClient` instead of `requests`. ## 🚀 Quick Install @@ -34,6 +33,11 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## 💡 Example Usage + +> [!warning] +> Use `alllow_net` to limit the network requests that can be made by the sandboxed code to avoid SSRF attacks +> https://docs.deno.com/runtime/fundamentals/security/#network-access + ```python from langchain_sandbox import PyodideSandbox @@ -136,6 +140,59 @@ result = await agent.ainvoke( ) ``` +### File System Support + +You can now attach files to the sandbox environment and perform data analysis: + +```python +import asyncio + +from langchain_sandbox import PyodideSandboxTool +from langgraph.prebuilt import create_react_agent + +# Define the sandbox tool with filesystem support +sandbox_tool = PyodideSandboxTool( + enable_filesystem=True, + allow_net=True, +) + +sales_data = """date,product,category,quantity,price,region +2024-01-15,Laptop,Electronics,2,1299.99,North +2024-01-16,Chair,Furniture,1,249.50,South +2024-01-16,T-shirt,Clothing,5,29.99,East +2024-01-17,Laptop,Electronics,1,1299.99,West +2024-01-18,Phone,Electronics,3,799.99,North +2024-01-19,Desk,Furniture,2,399.99,South +2024-01-20,Jeans,Clothing,4,79.99,East +2024-01-21,Tablet,Electronics,2,499.99,West +2024-01-22,Sofa,Furniture,1,899.99,North +2024-01-23,Shoes,Clothing,3,129.99,South""" + +sandbox_tool.attach_file("sales.csv", sales_data) + +# Create an agent with the sandbox tool +agent = create_react_agent( + "anthropic:claude-3-7-sonnet-latest", [sandbox_tool] +) + +query = """Please analyze the sales data and tell me: +1. What is the total revenue by category? +2. Which region has the highest sales? +3. What are the top 3 best-selling products by revenue? 
+ +Use pandas to read the CSV file and perform the analysis.""" + +async def run_agent(query: str): + # Stream agent outputs + async for chunk in agent.astream({"messages": query}): + print(chunk) + print("\n") + +if __name__ == "__main__": + # Run the agent + asyncio.run(run_agent(query)) +``` + #### Stateful Tool > [!important] @@ -187,16 +244,15 @@ second_result = await agent.ainvoke( ) ``` - - See full examples here: * [ReAct agent](examples/react_agent.py) * [CodeAct agent](examples/codeact_agent.py) +* [ReAct agent with csv](examples/react_agent_with_csv.py) ## 🧩 Components The sandbox consists of two main components: - **`pyodide-sandbox-js`**: JavaScript/TypeScript module using Deno to provide the core sandboxing functionality. -- **`sandbox-py`**: Contains `PyodideSandbox` which just wraps the JavaScript/TypeScript module and executes it as a subprocess. +- **`sandbox-py`**: Contains `PyodideSandbox` which just wraps the JavaScript/TypeScript module and executes it as a subprocess. \ No newline at end of file From 13f30571fed189346edaadc5838888f7413a0492 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 2025 14:56:58 -0300 Subject: [PATCH 13/27] fix: unified PyodideSandboxTool with complete backward compatibility --- libs/sandbox-py/langchain_sandbox/pyodide.py | 212 +++++++++--------- .../tests/unit_tests/test_pyodide_sandbox.py | 32 +-- 2 files changed, 119 insertions(+), 125 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 20eff4a..08e6541 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -86,7 +86,8 @@ def to_dict(self) -> dict[str, str]: # Published package name PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" -#PKG_NAME = "../pyodide-sandbox-js/main.ts" # noqa: ERA001 +# PKG_NAME = "../pyodide-sandbox-js/main.ts" # noqa: ERA001 + def build_permission_flag( flag: str, @@ -663,10 +664,6 @@ def execute( class PyodideSandboxTool(BaseTool): r"""Tool for running python code in a PyodideSandbox. - This tool extends the base PyodideSandbox functionality with support for - attaching files and creating an in-memory filesystem. Files attached to - the tool will be available within the Python execution environment. - If you use a stateful sandbox (PyodideSandboxTool(stateful=True)), the state between code executions (to variables, imports, and definitions, etc.), will be persisted using LangGraph checkpointer. @@ -822,6 +819,14 @@ class PyodideSandboxToolInput(BaseModel): **kwargs, ) + # Store initialization parameters + self.allow_env = kwargs.get("allow_env", False) + self.allow_read = kwargs.get("allow_read", False) + self.allow_write = kwargs.get("allow_write", False) + self.allow_run = kwargs.get("allow_run", False) + self.allow_ffi = kwargs.get("allow_ffi", False) + self.node_modules_dir = kwargs.get("node_modules_dir", "auto") + self.args_schema: type[BaseModel] = PyodideSandboxToolInput self._structured_tool = None # Initialize as None self._sandbox = PyodideSandbox( @@ -876,6 +881,94 @@ def _build_description(self) -> str: ) return base + def attach_file( + self, + path: str, + content: str, + *, + encoding: str = "utf-8", + ) -> None: + """Attach a text file to the sandbox environment. + + This file will be available in the sandbox's in-memory filesystem + when code is executed. The tool's description will be automatically + updated to reflect the attached files. 
+ + Args: + path: File path within the sandbox filesystem + content: Text content of the file + encoding: Text encoding (default: utf-8) + """ + self._sandbox.attach_file(path, content, encoding=encoding) + self._sync_sandbox.attach_file(path, content, encoding=encoding) + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + + def attach_binary_file( + self, + path: str, + content: bytes, + ) -> None: + """Attach a binary file to the sandbox environment. + + This file will be available in the sandbox's in-memory filesystem + when code is executed. The tool's description will be automatically + updated to reflect the attached files. + + Args: + path: File path within the sandbox filesystem + content: Binary content of the file + """ + self._sandbox.attach_binary_file(path, content) + self._sync_sandbox.attach_binary_file(path, content) + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + + def create_directory(self, path: str) -> None: + """Create a directory in the sandbox environment. + + This directory will be available in the sandbox's in-memory filesystem + when code is executed. + + Args: + path: Directory path within the sandbox filesystem + """ + self._sandbox.create_directory(path) + self._sync_sandbox.create_directory(path) + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + + def get_attached_files(self) -> list[str]: + """Get list of attached file paths. + + Returns: + List of file paths that will be available in the sandbox filesystem + """ + return self._sandbox.get_attached_files() + + def clear_filesystem_operations(self) -> None: + """Clear all attached files and directories. + + This removes all files and directories that were queued to be created + in the sandbox filesystem and updates the tool description. + """ + self._sandbox.clear_filesystem_operations() + self._sync_sandbox.clear_filesystem_operations() + # Update both BaseTool and StructuredTool descriptions + new_description = self._build_description() + self.description = new_description + if self._structured_tool: + self._structured_tool.description = new_description + def as_structured_tool(self) -> StructuredTool: """Return a StructuredTool version of this tool. @@ -899,17 +992,6 @@ def as_structured_tool(self) -> StructuredTool: ) return self._structured_tool - @property - def tool(self) -> StructuredTool: - """Legacy property for backwards compatibility. - - DEPRECATED: Use as_structured_tool() instead. - - Returns: - StructuredTool instance - """ - return self.as_structured_tool() - def _run_sync(self, code: str) -> str: """Synchronous execution function for non-stateful mode.""" result = self._sync_sandbox.execute( @@ -982,94 +1064,6 @@ def _run_stateful_sync( } ) - def attach_file( - self, - path: str, - content: str, - *, - encoding: str = "utf-8", - ) -> None: - """Attach a text file to the sandbox environment. - - This file will be available in the sandbox's in-memory filesystem - when code is executed. The tool's description will be automatically - updated to reflect the attached files. 
- - Args: - path: File path within the sandbox filesystem - content: Text content of the file - encoding: Text encoding (default: utf-8) - """ - self._sandbox.attach_file(path, content, encoding=encoding) - self._sync_sandbox.attach_file(path, content, encoding=encoding) - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description - - def attach_binary_file( - self, - path: str, - content: bytes, - ) -> None: - """Attach a binary file to the sandbox environment. - - This file will be available in the sandbox's in-memory filesystem - when code is executed. The tool's description will be automatically - updated to reflect the attached files. - - Args: - path: File path within the sandbox filesystem - content: Binary content of the file - """ - self._sandbox.attach_binary_file(path, content) - self._sync_sandbox.attach_binary_file(path, content) - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description - - def create_directory(self, path: str) -> None: - """Create a directory in the sandbox environment. - - This directory will be available in the sandbox's in-memory filesystem - when code is executed. - - Args: - path: Directory path within the sandbox filesystem - """ - self._sandbox.create_directory(path) - self._sync_sandbox.create_directory(path) - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description - - def get_attached_files(self) -> list[str]: - """Get list of attached file paths. - - Returns: - List of file paths that will be available in the sandbox filesystem - """ - return self._sandbox.get_attached_files() - - def clear_filesystem_operations(self) -> None: - """Clear all attached files and directories. - - This removes all files and directories that were queued to be created - in the sandbox filesystem and updates the tool description. - """ - self._sandbox.clear_filesystem_operations() - self._sync_sandbox.clear_filesystem_operations() - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description - def _run( self, code: str, @@ -1199,8 +1193,8 @@ class PyodideSandboxStructuredTool: r"""Pure StructuredTool wrapper for PyodideSandbox with dynamic description updates. This class provides a standalone StructuredTool interface for users who prefer - to work exclusively with StructuredTool rather than BaseTool. It maintains all - the filesystem functionality and dynamic description updates. + to work exclusively with StructuredTool rather than the main PyodideSandboxTool. + It maintains all the filesystem functionality and dynamic description updates. 
Example usage: ```python @@ -1217,7 +1211,7 @@ class PyodideSandboxStructuredTool: # Attach files sandbox_tool.attach_file("data.csv", "name,age\\nJohn,25") - # Use in agent + # Use in agent - access via .tool property agent = create_react_agent(llm, [sandbox_tool.tool]) ``` """ @@ -1229,8 +1223,6 @@ def __init__(self, **kwargs: Any) -> None: # noqa: ANN401 **kwargs: All arguments are passed to PyodideSandboxTool """ self._base_tool = PyodideSandboxTool(**kwargs) - # Force creation of the StructuredTool - self._tool = self._base_tool.as_structured_tool() @property def tool(self) -> StructuredTool: @@ -1239,7 +1231,7 @@ def tool(self) -> StructuredTool: Returns: StructuredTool instance with current description """ - return self._tool + return self._base_tool.as_structured_tool() def attach_file( self, diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index 9a96a07..ebbed3b 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -62,9 +62,9 @@ def test_pyodide_sandbox_tool() -> None: enable_filesystem=True, allow_net=True, ) - result = tool.invoke("x = 5; print(x)") + result = tool.invoke({"code": "x = 5; print(x)"}) assert result == "5" - result = tool.invoke("x = 5; print(1); print(2)") + result = tool.invoke({"code": "x = 5; print(1); print(2)"}) assert result == "1\n2" @@ -73,31 +73,33 @@ def test_pyodide_timeout() -> None: tool = PyodideSandboxTool( enable_filesystem=True, allow_net=True, + timeout_seconds=0.1, ) - result = tool.invoke("while True: pass") - assert result == "Error during execution: Execution timed out after 0.1 seconds" + result = tool.invoke({"code": "while True: pass"}) + assert "timed out after 0.1 seconds" in result async def test_async_pyodide_sandbox_tool() -> None: - """Test synchronous invocation of PyodideSandboxTool.""" + """Test asynchronous invocation of PyodideSandboxTool.""" tool = PyodideSandboxTool( enable_filesystem=True, allow_net=True, ) - result = await tool.ainvoke("x = 5; print(x)") + result = await tool.ainvoke({"code": "x = 5; print(x)"}) assert result == "5" - result = await tool.ainvoke("x = 5; print(1); print(2)") + result = await tool.ainvoke({"code": "x = 5; print(1); print(2)"}) assert result == "1\n2" async def test_async_pyodide_timeout() -> None: - """Test synchronous invocation of PyodideSandboxTool with timeout.""" + """Test asynchronous invocation of PyodideSandboxTool with timeout.""" tool = PyodideSandboxTool( enable_filesystem=True, allow_net=True, + timeout_seconds=0.1, ) - result = await tool.ainvoke("while True: pass") - assert result == "Error during execution: Execution timed out after 0.1 seconds" + result = await tool.ainvoke({"code": "while True: pass"}) + assert "timed out after 0.1 seconds" in result async def test_stdout_sessionless(pyodide_package: None) -> None: @@ -219,7 +221,7 @@ def test_sync_pyodide_sandbox_timeout(pyodide_package: None) -> None: assert "timed out" in result.stderr.lower() -async def test_filesystem_basic_operations() -> None: +async def test_filesystem_basic_operations(pyodide_package: None) -> None: """Test basic filesystem operations.""" sandbox = PyodideSandbox( enable_filesystem=True, @@ -262,7 +264,7 @@ async def test_filesystem_basic_operations() -> None: """ result = await sandbox.execute(code) - assert result.status == "success" + assert result.status == "success", f"Execution failed: {result.stderr}" assert "Hello, World!" 
in result.stdout assert "value" in result.stdout assert "Processing complete!" in result.stdout @@ -292,12 +294,12 @@ def test_filesystem_tool_usage() -> None: print(f"{user['name']} is {user['age']} years old") """ - result = tool.invoke(code) + result = tool.invoke({"code": code}) assert "Alice is 30 years old" in result assert "Bob is 25 years old" in result -async def test_binary_file_operations() -> None: +async def test_binary_file_operations(pyodide_package: None) -> None: """Test binary file operations.""" sandbox = PyodideSandbox( enable_filesystem=True, @@ -325,7 +327,7 @@ async def test_binary_file_operations() -> None: """ result = await sandbox.execute(code) - assert result.status == "success" + assert result.status == "success", f"Execution failed: {result.stderr}" assert "Is PNG: True" in result.stdout # Verify the size matches the binary data size assert f"Size: {len(binary_data)} bytes" in result.stdout From 0d274359908d991948a6072b2a6e3e4deee4bcb5 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 2025 15:16:38 -0300 Subject: [PATCH 14/27] fix: clean README --- README.md | 15 +-------------- libs/sandbox-py/README.md | 15 +-------------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 55c4a2a..f28f959 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,6 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## 💡 Example Usage -> [!warning] -> Use `alllow_net` to limit the network requests that can be made by the sandboxed code to avoid SSRF attacks -> https://docs.deno.com/runtime/fundamentals/security/#network-access ```python from langchain_sandbox import PyodideSandbox @@ -156,17 +153,7 @@ sandbox_tool = PyodideSandboxTool( allow_net=True, ) -sales_data = """date,product,category,quantity,price,region -2024-01-15,Laptop,Electronics,2,1299.99,North -2024-01-16,Chair,Furniture,1,249.50,South -2024-01-16,T-shirt,Clothing,5,29.99,East -2024-01-17,Laptop,Electronics,1,1299.99,West -2024-01-18,Phone,Electronics,3,799.99,North -2024-01-19,Desk,Furniture,2,399.99,South -2024-01-20,Jeans,Clothing,4,79.99,East -2024-01-21,Tablet,Electronics,2,499.99,West -2024-01-22,Sofa,Furniture,1,899.99,North -2024-01-23,Shoes,Clothing,3,129.99,South""" +sales_data = """...csv_data""" sandbox_tool.attach_file("sales.csv", sales_data) diff --git a/libs/sandbox-py/README.md b/libs/sandbox-py/README.md index 55c4a2a..f28f959 100644 --- a/libs/sandbox-py/README.md +++ b/libs/sandbox-py/README.md @@ -34,9 +34,6 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## 💡 Example Usage -> [!warning] -> Use `alllow_net` to limit the network requests that can be made by the sandboxed code to avoid SSRF attacks -> https://docs.deno.com/runtime/fundamentals/security/#network-access ```python from langchain_sandbox import PyodideSandbox @@ -156,17 +153,7 @@ sandbox_tool = PyodideSandboxTool( allow_net=True, ) -sales_data = """date,product,category,quantity,price,region -2024-01-15,Laptop,Electronics,2,1299.99,North -2024-01-16,Chair,Furniture,1,249.50,South -2024-01-16,T-shirt,Clothing,5,29.99,East -2024-01-17,Laptop,Electronics,1,1299.99,West -2024-01-18,Phone,Electronics,3,799.99,North -2024-01-19,Desk,Furniture,2,399.99,South -2024-01-20,Jeans,Clothing,4,79.99,East -2024-01-21,Tablet,Electronics,2,499.99,West -2024-01-22,Sofa,Furniture,1,899.99,North -2024-01-23,Shoes,Clothing,3,129.99,South""" +sales_data = """...csv_data""" sandbox_tool.attach_file("sales.csv", sales_data) From 
672eeb1d2b9ea14d5756cc6d2f4d42a46c16b1dd Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 2025 19:02:04 -0300 Subject: [PATCH 15/27] fix: lint --- libs/sandbox-py/langchain_sandbox/__init__.py | 6 +----- libs/sandbox-py/langchain_sandbox/pyodide.py | 21 +++++-------------- .../tests/unit_tests/test_pyodide_sandbox.py | 1 + 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/__init__.py b/libs/sandbox-py/langchain_sandbox/__init__.py index 9265d7e..526a70f 100644 --- a/libs/sandbox-py/langchain_sandbox/__init__.py +++ b/libs/sandbox-py/langchain_sandbox/__init__.py @@ -6,8 +6,4 @@ SyncPyodideSandbox, ) -__all__ = [ - "PyodideSandbox", - "PyodideSandboxTool", - "SyncPyodideSandbox" -] +__all__ = ["PyodideSandbox", "PyodideSandboxTool", "SyncPyodideSandbox"] diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 08e6541..fb459db 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -355,9 +355,7 @@ def get_attached_files(self) -> list[str]: List of file paths that will be available in the sandbox filesystem """ return [ - op.path - for op in self._filesystem_operations - if op.operation == "write" + op.path for op in self._filesystem_operations if op.operation == "write" ] def clear_filesystem_operations(self) -> None: @@ -421,9 +419,7 @@ def _build_command( if self._filesystem_operations or self.enable_filesystem: if self._filesystem_operations: fs_ops = [op.to_dict() for op in self._filesystem_operations] - fs_json = json.dumps( - fs_ops, ensure_ascii=True, separators=(",", ":") - ) + fs_json = json.dumps(fs_ops, ensure_ascii=True, separators=(",", ":")) cmd.extend(["-x", fs_json]) logger.debug("Filesystem enabled with %d operations", len(fs_ops)) else: @@ -983,20 +979,14 @@ def as_structured_tool(self) -> StructuredTool: self._structured_tool = StructuredTool.from_function( name=self.name, description=self._build_description(), - func=( - self._run_sync - if not self.stateful - else self._run_stateful_sync - ), + func=(self._run_sync if not self.stateful else self._run_stateful_sync), args_schema=self.args_schema, ) return self._structured_tool def _run_sync(self, code: str) -> str: """Synchronous execution function for non-stateful mode.""" - result = self._sync_sandbox.execute( - code, timeout_seconds=self.timeout_seconds - ) + result = self._sync_sandbox.execute(code, timeout_seconds=self.timeout_seconds) if result.status == "error": error_msg = ( @@ -1025,8 +1015,7 @@ def _run_stateful_sync( actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) if missing_keys := required_keys - actual_keys: error_msg = ( - "Input state is missing " - f"the following required keys: {missing_keys}" + f"Input state is missing the following required keys: {missing_keys}" ) raise ValueError(error_msg) diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index ebbed3b..4e3cd7c 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -56,6 +56,7 @@ def get_default_sync_sandbox(stateful: bool = False) -> SyncPyodideSandbox: allow_ffi=False, ) + def test_pyodide_sandbox_tool() -> None: """Test synchronous invocation of PyodideSandboxTool.""" tool = PyodideSandboxTool( From fd0285c4e693404363cc683e522686f957ec2b30 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 28 May 
2025 23:52:52 -0300
Subject: [PATCH 16/27] fix: Remove PyodideSandboxStructuredTool and fix
 unintentional README changes

---
 README.md                                    |   3 +
 libs/sandbox-py/README.md                    |   3 +
 libs/sandbox-py/langchain_sandbox/pyodide.py | 109 ++++++++++++++++++-
 3 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f28f959..b8a4a00 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,9 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c
 
 ## 💡 Example Usage
 
+> [!warning]
+> Use `allow_net` to limit the network requests that can be made by the sandboxed code to avoid SSRF attacks
+> https://docs.deno.com/runtime/fundamentals/security/#network-access
 
 ```python
 from langchain_sandbox import PyodideSandbox
diff --git a/libs/sandbox-py/README.md b/libs/sandbox-py/README.md
index f28f959..b8a4a00 100644
--- a/libs/sandbox-py/README.md
+++ b/libs/sandbox-py/README.md
@@ -34,6 +34,9 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c
 
 ## 💡 Example Usage
 
+> [!warning]
+> Use `allow_net` to limit the network requests that can be made by the sandboxed code to avoid SSRF attacks
+> https://docs.deno.com/runtime/fundamentals/security/#network-access
 
 ```python
 from langchain_sandbox import PyodideSandbox
diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py
index fb459db..bb4d885 100644
--- a/libs/sandbox-py/langchain_sandbox/pyodide.py
+++ b/libs/sandbox-py/langchain_sandbox/pyodide.py
@@ -7,6 +7,9 @@
 import logging
 import subprocess
 import time
+import os
+import glob
+from pathlib import Path
 from typing import Annotated, Any, Literal
 
 from langchain_core.callbacks import (
@@ -112,6 +115,93 @@ def build_permission_flag(
     return None
 
 
+def get_pyodide_required_paths() -> list[str]:
+    """Get the specific paths required for Pyodide to function properly.
+
+    This function automatically detects the Pyodide installation paths that need
+    read permissions instead of requiring global read access.
+ + Returns: + List of specific paths that Pyodide needs to read from + """ + required_paths = ["node_modules"] # Always include node_modules + + # Try to find Pyodide installation paths + try: + # Look for pyodide in common Deno cache locations + home_dir = Path.home() + + # Common Deno cache locations + deno_cache_paths = [ + home_dir / ".cache" / "deno", + home_dir / ".deno", + ] + + # Also check current working directory node_modules + cwd_node_modules = Path.cwd() / "node_modules" + if cwd_node_modules.exists(): + deno_cache_paths.append(cwd_node_modules) + + # Look for relative paths in the project + project_paths = [ + Path("libs/pyodide-sandbox-js/node_modules"), + Path("../pyodide-sandbox-js/node_modules"), + Path("./node_modules"), + ] + + for project_path in project_paths: + if project_path.exists(): + deno_cache_paths.append(project_path) + + for cache_path in deno_cache_paths: + if not cache_path.exists(): + continue + + # Look for pyodide directories + pyodide_patterns = [ + cache_path / "**" / "pyodide*", + cache_path / ".deno" / "pyodide*", + ] + + for pattern in pyodide_patterns: + for pyodide_dir in glob.glob(str(pattern), recursive=True): + pyodide_path = Path(pyodide_dir) + if pyodide_path.is_dir(): + # Add the pyodide directory and its contents + required_paths.append(str(pyodide_path)) + + # Look for specific files that pyodide needs + wasm_files = list(pyodide_path.glob("**/*.wasm")) + zip_files = list(pyodide_path.glob("**/*.zip")) + js_files = list(pyodide_path.glob("**/*.js")) + + # Add parent directories of essential files + for file_list in [wasm_files, zip_files, js_files]: + for file_path in file_list: + parent_dir = str(file_path.parent) + if parent_dir not in required_paths: + required_paths.append(parent_dir) + + except Exception as e: + logger.debug(f"Error detecting Pyodide paths: {e}") + # Fallback to common patterns + fallback_paths = [ + "~/.cache/deno", + "~/.deno", + "./node_modules", + "../pyodide-sandbox-js/node_modules", + "libs/pyodide-sandbox-js/node_modules", + ] + + for path in fallback_paths: + expanded_path = os.path.expanduser(path) + if os.path.exists(expanded_path): + required_paths.append(expanded_path) + + # Remove duplicates and return + return list(set(required_paths)) + + class BasePyodideSandbox: """Base class for PyodideSandbox implementations. 
@@ -228,11 +318,19 @@ def __init__( # Define permission configurations: # each tuple contains (flag, setting, defaults) + + # For read permissions, automatically include Pyodide paths if not explicitly set + read_defaults = ["node_modules"] + if allow_read is False: + # If read is False, add specific Pyodide paths instead of global access + pyodide_paths = get_pyodide_required_paths() + read_defaults.extend(pyodide_paths) + logger.debug(f"Auto-detected Pyodide paths for read access: {pyodide_paths}") + perm_defs = [ ("--allow-env", allow_env, None), - # For file system permissions, if no permission is specified, - # force node_modules - ("--allow-read", allow_read, ["node_modules"]), + # For file system permissions, use the enhanced read_defaults + ("--allow-read", allow_read, read_defaults), ("--allow-write", allow_write, ["node_modules"]), ("--allow-net", allow_net, None), ("--allow-run", allow_run, None), @@ -426,6 +524,11 @@ def _build_command( cmd.extend(["-x", "[]"]) logger.debug("Filesystem enabled with no initial operations") + # Log the complete command for debugging + cmd_str = ' '.join(cmd) + logger.info(f"Executing Deno command: {cmd_str}") + print(f"🚀 DENO CMD: {cmd_str}") + return cmd From f03e6a07bf6a475511dae1d4ec7602e0e55e5172 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Thu, 29 May 2025 08:06:30 -0300 Subject: [PATCH 17/27] fix: return file output limitation | remove local test functions --- README.md | 1 + libs/sandbox-py/README.md | 1 + libs/sandbox-py/langchain_sandbox/pyodide.py | 225 +------------------ 3 files changed, 5 insertions(+), 222 deletions(-) diff --git a/README.md b/README.md index b8a4a00..386d54b 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## Limitations - **Latency**: There is a few seconds of latency when starting the sandbox per run +- **File access**: Currently not supported. You will not be able to access the files written by the sandbox. - **Network requests**: If you need to make network requests please use `httpx.AsyncClient` instead of `requests`. ## 🚀 Quick Install diff --git a/libs/sandbox-py/README.md b/libs/sandbox-py/README.md index b8a4a00..386d54b 100644 --- a/libs/sandbox-py/README.md +++ b/libs/sandbox-py/README.md @@ -19,6 +19,7 @@ LangChain Sandbox provides a secure environment for executing untrusted Python c ## Limitations - **Latency**: There is a few seconds of latency when starting the sandbox per run +- **File access**: Currently not supported. You will not be able to access the files written by the sandbox. - **Network requests**: If you need to make network requests please use `httpx.AsyncClient` instead of `requests`. ## 🚀 Quick Install diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index bb4d885..13ccc25 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -7,9 +7,6 @@ import logging import subprocess import time -import os -import glob -from pathlib import Path from typing import Annotated, Any, Literal from langchain_core.callbacks import ( @@ -115,93 +112,6 @@ def build_permission_flag( return None -def get_pyodide_required_paths() -> list[str]: - """Get the specific paths required for Pyodide to function properly. - - This function automatically detects the Pyodide installation paths that need - read permissions instead of requiring global read access. 
- - Returns: - List of specific paths that Pyodide needs to read from - """ - required_paths = ["node_modules"] # Always include node_modules - - # Try to find Pyodide installation paths - try: - # Look for pyodide in common Deno cache locations - home_dir = Path.home() - - # Common Deno cache locations - deno_cache_paths = [ - home_dir / ".cache" / "deno", - home_dir / ".deno", - ] - - # Also check current working directory node_modules - cwd_node_modules = Path.cwd() / "node_modules" - if cwd_node_modules.exists(): - deno_cache_paths.append(cwd_node_modules) - - # Look for relative paths in the project - project_paths = [ - Path("libs/pyodide-sandbox-js/node_modules"), - Path("../pyodide-sandbox-js/node_modules"), - Path("./node_modules"), - ] - - for project_path in project_paths: - if project_path.exists(): - deno_cache_paths.append(project_path) - - for cache_path in deno_cache_paths: - if not cache_path.exists(): - continue - - # Look for pyodide directories - pyodide_patterns = [ - cache_path / "**" / "pyodide*", - cache_path / ".deno" / "pyodide*", - ] - - for pattern in pyodide_patterns: - for pyodide_dir in glob.glob(str(pattern), recursive=True): - pyodide_path = Path(pyodide_dir) - if pyodide_path.is_dir(): - # Add the pyodide directory and its contents - required_paths.append(str(pyodide_path)) - - # Look for specific files that pyodide needs - wasm_files = list(pyodide_path.glob("**/*.wasm")) - zip_files = list(pyodide_path.glob("**/*.zip")) - js_files = list(pyodide_path.glob("**/*.js")) - - # Add parent directories of essential files - for file_list in [wasm_files, zip_files, js_files]: - for file_path in file_list: - parent_dir = str(file_path.parent) - if parent_dir not in required_paths: - required_paths.append(parent_dir) - - except Exception as e: - logger.debug(f"Error detecting Pyodide paths: {e}") - # Fallback to common patterns - fallback_paths = [ - "~/.cache/deno", - "~/.deno", - "./node_modules", - "../pyodide-sandbox-js/node_modules", - "libs/pyodide-sandbox-js/node_modules", - ] - - for path in fallback_paths: - expanded_path = os.path.expanduser(path) - if os.path.exists(expanded_path): - required_paths.append(expanded_path) - - # Remove duplicates and return - return list(set(required_paths)) - - class BasePyodideSandbox: """Base class for PyodideSandbox implementations. 
@@ -318,19 +228,11 @@ def __init__( # Define permission configurations: # each tuple contains (flag, setting, defaults) - - # For read permissions, automatically include Pyodide paths if not explicitly set - read_defaults = ["node_modules"] - if allow_read is False: - # If read is False, add specific Pyodide paths instead of global access - pyodide_paths = get_pyodide_required_paths() - read_defaults.extend(pyodide_paths) - logger.debug(f"Auto-detected Pyodide paths for read access: {pyodide_paths}") - perm_defs = [ ("--allow-env", allow_env, None), - # For file system permissions, use the enhanced read_defaults - ("--allow-read", allow_read, read_defaults), + # For file system permissions, if no permission is specified, + # force node_modules + ("--allow-read", allow_read, ["node_modules"]), ("--allow-write", allow_write, ["node_modules"]), ("--allow-net", allow_net, None), ("--allow-run", allow_run, None), @@ -524,11 +426,6 @@ def _build_command( cmd.extend(["-x", "[]"]) logger.debug("Filesystem enabled with no initial operations") - # Log the complete command for debugging - cmd_str = ' '.join(cmd) - logger.info(f"Executing Deno command: {cmd_str}") - print(f"🚀 DENO CMD: {cmd_str}") - return cmd @@ -1279,119 +1176,3 @@ async def _arun( ) return tool_result - - -class PyodideSandboxStructuredTool: - r"""Pure StructuredTool wrapper for PyodideSandbox with dynamic description updates. - - This class provides a standalone StructuredTool interface for users who prefer - to work exclusively with StructuredTool rather than the main PyodideSandboxTool. - It maintains all the filesystem functionality and dynamic description updates. - - Example usage: - ```python - from langchain_sandbox import PyodideSandboxStructuredTool - from langgraph.prebuilt import create_react_agent - from langchain_openai import ChatOpenAI - - # Create tool - sandbox_tool = PyodideSandboxStructuredTool( - enable_filesystem=True, - allow_net=True, - ) - - # Attach files - sandbox_tool.attach_file("data.csv", "name,age\\nJohn,25") - - # Use in agent - access via .tool property - agent = create_react_agent(llm, [sandbox_tool.tool]) - ``` - """ - - def __init__(self, **kwargs: Any) -> None: # noqa: ANN401 - """Initialize the StructuredTool wrapper. - - Args: - **kwargs: All arguments are passed to PyodideSandboxTool - """ - self._base_tool = PyodideSandboxTool(**kwargs) - - @property - def tool(self) -> StructuredTool: - """Access to the underlying StructuredTool. - - Returns: - StructuredTool instance with current description - """ - return self._base_tool.as_structured_tool() - - def attach_file( - self, - path: str, - content: str, - *, - encoding: str = "utf-8", - ) -> None: - """Attach a text file to the sandbox environment. - - Args: - path: File path within the sandbox filesystem - content: Text content of the file - encoding: Text encoding (default: utf-8) - """ - self._base_tool.attach_file(path, content, encoding=encoding) - - def attach_binary_file( - self, - path: str, - content: bytes, - ) -> None: - """Attach a binary file to the sandbox environment. - - Args: - path: File path within the sandbox filesystem - content: Binary content of the file - """ - self._base_tool.attach_binary_file(path, content) - - def create_directory(self, path: str) -> None: - """Create a directory in the sandbox environment. - - Args: - path: Directory path within the sandbox filesystem - """ - self._base_tool.create_directory(path) - - def get_attached_files(self) -> list[str]: - """Get list of attached file paths. 
-
-        Returns:
-            List of file paths that will be available in the sandbox filesystem
-        """
-        return self._base_tool.get_attached_files()
-
-    def clear_filesystem_operations(self) -> None:
-        """Clear all attached files and directories."""
-        self._base_tool.clear_filesystem_operations()
-
-    def invoke(self, input_data: dict[str, Any]) -> str:
-        """Direct invoke method for easier usage.
-
-        Args:
-            input_data: Input data containing 'code' key
-
-        Returns:
-            Execution result as string
-        """
-        return self.tool.invoke(input_data)
-
-    async def ainvoke(self, input_data: dict[str, Any]) -> str:
-        """Async direct invoke method for easier usage.
-
-        Args:
-            input_data: Input data containing 'code' key
-
-        Returns:
-            Execution result as string
-        """
-        return await self.tool.ainvoke(input_data)

From 95e0f9695dfc817db1b35004ce1c1e5159a0e8f9 Mon Sep 17 00:00:00 2001
From: fullzer4
Date: Sat, 31 May 2025 18:21:00 -0300
Subject: [PATCH 18/27] fix: pass filesystem operations to the runner via stdin
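
Send the queued filesystem operations to the Deno runner as a single JSON
document on stdin instead of packing them into the `-x` command-line
argument, which can hit OS argument-length limits once larger files are
attached. This commit also points PKG_NAME at the local runner for
development testing.

A minimal sketch of the payload shape the runner expects; the file path and
contents below are illustrative only, while the field names come from
FileSystemOperation.to_dict():

    # Python sketch: the JSON document written to the subprocess's stdin.
    stdin_payload = {
        "fileSystemOperations": [
            {
                "operation": "write",
                "path": "data/users.csv",
                "content": "name,age\nAlice,30",
                "encoding": "utf-8",
            },
        ],
    }

The synchronous sandbox pipes this document through subprocess.run(...,
input=stdin_json); the async path temporarily keeps the `-x` argument
because of an asyncio stdin compatibility issue (see the comments in
execute()).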

---
 libs/sandbox-py/langchain_sandbox/pyodide.py | 51 ++++++++++++-------
 .../tests/unit_tests/test_pyodide_sandbox.py | 14 +++++
 libs/sandbox-py/uv.lock | 2 +-
 3 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py
index 13ccc25..7af6217 100644
--- a/libs/sandbox-py/langchain_sandbox/pyodide.py
+++ b/libs/sandbox-py/langchain_sandbox/pyodide.py
@@ -85,8 +85,8 @@ def to_dict(self) -> dict[str, str]:
 
 
 # Published package name
-PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4"
-# PKG_NAME = "../pyodide-sandbox-js/main.ts"  # noqa: ERA001
+# PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4"
+PKG_NAME = "../pyodide-sandbox-js/main.ts"  # noqa: ERA001
 
 
 def build_permission_flag(
@@ -375,7 +375,7 @@ def _build_command(
         session_metadata: dict | None = None,
         memory_limit_mb: int | None = None,
     ) -> list[str]:
-        """Build the Deno command with all necessary arguments.
+        """Build the Deno command with necessary arguments, using stdin for file operations.
 
         Args:
             code: The Python code to execute
@@ -414,18 +414,7 @@ def _build_command(
 
         if session_metadata:
             cmd.extend(["-m", json.dumps(session_metadata)])
-
-        # Add filesystem operations if any are queued
-        if self._filesystem_operations or self.enable_filesystem:
-            if self._filesystem_operations:
-                fs_ops = [op.to_dict() for op in self._filesystem_operations]
-                fs_json = json.dumps(fs_ops, ensure_ascii=True, separators=(",", ":"))
-                cmd.extend(["-x", fs_json])
-                logger.debug("Filesystem enabled with %d operations", len(fs_ops))
-            else:
-                cmd.extend(["-x", "[]"])
-                logger.debug("Filesystem enabled with no initial operations")
-
+        
         return cmd
 
 
@@ -479,7 +468,24 @@ async def execute(
             memory_limit_mb=memory_limit_mb,
         )
 
-        # Create and run the subprocess
+        # Prepare the data to send via stdin
+        stdin_data = {
+            "fileSystemOperations": [op.to_dict() for op in self._filesystem_operations]
+            if self._filesystem_operations else []
+        }
+
+        # Encode the data for stdin
+        stdin_json = json.dumps(stdin_data).encode("utf-8")
+
+        # Fixed: do not use stdin with asyncio because of compatibility problems.
+        # Temporarily fall back to passing the operations as an argument until
+        # the stdin issue is resolved in the async version.
+        fs_ops_json = json.dumps([op.to_dict() for op in self._filesystem_operations])
+        # Add the filesystem data to the command
+        if self._filesystem_operations or self.enable_filesystem:
+            cmd.extend(["-x", fs_ops_json])
+
+        # Create and run the subprocess
         process = await asyncio.create_subprocess_exec(
             *cmd,
             stdout=asyncio.subprocess.PIPE,
@@ -591,13 +597,23 @@ def execute(
             memory_limit_mb=memory_limit_mb,
         )
 
+        # Prepare the data to send via stdin
+        stdin_data = {
+            "fileSystemOperations": [op.to_dict() for op in self._filesystem_operations]
+            if self._filesystem_operations else []
+        }
+
+        # Encode the data for stdin
+        stdin_json = json.dumps(stdin_data).encode("utf-8")
+
         try:
-            # Run the subprocess with timeout
+            # Run the subprocess with timeout and stdin data
             # Ignoring S603 for subprocess.run as the cmd is built safely.
             # Untrusted input comes from `code` parameter, which should be
             # escaped properly as we are **not** using shell=True.
             process = subprocess.run(  # noqa: S603
                 cmd,
+                input=stdin_json,  # Pass the data via stdin
                 capture_output=True,
                 text=False,  # Keep as bytes for proper decoding
                 timeout=timeout_seconds,
@@ -997,6 +1013,7 @@ def _run_sync(self, code: str) -> str:
             return f"Error during execution: {error_msg}"
 
         if result.stdout:
+            # Ensure newlines are preserved
             return result.stdout
 
         if result.result is not None:
diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py
index 4e3cd7c..2ca207b 100644
--- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py
+++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py
@@ -62,6 +62,8 @@ def test_pyodide_sandbox_tool() -> None:
     tool = PyodideSandboxTool(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
     )
     result = tool.invoke({"code": "x = 5; print(x)"})
     assert result == "5"
@@ -74,6 +76,8 @@ def test_pyodide_timeout() -> None:
     tool = PyodideSandboxTool(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
         timeout_seconds=0.1,
     )
     result = tool.invoke({"code": "while True: pass"})
@@ -85,6 +89,8 @@ async def test_async_pyodide_sandbox_tool() -> None:
     tool = PyodideSandboxTool(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
     )
     result = await tool.ainvoke({"code": "x = 5; print(x)"})
     assert result == "5"
@@ -97,6 +103,8 @@ async def test_async_pyodide_timeout() -> None:
     tool = PyodideSandboxTool(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
         timeout_seconds=0.1,
     )
     result = await tool.ainvoke({"code": "while True: pass"})
@@ -227,6 +235,8 @@ async def test_filesystem_basic_operations(pyodide_package: None) -> None:
     sandbox = PyodideSandbox(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
     )
 
     # Attach files
@@ -276,6 +286,8 @@ def test_filesystem_tool_usage() -> None:
     tool = PyodideSandboxTool(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
     )
 
     # Attach CSV data
@@ -305,6 +317,8 @@ async def test_binary_file_operations(pyodide_package: None) -> None:
     sandbox = PyodideSandbox(
         enable_filesystem=True,
         allow_net=True,
+        allow_read=True,
+        allow_write=True,
     )
 
     # Create some binary data
diff --git a/libs/sandbox-py/uv.lock b/libs/sandbox-py/uv.lock
index 6441963..dcdb6d8 100644
--- a/libs/sandbox-py/uv.lock
+++ b/libs/sandbox-py/uv.lock
@@ -439,7 +439,7 @@ wheels = [
 
 [[package]]
 name = "langchain-sandbox"
-version = "0.0.5"
+version = "0.0.6"
 source = { editable = "." }
 dependencies = [
     { name = "langchain-core" },
 dependencies = [
     { name = "langchain-core" },

From 3d7317c4c7844347cac1670722246acbe4802da8 Mon Sep 17 00:00:00 2001
From: fullzer4
Date: Sat, 31 May 2025 18:21:50 -0300
Subject: [PATCH 19/27] fixes

---
 libs/pyodide-sandbox-js/main.ts | 43 +++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts
index 56f1b2a..46e599e 100644
--- a/libs/pyodide-sandbox-js/main.ts
+++ b/libs/pyodide-sandbox-js/main.ts
@@ -621,8 +621,27 @@ async function runPython(
 }
 
 async function main(): Promise<void> {
+  // Read the stdin data first
+  let stdinData: any = {};
+
+  // Check whether data is available on stdin (i.e. it is not a terminal)
+  if (!Deno.isatty(Deno.stdin.rid)) {
+    const buffer = new Uint8Array(50 * 1024 * 1024); // 50 MB buffer
+    const bytesRead = await Deno.stdin.read(buffer);
+
+    if (bytesRead) {
+      try {
+        const stdinText = new TextDecoder().decode(buffer.subarray(0, bytesRead));
+        stdinData = JSON.parse(stdinText);
+      } catch (error) {
+        console.error("Error parsing stdin data:", error);
+        Deno.exit(1);
+      }
+    }
+  }
+
   const flags = parseArgs(Deno.args, {
-    string: ["code", "file", "session-bytes", "session-metadata", "fs-operations"],
+    string: ["code", "file", "session-bytes", "session-metadata"],
     alias: {
       c: "code",
       f: "file",
@@ -631,7 +650,7 @@ async function main(): Promise<void> {
       s: "stateful",
       b: "session-bytes",
       m: "session-metadata",
-      x: "fs-operations",
+      // Removed x: "fs-operations" - it now comes via stdin
     },
     boolean: ["help", "version", "stateful"],
     default: {
@@ -652,7 +671,6 @@
 OPTIONS:
   -s, --stateful            Use a stateful session
   -b, --session-bytes       Session bytes
   -m, --session-metadata    Session metadata
-  -x, --fs-operations       JSON array of filesystem operations
   -h, --help                Display help
   -V, --version             Display version
 `);
@@ -668,9 +686,8 @@ OPTIONS:
     code: flags.code,
     file: flags.file,
     stateful: flags.stateful,
-    sessionBytes: flags["session-bytes"],
-    sessionMetadata: flags["session-metadata"],
-    fsOperations: flags["fs-operations"],
+    sessionBytes: flags["session-bytes"] || (stdinData.sessionBytes ? JSON.stringify(stdinData.sessionBytes) : null),
+    sessionMetadata: flags["session-metadata"] || stdinData.sessionMetadata,
   };
 
   if (!options.code && !options.file) {
@@ -700,14 +717,10 @@ OPTIONS:
     pythonCode = options.code?.replace(/\\n/g, "\n") ?? "";
   }
 
+  // Extract the filesystem operations from stdin
   let fileSystemOperations: FileSystemOperation[] = [];
-  if (options.fsOperations) {
-    try {
-      fileSystemOperations = JSON.parse(options.fsOperations);
-    } catch (error: unknown) {
-      console.error("Error parsing filesystem operations:", error instanceof Error ? error.message : String(error));
-      Deno.exit(1);
-    }
+  if (stdinData.fileSystemOperations && Array.isArray(stdinData.fileSystemOperations)) {
+    fileSystemOperations = stdinData.fileSystemOperations;
   }
 
   const runOptions: any = {
@@ -716,7 +729,7 @@ OPTIONS:
     sessionMetadata: options.sessionMetadata,
   };
 
-  // Enable filesystem if operations are provided
+  // Enable the filesystem if operations were provided
   if (fileSystemOperations.length > 0) {
     runOptions.fileSystemOptions = {
       enableFileSystem: true,
@@ -763,4 +776,4 @@ if (import.meta.main) {
   });
 }
 
-export { runPython, resolvePathInSandbox, type FileSystemOperation, type FileSystemOptions };
+export { runPython, resolvePathInSandbox, type FileSystemOperation, type FileSystemOptions };
\ No newline at end of file

From a042fd2f945dcff1873ea941a1abe3255a2ee8d6 Mon Sep 17 00:00:00 2001
From: fullzer4
Date: Sun, 1 Jun 2025 15:12:38 -0300
Subject: [PATCH 20/27] fix: improve filesystem operations and dynamic tool
 descriptions

---
 libs/pyodide-sandbox-js/main.ts              | 377 ++++----
 libs/pyodide-sandbox-js/main_test.ts         | 177 +++-
 libs/sandbox-py/langchain_sandbox/pyodide.py | 888 ++++++++----------
 .../tests/unit_tests/test_pyodide_sandbox.py |  61 +-
 4 files changed, 751 insertions(+), 752 deletions(-)

diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts
index 46e599e..7ac1f08 100644
--- a/libs/pyodide-sandbox-js/main.ts
+++ b/libs/pyodide-sandbox-js/main.ts
@@ -274,14 +274,14 @@ interface FileSystemOptions {
 
 interface PyodideResult {
   success: boolean;
-  result?: any;
+  result?: unknown;
   stdout?: string[];
   stderr?: string[];
   error?: string;
   jsonResult?: string;
   sessionBytes?: Uint8Array;
   sessionMetadata?: SessionMetadata;
-  fileSystemOperations?: any[];
+  fileSystemOperations?: Record<string, unknown>[];
   fileSystemInfo?: {
     type: "memfs";
     mountPoint: string;
@@ -300,10 +300,6 @@ interface FileSystemOperation {
 
 /**
  * Resolves a relative path within the sandbox environment.
- *
- * @param inputPath - The input path to resolve
- * @param mountPoint - The sandbox mount point (default: "/sandbox")
- * @returns The resolved absolute path within the sandbox
  */
 function resolvePathInSandbox(
   inputPath: string,
@@ -328,10 +324,10 @@ function resolvePathInSandbox(
 
 /**
  * Setup memory filesystem environment in Python.
 */
-function setupFileSystem(pyodide: any): void {
+function setupFileSystem(pyodide: unknown): void {
   const mountPoint = "/sandbox";
 
-  pyodide.runPython(`
+  (pyodide as { runPython: (code: string) => void }).runPython(`
 import os
 import sys
@@ -350,7 +346,7 @@ sys.modules['__main__'].MOUNT_POINT = MOUNT_POINT
 # Add helper function for path resolution
 def resolve_path(path):
     """Resolve a path relative to the sandbox"""
-    if path.startswith("/"):
+    if isinstance(path, str) and path.startswith("/"):
         return path
     return os.path.join(MOUNT_POINT, path)
 
@@ -358,98 +354,144 @@ sys.modules['__main__'].resolve_path = resolve_path
 `);
 }
 
-async function initPyodide(pyodide: any, options: FileSystemOptions = {}): Promise<void> {
-  const sys = pyodide.pyimport("sys");
-  const pathlib = pyodide.pyimport("pathlib");
+function initPyodide(pyodide: unknown): void {
+  const sys = (pyodide as { pyimport: (name: string) => unknown }).pyimport("sys");
+  const pathlib = (pyodide as { pyimport: (name: string) => unknown }).pyimport("pathlib");
   const dirPath = "/tmp/pyodide_worker_runner/";
-  sys.path.append(dirPath);
-  pathlib.Path(dirPath).mkdir();
-  pathlib.Path(dirPath + "prepare_env.py").write_text(prepareEnvCode);
-
-  // Initialize filesystem if enabled
-  if (options.enableFileSystem) {
-    // Ensure sandbox mount point exists
-    try {
-      pyodide.FS.mkdirTree("/sandbox");
-    } catch (e) {
-      // Directory might already exist, which is fine
-    }
-
-    setupFileSystem(pyodide);
-  }
-}
-
-async function performFileSystemOperations(
-  pyodide: any,
-  operations: FileSystemOperation[],
-  options: FileSystemOptions = {}
-): Promise<any[]> {
-  const results: any[] = [];
+  (sys as { path: { append: (path: string) => void } }).path.append(dirPath);
+  (pathlib as { Path: (path: string) => { mkdir: () => void; write_text: (text: string) => void } }).Path(dirPath).mkdir();
+  (pathlib as { Path: (path: string) => { mkdir: () => void; write_text: (text: string) => void } }).Path(dirPath + "prepare_env.py").write_text(prepareEnvCode);
 
   // Ensure sandbox mount point exists
   try {
-    pyodide.FS.mkdirTree("/sandbox");
-  } catch (e) {
+    (pyodide as { FS: { mkdirTree: (path: string) => void } }).FS.mkdirTree("/sandbox");
+  } catch (_e) {
     // Directory might already exist, which is fine
   }
+
+  setupFileSystem(pyodide);
+}
 
-  const prepare_env = pyodide.pyimport("prepare_env");
+/**
+ * Process stdin using ReadableStream for large files
+ */
+async function processStreamedFiles(pyodide: unknown): Promise<Record<string, unknown>[]> {
+  const results: Record<string, unknown>[] = [];
+
+  // Read binary protocol header
+  const headerBuffer = new Uint8Array(8);
+  const headerRead = await Deno.stdin.read(headerBuffer);
+
+  if (!headerRead || headerRead < 8) {
+    // No stdin data or insufficient data
+    return results;
+  }
 
-  for (const op of operations) {
-    try {
-      // Resolve paths using sandbox resolution
-      const resolvedPath = resolvePathInSandbox(op.path, "/sandbox");
-      let resolvedDestination: string | undefined;
-
-      if (op.operation === "copy" && op.destination) {
-        resolvedDestination = resolvePathInSandbox(op.destination, "/sandbox");
-      }
+  // Check magic header
+  const magic = new TextDecoder().decode(headerBuffer.slice(0, 3));
+  const version = headerBuffer[3];
+  if (magic !== "PSB" || version !== 1) {
+    throw new Error(`Invalid PSB header: ${magic} v${version}`);
+  }
 
-      // Create resolved operation
-      const resolvedOp = {
-        ...op,
-        path: resolvedPath,
-        ...(resolvedDestination && { destination: resolvedDestination })
-      };
+  // Get metadata length
+  const metadataLength = new
DataView(headerBuffer.buffer).getUint32(4, false); + + // Read metadata + const metadataBuffer = new Uint8Array(metadataLength); + const metadataRead = await Deno.stdin.read(metadataBuffer); + + if (!metadataRead || metadataRead < metadataLength) { + throw new Error("Failed to read metadata"); + } - // Handle binary write operations - if (op.operation === "write" && typeof op.content === "string") { - if (op.encoding === "binary") { - const result = await prepare_env.perform_fs_operation(resolvedOp); - results.push(result.toJs()); - continue; + // Parse metadata + const metadata = JSON.parse(new TextDecoder().decode(metadataBuffer)) as { + directories?: string[]; + files?: Array<{ path: string; size: number; binary: boolean }>; + }; + + // Process directories first + if (metadata.directories) { + for (const dir of metadata.directories) { + const resolvedPath = resolvePathInSandbox(dir, "/sandbox"); + try { + (pyodide as { FS: { mkdirTree: (path: string) => void } }).FS.mkdirTree(resolvedPath); + results.push({ + success: true, + operation: "mkdir", + path: dir + }); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + results.push({ + success: false, + error: errorMsg, + operation: "mkdir", + path: dir + }); + } + } + } + + // Process files + if (metadata.files && metadata.files.length > 0) { + for (const fileInfo of metadata.files) { + const resolvedPath = resolvePathInSandbox(fileInfo.path, "/sandbox"); + + // Create parent directories if needed + const parentDir = resolvedPath.substring(0, resolvedPath.lastIndexOf("/")); + if (parentDir) { + try { + (pyodide as { FS: { mkdirTree: (path: string) => void } }).FS.mkdirTree(parentDir); + } catch (_e) { + // Directory might already exist } + } - // Use pyodide.FS for text writes (better performance) - try { - const parentDir = resolvedPath.substring(0, resolvedPath.lastIndexOf("/")); - if (parentDir) { - pyodide.FS.mkdirTree(parentDir); + try { + // Read file data + const fileBuffer = new Uint8Array(fileInfo.size); + let bytesRead = 0; + + // Read in chunks to handle large files efficiently + while (bytesRead < fileInfo.size) { + const chunkSize = Math.min(65536, fileInfo.size - bytesRead); + const chunkBuffer = new Uint8Array(chunkSize); + const readResult = await Deno.stdin.read(chunkBuffer); + + if (readResult === null) { + throw new Error(`Unexpected end of stream at ${bytesRead}/${fileInfo.size} bytes`); } - pyodide.FS.writeFile(resolvedPath, op.content, { encoding: op.encoding || "utf8" }); - results.push({ success: true, operation: op.operation, path: resolvedPath }); - continue; - } catch { - // Fallback to Python method if pyodide.FS fails + + // Copy to the main buffer + fileBuffer.set(chunkBuffer.subarray(0, readResult), bytesRead); + bytesRead += readResult; } + + // Write to PyFS + (pyodide as { FS: { writeFile: (path: string, data: Uint8Array) => void } }).FS.writeFile(resolvedPath, fileBuffer); + + results.push({ + success: true, + operation: "write", + path: fileInfo.path, + size: bytesRead, + binary: fileInfo.binary + }); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + results.push({ + success: false, + error: errorMsg, + operation: "write", + path: fileInfo.path + }); } - - // Use Python method for other operations - const result = await prepare_env.perform_fs_operation(resolvedOp); - results.push(result.toJs()); - - } catch (error: unknown) { - const errorMessage = error instanceof Error ? 
 error.message : String(error);
-      results.push({
-        success: false,
-        error: errorMessage,
-        operation: op.operation,
-        path: op.path,
-      });
-    }
-  }
-
+
   return results;
 }
 
@@ -459,14 +501,12 @@ async function runPython(
     stateful?: boolean;
     sessionBytes?: string;
     sessionMetadata?: string;
-    fileSystemOptions?: FileSystemOptions;
-    fileSystemOperations?: FileSystemOperation[];
   } = {}
 ): Promise<PyodideResult> {
   const output: string[] = [];
   const err_output: string[] = [];
   const originalLog = console.log;
-  console.log = (...args: any[]) => {}
+  console.log = (..._args: unknown[]) => {}
 
   try {
     const pyodide = await loadPyodide({
@@ -481,23 +521,7 @@ async function runPython(
       },
     });
 
-    // Auto-enable filesystem if operations are provided or explicitly enabled
-    const shouldEnableFileSystem =
-      options.fileSystemOperations?.length > 0 ||
-      options.fileSystemOptions?.enableFileSystem ||
-      // Detect file operations in Python code
-      (pythonCode.includes("open(") ||
-        pythonCode.includes("with open") ||
-        pythonCode.includes("os.") ||
-        pythonCode.includes("pathlib") ||
-        pythonCode.includes("Path("));
-
-    const fsOptions: FileSystemOptions = {
-      enableFileSystem: shouldEnableFileSystem,
-      ...options.fileSystemOptions
-    };
-
-    await initPyodide(pyodide, fsOptions);
+    initPyodide(pyodide);
 
     // Determine session metadata
     let sessionMetadata: SessionMetadata;
@@ -518,12 +542,12 @@ async function runPython(
     }
 
     // Import prepared environment module
-    const prepare_env = pyodide.pyimport("prepare_env");
+    const prepare_env = (pyodide as { pyimport: (name: string) => unknown }).pyimport("prepare_env");
 
-    // Execute filesystem operations before Python code
-    let fileSystemResults: any[] = [];
-    if (options.fileSystemOperations && options.fileSystemOperations.length > 0) {
-      fileSystemResults = await performFileSystemOperations(pyodide, options.fileSystemOperations, fsOptions);
+    let fileSystemResults: Record<string, unknown>[] = [];
+
+    if (!Deno.stdin.isTerminal()) {
+      fileSystemResults = await processStreamedFiles(pyodide);
     }
 
     // Prepare packages to install (include dill)
     const additionalPackagesToInstall = stateful
      ? [...new Set([...defaultPackages, ...sessionMetadata.packages])]
      : defaultPackages;
-    let installErrors: string[] = []
+    const installErrors: string[] = []
 
-    const installedPackages = await prepare_env.install_imports(
+    const installedPackages = await (prepare_env as {
+      install_imports: (
+        code: string,
+        packages: string[],
+        callback: (event: string, data: string) => void
+      ) => Promise<unknown[]>;
+    }).install_imports(
       pythonCode,
       additionalPackagesToInstall,
       (event_type: string, data: string) => {
@@ -560,18 +590,22 @@ async function runPython(
     if (options.sessionBytes) {
       sessionData = Uint8Array.from(JSON.parse(options.sessionBytes));
       // Run session preamble
-      await prepare_env.load_session_bytes(sessionData);
+      await (prepare_env as { load_session_bytes: (data: Uint8Array) => Promise<void> })
+        .load_session_bytes(sessionData);
     }
 
-    const packages = installedPackages.map((pkg: any) => pkg.get("package"));
+    const packages = installedPackages.map((pkg: unknown) =>
+      (pkg as { get?: (key: string) => string }).get?.("package") as string
+    );
 
     // Restore the original console.log function
     console.log = originalLog;
 
     // Run the Python code
-    const rawValue = await pyodide.runPythonAsync(pythonCode);
+    const rawValue = await (pyodide as { runPythonAsync: (code: string) => Promise<unknown> }).runPythonAsync(pythonCode);
 
     // Dump result to string
-    const jsonValue = await prepare_env.dumps(rawValue);
+    const jsonValue = await (prepare_env as { dumps: (value: unknown) => Promise<string> })
+      .dumps(rawValue);
 
     // Update session metadata with installed packages
     sessionMetadata.packages = [
@@ -581,15 +615,19 @@ async function runPython(
 
     if (options.stateful) {
       // Save session state to sessionBytes
-      sessionData = await prepare_env.dump_session_bytes() as Uint8Array;
+      sessionData = await (prepare_env as { dump_session_bytes: () => Promise<Uint8Array> })
+        .dump_session_bytes();
     }
 
+    // Process stdout - join array to string for consistent handling
+    const stdoutString = output.join('\n');
+
     // Return the result with stdout and stderr output
     const result: PyodideResult = {
       success: true,
       result: rawValue,
       jsonResult: jsonValue,
-      stdout: output,
+      stdout: stdoutString ? [stdoutString] : [],
       stderr: err_output,
       sessionMetadata: sessionMetadata,
     };
@@ -598,22 +636,20 @@ async function runPython(
       result["sessionBytes"] = sessionData;
     }
 
-    // Add filesystem info if enabled
-    if (fsOptions.enableFileSystem) {
-      result["fileSystemOperations"] = fileSystemResults;
-      result["fileSystemInfo"] = {
-        type: "memfs",
-        mountPoint: "/sandbox",
-        workingDirectory: "",
-        mounted: true
-      };
-    }
+    // Add filesystem info
+    result["fileSystemOperations"] = fileSystemResults;
+    result["fileSystemInfo"] = {
+      type: "memfs",
+      mountPoint: "/sandbox",
+      workingDirectory: "",
+      mounted: true
+    };
 
     return result;
-  } catch (error: any) {
+  } catch (error: unknown) {
     return {
       success: false,
-      error: error.message,
+      error: error instanceof Error ? error.message : String(error),
       stdout: output,
       stderr: err_output
     };
   }
 }
 
 async function main(): Promise<void> {
-  // Read the stdin data first
-  let stdinData: any = {};
-
-  // Check whether data is available on stdin (i.e. it is not a terminal)
-  if (!Deno.isatty(Deno.stdin.rid)) {
-    const buffer = new Uint8Array(50 * 1024 * 1024); // 50 MB buffer
-    const bytesRead = await Deno.stdin.read(buffer);
-
-    if (bytesRead) {
-      try {
-        const stdinText = new TextDecoder().decode(buffer.subarray(0, bytesRead));
-        stdinData = JSON.parse(stdinText);
-      } catch (error) {
-        console.error("Error parsing stdin data:", error);
-        Deno.exit(1);
-      }
-    }
-  }
-
   const flags = parseArgs(Deno.args, {
     string: ["code", "file", "session-bytes", "session-metadata"],
     alias: {
@@ -650,7 +667,6 @@ async function main(): Promise<void> {
       s: "stateful",
       b: "session-bytes",
       m: "session-metadata",
-      // Removed x: "fs-operations" - it now comes via stdin
     },
     boolean: ["help", "version", "stateful"],
     default: {
@@ -682,66 +698,43 @@ OPTIONS:
     return
   }
 
-  const options = {
-    code: flags.code,
-    file: flags.file,
-    stateful: flags.stateful,
-    sessionBytes: flags["session-bytes"] || (stdinData.sessionBytes ? JSON.stringify(stdinData.sessionBytes) : null),
-    sessionMetadata: flags["session-metadata"] || stdinData.sessionMetadata,
-  };
-
-  if (!options.code && !options.file) {
-    console.error(
-      "Error: You must provide Python code using either -c/--code or -f/--file option.\nUse --help for usage information."
-    );
-    Deno.exit(1);
-  }
-
   // Get Python code from file or command line argument
   let pythonCode = "";
-  if (options.file) {
+  if (flags.file) {
     try {
       // Resolve relative or absolute file path
-      const filePath = options.file.startsWith("/")
-        ? options.file
-        : join(Deno.cwd(), options.file);
+      const filePath = flags.file.startsWith("/")
+        ? flags.file
+        : join(Deno.cwd(), flags.file);
       pythonCode = await Deno.readTextFile(filePath);
     } catch (error: unknown) {
       const errorMessage = error instanceof Error ? error.message : String(error);
-      console.error(`Error reading file ${options.file}:`, errorMessage);
+      console.error(`Error reading file ${flags.file}:`, errorMessage);
       Deno.exit(1);
     }
   } else {
     // Process code from command line (replacing escaped newlines)
-    pythonCode = options.code?.replace(/\\n/g, "\n") ?? "";
+    pythonCode = flags.code?.replace(/\\n/g, "\n") ?? "";
   }
 
-  // Extract the filesystem operations from stdin
-  let fileSystemOperations: FileSystemOperation[] = [];
-  if (stdinData.fileSystemOperations && Array.isArray(stdinData.fileSystemOperations)) {
-    fileSystemOperations = stdinData.fileSystemOperations;
-  }
-
-  const runOptions: any = {
-    stateful: options.stateful,
-    sessionBytes: options.sessionBytes,
-    sessionMetadata: options.sessionMetadata,
-  };
-
-  // Enable the filesystem if operations were provided
-  if (fileSystemOperations.length > 0) {
-    runOptions.fileSystemOptions = {
-      enableFileSystem: true,
-    };
-    runOptions.fileSystemOperations = fileSystemOperations;
+  if (!pythonCode) {
+    console.error(
+      "Error: You must provide Python code using either -c/--code or -f/--file option.\nUse --help for usage information."
+    );
+    Deno.exit(1);
   }
-
-  const result = await runPython(pythonCode, runOptions);
+
+  // Run the code
+  const result = await runPython(pythonCode, {
+    stateful: flags.stateful,
+    sessionBytes: flags["session-bytes"],
+    sessionMetadata: flags["session-metadata"],
+  });
 
   // Create output JSON with stdout, stderr, and result
-  const outputJson: any = {
-    stdout: result.stdout?.join('\n') || null,
+  const outputJson: Record<string, unknown> = {
+    stdout: result.stdout?.join('\n') || "",
     stderr: result.success ? (result.stderr?.join('\n') || null) : result.error || null,
     result: result.success ? JSON.parse(result.jsonResult || 'null') : null,
     success: result.success,
@@ -768,8 +761,6 @@ OPTIONS:
 
 // If this module is run directly
 if (import.meta.main) {
-  // Override the global environment variables that Deno's permission prompts look for
-  // to suppress color-related permission prompts
   main().catch((err) => {
     console.error("Unhandled error:", err);
     Deno.exit(1);
diff --git a/libs/pyodide-sandbox-js/main_test.ts b/libs/pyodide-sandbox-js/main_test.ts
index 36a0dc1..77a1b4a 100644
--- a/libs/pyodide-sandbox-js/main_test.ts
+++ b/libs/pyodide-sandbox-js/main_test.ts
@@ -1,5 +1,5 @@
 import { assertEquals, assertNotEquals } from "@std/assert";
-import { runPython, resolvePathInSandbox, type FileSystemOperation } from "./main.ts";
+import { runPython, resolvePathInSandbox } from "./main.ts";
 
 Deno.test("runPython simple test", async () => {
   const result = await runPython("x = 2 + 3; x", {});
@@ -10,7 +10,7 @@ Deno.test("runPython simple test", async () => {
 Deno.test("runPython with stdout", async () => {
   const result = await runPython("x = 5; print(x); x", {});
   assertEquals(result.success, true);
-  assertEquals(result.stdout?.join(''), "5");
+  assertEquals(result.stdout?.join('').trim(), "5");
   assertEquals(JSON.parse(result.jsonResult || "null"), 5);
   assertEquals(result.stderr?.length, 0);
 });
@@ -36,25 +36,128 @@ Deno.test("resolvePathInSandbox - basic resolution", () => {
   assertEquals(resolvePathInSandbox("/tmp/absolute.txt"), "/tmp/absolute.txt");
 });
 
+// Helper function to create stdin data for filesystem operations
+function createFilesystemStdin(
+  files: Array<{ path: string; content: string | Uint8Array; binary?: boolean }>,
+  directories: string[] = []
+): Uint8Array {
+  // Convert files to the expected format
+  const fileInfos = files.map(f => {
+    const contentBytes = typeof f.content === 'string'
+      ? new TextEncoder().encode(f.content)
+      : f.content;
+
+    return {
+      path: f.path,
+      size: contentBytes.length,
+      binary: f.binary || false,
+      content: contentBytes
+    };
+  });
+
+  // Create metadata
+  const metadata = {
+    files: fileInfos.map(f => ({
+      path: f.path,
+      size: f.size,
+      binary: f.binary
+    })),
+    directories: directories
+  };
+
+  const metadataJson = new TextEncoder().encode(JSON.stringify(metadata));
+
+  // Create header: "PSB" + version + metadata size (4 bytes)
+  const header = new Uint8Array(8);
+  header.set(new TextEncoder().encode("PSB"), 0);
+  header[3] = 1; // version
+
+  // Set metadata length (big endian)
+  const dataView = new DataView(header.buffer);
+  dataView.setUint32(4, metadataJson.length, false);
+
+  // Combine header + metadata + file contents
+  const totalSize = header.length + metadataJson.length +
+    fileInfos.reduce((sum, f) => sum + f.content.length, 0);
+
+  const result = new Uint8Array(totalSize);
+  let offset = 0;
+
+  result.set(header, offset);
+  offset += header.length;
+
+  result.set(metadataJson, offset);
+  offset += metadataJson.length;
+
+  for (const fileInfo of fileInfos) {
+    result.set(fileInfo.content, offset);
+    offset += fileInfo.content.length;
+  }
+
+  return result;
+}
+
+// Mock Deno.stdin for filesystem tests
+async function runPythonWithFiles(
+  code: string,
+  files: Array<{ path: string; content: string | Uint8Array; binary?: boolean }> = [],
+  directories: string[] = [],
+  options: Record<string, unknown> = {}
+) {
+  if (files.length === 0 && directories.length === 0) {
+    return await runPython(code, options);
+  }
+
+  // Create the stdin data
+  const stdinData = createFilesystemStdin(files, directories);
+
+  // Mock stdin for this test
+  const originalIsTerminal = Deno.stdin.isTerminal;
+  const originalRead = Deno.stdin.read;
+  let dataOffset = 0;
+
+  // Mock isTerminal to return false (indicating we have stdin data)
+  Deno.stdin.isTerminal = () => false;
+
+  // Mock stdin.read to return our data
+  Deno.stdin.read = (buffer: Uint8Array): Promise<number | null> => {
+    if (dataOffset >= stdinData.length) {
+      return Promise.resolve(null);
+    }
+
+    const remaining = stdinData.length - dataOffset;
+    const toRead = Math.min(buffer.length, remaining);
+
+    buffer.set(stdinData.subarray(dataOffset, dataOffset + toRead));
+    dataOffset += toRead;
+
+    return Promise.resolve(toRead);
+  };
+
+  try {
+    return await runPython(code, options);
+  } finally {
+    // Restore original functions
+    Deno.stdin.isTerminal = originalIsTerminal;
+    Deno.stdin.read = originalRead;
+  }
+}
+
 Deno.test("FileSystem - operations", async () => {
-  const operations: FileSystemOperation[] = [
+  const files = [
     {
-      operation: "write",
       path: "config.json",
-      content: '{"app": "test", "version": "1.0"}',
+      content: '{"app": "test", "version": "1.0"}'
     },
     {
-      operation: "mkdir",
-      path: "data",
-    },
-    {
-      operation: "write",
       path: "data/output.txt",
-      content: "Hello World\nLine 2",
+      content: "Hello World\nLine 2"
     }
   ];
+
+  const directories = ["data"];
 
-  const result = await runPython(`
+  const result = await runPythonWithFiles(`
 import os
 import json
 
@@ -79,9 +182,7 @@ result = {
 }
 
 result
-  `, {
-    fileSystemOperations: operations
-  });
+  `, files, directories);
 
   assertEquals(result.success, true);
   const resultObj = JSON.parse(result.jsonResult || "null");
@@ -94,28 +195,26 @@
 });
 
 Deno.test("FileSystem - binary operations", async () => {
+  // Create binary content - "Binary data" encoded as bytes
+  const binaryContent = new TextEncoder().encode("Binary data");
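+  // Note: createFilesystemStdin() above frames this content as
+  // "PSB" | version 0x01 | 4-byte big-endian metadata length | metadata JSON |
+  // raw file bytes, matching what processStreamedFiles() parses in main.ts.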
+ + const files = [ { - operation: "write", path: "test.bin", - content: "QmluYXJ5IGRhdGE=", // Base64 for "Binary data" - encoding: "binary" + content: binaryContent, + binary: true } ]; - const result = await runPython(` + const result = await runPythonWithFiles(` import os -import base64 # Read binary file with open("test.bin", "rb") as f: binary_content = f.read() # Decode content -try: - decoded = binary_content.decode('utf-8') -except UnicodeDecodeError: - decoded = base64.b64decode(binary_content).decode('utf-8') +decoded = binary_content.decode('utf-8') result = { "file_exists": os.path.exists("test.bin"), @@ -125,9 +224,7 @@ result = { } result - `, { - fileSystemOperations: operations - }); + `, files); assertEquals(result.success, true); const resultObj = JSON.parse(result.jsonResult || "null"); @@ -138,28 +235,20 @@ result }); Deno.test("FileSystem - memfs directory structure", async () => { - const operations: FileSystemOperation[] = [ - { - operation: "mkdir", - path: "project", - }, + const files = [ { - operation: "mkdir", - path: "project/src", - }, - { - operation: "write", path: "project/src/main.py", - content: "print('Hello from memfs!')", + content: "print('Hello from memfs!')" }, { - operation: "write", path: "project/README.md", - content: "# My Project\nRunning in memfs", + content: "# My Project\nRunning in memfs" } ]; + + const directories = ["project", "project/src"]; - const result = await runPython(` + const result = await runPythonWithFiles(` import os # Navigate and check structure @@ -192,9 +281,7 @@ result = { } result - `, { - fileSystemOperations: operations - }); + `, files, directories); assertEquals(result.success, true); const resultObj = JSON.parse(result.jsonResult || "null"); diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 7af6217..40dd8aa 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -1,7 +1,6 @@ """Python wrapper that calls pyodide & deno for code execution.""" import asyncio -import base64 import dataclasses import json import logging @@ -15,8 +14,8 @@ ) from langchain_core.messages import ToolMessage from langchain_core.runnables import RunnableConfig -from langchain_core.tools import BaseTool, InjectedToolCallId, StructuredTool -from pydantic import BaseModel, Field +from langchain_core.tools import BaseTool, InjectedToolCallId +from pydantic import BaseModel, Field, PrivateAttr logger = logging.getLogger(__name__) @@ -39,54 +38,8 @@ class CodeExecutionResult: filesystem_operations: list[dict] | None = None -@dataclasses.dataclass(kw_only=True) -class FileSystemOperation: - """Container for filesystem operations. - - This class encapsulates a single filesystem operation that can be performed - within the sandboxed environment. Operations are serialized to JSON and - passed to the Deno subprocess for execution. - - Supported operations: - - write: Create or write a file - - read: Read file contents - - mkdir: Create a directory - - list: List directory contents - - exists: Check if file/directory exists - - remove: Delete file/directory - - copy: Copy file/directory - """ - - operation: Literal["read", "write", "list", "mkdir", "exists", "remove", "copy"] - path: str - content: str | None = None - encoding: str | None = None - destination: str | None = None - - def to_dict(self) -> dict[str, str]: - """Convert to dict for JSON serialization. - - Returns: - Dictionary representation suitable for JSON serialization. 
- """ - result = { - "operation": self.operation, - "path": self.path, - } - - if self.content is not None: - result["content"] = self.content - if self.encoding is not None: - result["encoding"] = self.encoding - if self.destination is not None: - result["destination"] = self.destination - - return result - - # Published package name -# PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" -PKG_NAME = "../pyodide-sandbox-js/main.ts" # noqa: ERA001 +PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" def build_permission_flag( @@ -135,7 +88,6 @@ class BasePyodideSandbox: - Limit file system access to specific directories - Control environment variable access - Prevent subprocess execution and FFI - - Attach files to in-memory filesystem before execution """ def __init__( @@ -150,7 +102,7 @@ def __init__( allow_ffi: list[str] | bool = False, node_modules_dir: str = "auto", skip_deno_check: bool = False, - enable_filesystem: bool = False, + files: dict[str, str | bytes] | None = None, ) -> None: """Initialize the sandbox with specific Deno permissions. @@ -208,12 +160,14 @@ def __init__( node_modules_dir: Directory for Node.js modules. Set to "auto" to use the default directory for Deno modules. skip_deno_check: If True, skip the check for Deno installation. - enable_filesystem: If True, enable in-memory filesystem support for - attaching files and directories to the sandbox environment. + files: Dictionary of files to attach to the sandbox filesystem. + Keys are file paths, values are file contents (str or bytes). """ self.stateful = stateful - self.enable_filesystem = enable_filesystem - self._filesystem_operations: list[FileSystemOperation] = [] + # List to store file information for binary streaming + self._sandbox_files = [] + # List to store directory paths + self._sandbox_dirs = [] if not skip_deno_check: # Check if Deno is installed @@ -250,122 +204,141 @@ def __init__( self.permissions.append(f"--node-modules-dir={node_modules_dir}") + # Attach files if provided during initialization + if files: + for path, content in files.items(): + self.attach_file(path, content) + def attach_file( self, path: str, - content: str, - *, - encoding: str = "utf-8", + content: str | bytes, ) -> None: - """Attach a text file to the sandbox filesystem. + """Attach a file to the sandbox filesystem using binary streaming. - This method queues a file to be created in the sandbox's in-memory - filesystem when code is executed. The file will be available for - reading and manipulation within the Python environment. + Files are stored in memory and streamed to the sandbox process via stdin + using a binary protocol. Both text and binary files are supported. 
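+
+        Example (illustrative; ``png_bytes`` stands in for any bytes object)::
+
+            sandbox.attach_file("notes.txt", "hello world")  # text file
+            sandbox.attach_file("logo.png", png_bytes)       # binary file
+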
Args: - path: File path within the sandbox filesystem - content: Text content of the file - encoding: Text encoding (default: utf-8) + path: Path where the file should be available in the sandbox + content: File content as string (for text files) or bytes (for binary files) Raises: - TypeError: If content is not a string + TypeError: If content is neither string nor bytes """ - self.enable_filesystem = True - - if not isinstance(content, str): - msg = "Content must be a string for text files" + if isinstance(content, str): + # Text file - convert to bytes + content_bytes = content.encode("utf-8") + self._sandbox_files.append( + { + "path": path, + "content": content_bytes, + "size": len(content_bytes), + "binary": False, + } + ) + logger.debug( + "Attached text file: %s (%d bytes)", + path, + len(content_bytes), + ) + elif isinstance(content, bytes): + # Binary file + self._sandbox_files.append( + {"path": path, "content": content, "size": len(content), "binary": True} + ) + logger.debug( + "Attached binary file: %s (%d bytes)", + path, + len(content), + ) + else: + msg = f"Content must be either a string or bytes, got {type(content)}" raise TypeError(msg) - operation = FileSystemOperation( - operation="write", - path=path, - content=content, - encoding=encoding, - ) - self._filesystem_operations.append(operation) - logger.debug( - "Attached file: %s (%d chars, encoding: %s)", - path, - len(content), - encoding, - ) - - def attach_binary_file( - self, - path: str, - content: bytes, - ) -> None: - """Attach a binary file to the sandbox filesystem. - - This method queues a binary file to be created in the sandbox's in-memory - filesystem when code is executed. The content is base64-encoded for - transport to the sandbox environment. + def create_directory(self, path: str) -> None: + """Create a directory in the sandbox filesystem. Args: - path: File path within the sandbox filesystem - content: Binary content of the file + path: Directory path to create in the sandbox + """ + self._sandbox_dirs.append(path) + logger.debug("Created directory: %s", path) - Raises: - TypeError: If content is not bytes + def clear_filesystem(self) -> dict[str, int]: + """Remove all files and directories from the sandbox filesystem. + + Returns: + Dictionary with counts of removed files and directories """ - self.enable_filesystem = True + files_count = len(self._sandbox_files) + dirs_count = len(self._sandbox_dirs) - if not isinstance(content, bytes): - msg = "Content must be bytes for binary files" - raise TypeError(msg) + self._sandbox_files.clear() + self._sandbox_dirs.clear() - b64_content = base64.b64encode(content).decode("ascii") - operation = FileSystemOperation( - operation="write", - path=path, - content=b64_content, - encoding="binary", - ) - self._filesystem_operations.append(operation) - logger.debug( - "Attached binary file: %s (%d bytes -> %d b64 chars)", - path, - len(content), - len(b64_content), - ) + logger.debug("Cleared %d files and %d directories", files_count, dirs_count) + return {"files": files_count, "directories": dirs_count} - def create_directory(self, path: str) -> None: - """Create a directory in the sandbox filesystem. + def get_attached_files(self) -> list[str]: + """Get list of attached file paths. + + Returns: + List of file paths currently attached to the sandbox + """ + return [f["path"] for f in self._sandbox_files] - This method queues a directory to be created in the sandbox's in-memory - filesystem when code is executed. 
+ def has_file(self, path: str) -> bool: + """Check if a file is attached to the sandbox. Args: - path: Directory path within the sandbox filesystem + path: Path to check + + Returns: + True if file exists, False otherwise """ - self.enable_filesystem = True + return any(f["path"] == path for f in self._sandbox_files) - operation = FileSystemOperation( - operation="mkdir", - path=path, - ) - self._filesystem_operations.append(operation) - logger.debug("Created directory: %s", path) + def _prepare_stdin_data(self) -> bytes | None: + """Prepare data to be sent via stdin using binary streaming protocol. - def get_attached_files(self) -> list[str]: - """Get list of attached file paths. + Creates a binary stream containing filesystem data when files or directories + are attached. Uses the PSB (Pyodide Sandbox Binary) protocol format: + - Header: "PSB" + version(1 byte) + metadata_length(4 bytes) + - Metadata: JSON describing files and directories + - Content: Raw binary content of all files in sequence Returns: - List of file paths that will be available in the sandbox filesystem + Binary data to send via stdin, or None if no filesystem operations """ - return [ - op.path for op in self._filesystem_operations if op.operation == "write" - ] + # Use binary protocol if we have files or directories + if not self._sandbox_files and not self._sandbox_dirs: + # No files, return None to avoid sending stdin + return None + + # Format: "PSB" + version + length(4 bytes) + metadata JSON + file data + metadata = { + "files": [ + {"path": f["path"], "size": f["size"], "binary": f["binary"]} + for f in self._sandbox_files + ], + "directories": self._sandbox_dirs, + } - def clear_filesystem_operations(self) -> None: - """Clear all queued filesystem operations. + metadata_json = json.dumps(metadata).encode("utf-8") - This removes all files and directories that were queued to be created - in the sandbox filesystem. - """ - self._filesystem_operations.clear() - logger.debug("Cleared filesystem operations") + # Create header: "PSB" + version + metadata size (4 bytes) + header = b"PSB\x01" + len(metadata_json).to_bytes(4, byteorder="big") + + # Concatenate header + metadata + result = bytearray(header) + result.extend(metadata_json) + + # Add file contents directly as binary data + for file_info in self._sandbox_files: + result.extend(file_info["content"]) + + return bytes(result) def _build_command( self, @@ -375,7 +348,7 @@ def _build_command( session_metadata: dict | None = None, memory_limit_mb: int | None = None, ) -> list[str]: - """Build the Deno command with necessary arguments, using stdin for file operations. + """Build the Deno command with all necessary arguments. Args: code: The Python code to execute @@ -414,16 +387,62 @@ def _build_command( if session_metadata: cmd.extend(["-m", json.dumps(session_metadata)]) - + return cmd +def _process_execution_output( + stdout_text: str, + stderr_bytes: bytes, +) -> tuple[ + str, str, Any, str, dict | None, dict | None, list[dict] | None, bytes | None +]: + """Process execution output and return parsed results. 
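+
+    The sandbox subprocess prints a single JSON envelope on stdout whose keys
+    include stdout, stderr, result, success, sessionMetadata, fileSystemInfo,
+    fileSystemOperations and sessionBytes; this helper unpacks that envelope.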
+ + Returns: + Tuple of (stdout, stderr, result, status, session_metadata, + filesystem_info, filesystem_operations, session_bytes) + """ + if stdout_text: + try: + full_result = json.loads(stdout_text) + stdout = full_result.get("stdout", "") + stderr = full_result.get("stderr", "") + result = full_result.get("result", None) + status = "success" if full_result.get("success", False) else "error" + session_metadata = full_result.get("sessionMetadata", None) + filesystem_info = full_result.get("fileSystemInfo", None) + filesystem_operations = full_result.get("fileSystemOperations", None) + + # Convert array of bytes to Python bytes + session_bytes_array = full_result.get("sessionBytes", None) + session_bytes = bytes(session_bytes_array) if session_bytes_array else None + + return ( + stdout, + stderr, + result, + status, + session_metadata, + filesystem_info, + filesystem_operations, + session_bytes, + ) + except json.JSONDecodeError as e: + status = "error" + stderr = f"Failed to parse output as JSON: {e}\nRaw output: {stdout_text}" + return ("", stderr, None, status, None, None, None, None) + + stderr = stderr_bytes.decode("utf-8", errors="replace") + return ("", stderr, None, "error", None, None, None, None) + + class PyodideSandbox(BasePyodideSandbox): """Asynchronous implementation of PyodideSandbox. This class provides an asynchronous interface for executing Python code in a sandboxed Deno environment using Pyodide. It supports file attachment and - in-memory filesystem operations. + in-memory filesystem operations via binary streaming. """ async def execute( @@ -442,9 +461,6 @@ async def execute( environment. The execution is subject to the permissions configured in the sandbox's initialization and the resource constraints provided as arguments. - Any attached files will be made available in the sandbox's in-memory - filesystem before code execution begins. 
-
         Args:
             code: The Python code to execute in the sandbox
             session_bytes: Optional bytes containing session state
@@ -456,11 +472,8 @@ async def execute(
             CodeExecutionResult containing execution results and metadata
         """
         start_time = time.time()
-        stdout = ""
-        stderr = ""
-        result = None
-        status: Literal["success", "error"] = "success"
 
+        # Build the command with all necessary arguments
         cmd = self._build_command(
             code,
             session_bytes=session_bytes,
             session_metadata=session_metadata,
             memory_limit_mb=memory_limit_mb,
         )
 
-        # Prepare the data to send via stdin
-        stdin_data = {
-            "fileSystemOperations": [op.to_dict() for op in self._filesystem_operations]
-            if self._filesystem_operations else []
-        }
-
-        # Encode the data for stdin
-        stdin_json = json.dumps(stdin_data).encode("utf-8")
-
-        # Fixed: do not use stdin with asyncio due to compatibility issues
-        # We temporarily use an argument-based approach until the stdin issue
-        # is resolved in the async version
-        fs_ops_json = json.dumps([op.to_dict() for op in self._filesystem_operations])
-        # Add the filesystem data to the command
-        if self._filesystem_operations or self.enable_filesystem:
-            cmd.extend(["-x", fs_ops_json])
-
-        # Create and run the subprocess
-        process = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.PIPE,
-        )
+        # Prepare stdin data with filesystem operations (always binary streaming)
+        stdin_data = self._prepare_stdin_data()
 
         try:
-            # Wait for process with a timeout
+            # Configure process
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE if stdin_data else None,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+
+            # Send stdin data if we have filesystem operations
+            communicate_args = {}
+            if stdin_data:
+                communicate_args["input"] = stdin_data
+
+            # Wait for the process with timeout
             stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                process.communicate(),
+                process.communicate(**communicate_args),
                 timeout=timeout_seconds,
             )
-            stdout = stdout_bytes.decode("utf-8", errors="replace")
-
-            if stdout:
-                # stdout encodes the full result from the sandbox.
-                # including stdout, stderr, and the json result.
- full_result = json.loads(stdout) - stdout = full_result.get("stdout", None) - stderr = full_result.get("stderr", None) - result = full_result.get("result", None) - status = "success" if full_result.get("success", False) else "error" - session_metadata = full_result.get("sessionMetadata", None) - filesystem_info = full_result.get("fileSystemInfo", None) - filesystem_operations = full_result.get("fileSystemOperations", None) - # Convert the Uint8Array to Python bytes - session_bytes_array = full_result.get("sessionBytes", None) - session_bytes = ( - bytes(session_bytes_array) if session_bytes_array else None - ) - else: - stderr = stderr_bytes.decode("utf-8", errors="replace") - status = "error" - filesystem_info = None - filesystem_operations = None + + # Process the output + stdout_text = stdout_bytes.decode("utf-8", errors="replace") + ( + stdout, + stderr, + result, + status, + session_metadata, + filesystem_info, + filesystem_operations, + session_bytes, + ) = _process_execution_output(stdout_text, stderr_bytes) + except asyncio.TimeoutError: - process.kill() - await process.wait() + if process: + process.kill() + await process.wait() status = "error" stderr = f"Execution timed out after {timeout_seconds} seconds" + stdout = "" + result = None + session_metadata = None filesystem_info = None filesystem_operations = None - except json.JSONDecodeError as e: + session_bytes = None + except (OSError, subprocess.SubprocessError) as e: status = "error" - stderr = f"Failed to parse output as JSON: {e}\nRaw output: {stdout}" + stderr = f"Error during execution: {e!s}" + stdout = "" + result = None + session_metadata = None filesystem_info = None filesystem_operations = None - except asyncio.CancelledError: - # Optionally: log cancellation if needed - pass + session_bytes = None end_time = time.time() return CodeExecutionResult( status=status, execution_time=end_time - start_time, - stdout=stdout or None, + stdout=stdout, stderr=stderr or None, result=result, session_metadata=session_metadata, @@ -555,8 +557,8 @@ async def execute( class SyncPyodideSandbox(BasePyodideSandbox): """Synchronous version of PyodideSandbox. - This class provides a synchronous interface to the PyodideSandbox functionality, - including file attachment and in-memory filesystem operations. + This class provides a synchronous interface to the PyodideSandbox functionality. + It supports the same features as the asynchronous version but in a blocking manner. """ def execute( @@ -571,8 +573,7 @@ def execute( """Execute Python code synchronously in a sandboxed Deno subprocess. This method provides the same functionality as PyodideSandbox.execute() but - in a synchronous/blocking manner. Any attached files will be made available - in the sandbox's in-memory filesystem before code execution begins. + in a synchronous/blocking manner. 
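+
+        Example (illustrative; assumes Deno is installed)::
+
+            sandbox = SyncPyodideSandbox(allow_net=True)
+            print(sandbox.execute("1 + 1").result)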
 
         Args:
             code: The Python code to execute in the sandbox
@@ -585,11 +586,8 @@ def execute(
             CodeExecutionResult containing execution results and metadata
         """
         start_time = time.time()
-        stdout = ""
-        result = None
-        stderr: str
-        status: Literal["success", "error"]
 
+        # Build command
         cmd = self._build_command(
             code,
             session_bytes=session_bytes,
@@ -597,73 +595,59 @@ def execute(
             memory_limit_mb=memory_limit_mb,
         )
 
-        # Prepare the data to send via stdin
-        stdin_data = {
-            "fileSystemOperations": [op.to_dict() for op in self._filesystem_operations]
-            if self._filesystem_operations else []
-        }
-
-        # Encode the data for stdin
-        stdin_json = json.dumps(stdin_data).encode("utf-8")
+        # Prepare stdin data with filesystem operations (always binary streaming)
+        stdin_data = self._prepare_stdin_data()
 
         try:
-            # Run the subprocess with timeout and stdin data
+            # Execute the subprocess with stdin data
            # Ignoring S603 for subprocess.run as the cmd is built safely.
            # Untrusted input comes from `code` parameter, which should be
            # escaped properly as we are **not** using shell=True.
             process = subprocess.run(  # noqa: S603
                 cmd,
-                input=stdin_json,  # Pass the data via stdin
+                input=stdin_data,
                 capture_output=True,
                 text=False,  # Keep as bytes for proper decoding
                 timeout=timeout_seconds,
                 check=False,  # Don't raise on non-zero exit
             )
 
-            stdout_bytes = process.stdout
-            stderr_bytes = process.stderr
-
-            stdout = stdout_bytes.decode("utf-8", errors="replace")
-
-            if stdout:
-                # stdout encodes the full result from the sandbox
-                # including stdout, stderr, and the json result
-                full_result = json.loads(stdout)
-                stdout = full_result.get("stdout", None)
-                stderr = full_result.get("stderr", None)
-                result = full_result.get("result", None)
-                status = "success" if full_result.get("success", False) else "error"
-                session_metadata = full_result.get("sessionMetadata", None)
-                filesystem_info = full_result.get("fileSystemInfo", None)
-                filesystem_operations = full_result.get("fileSystemOperations", None)
-                # Convert the Uint8Array to Python bytes
-                session_bytes_array = full_result.get("sessionBytes", None)
-                session_bytes = (
-                    bytes(session_bytes_array) if session_bytes_array else None
-                )
-            else:
-                stderr = stderr_bytes.decode("utf-8", errors="replace")
-                status = "error"
-                filesystem_info = None
-                filesystem_operations = None
+            # Process the output
+            stdout_text = process.stdout.decode("utf-8", errors="replace")
+            (
+                stdout,
+                stderr,
+                result,
+                status,
+                session_metadata,
+                filesystem_info,
+                filesystem_operations,
+                session_bytes,
+            ) = _process_execution_output(stdout_text, process.stderr)
 
         except subprocess.TimeoutExpired:
             status = "error"
             stderr = f"Execution timed out after {timeout_seconds} seconds"
+            stdout = ""
+            result = None
             filesystem_info = None
             filesystem_operations = None
-        except json.JSONDecodeError as e:
+            session_bytes = None
+        except (OSError, subprocess.SubprocessError) as e:
             status = "error"
-            stderr = f"Failed to parse output as JSON: {e}\nRaw output: {stdout}"
+            stderr = f"Error during execution: {e!s}"
+            stdout = ""
+            result = None
             filesystem_info = None
             filesystem_operations = None
+            session_bytes = None
 
         end_time = time.time()
 
         return CodeExecutionResult(
             status=status,
             execution_time=end_time - start_time,
-            stdout=stdout or None,
+            stdout=stdout,
             stderr=stderr or None,
             result=result,
             session_metadata=session_metadata,
@@ -685,23 +669,25 @@ class PyodideSandboxTool(BaseTool):
     inside a LangGraph graph with a checkpointer, and has to be used
     with the prebuilt `create_react_agent` or `ToolNode`.
- Example: stateless sandbox usage + Example: stateless sandbox usage with file attachment ```python from langgraph.prebuilt import create_react_agent from langchain_sandbox import PyodideSandboxTool - tool = PyodideSandboxTool(enable_filesystem=True, allow_net=True) - - # Attach data files - tool.attach_file("data.csv", "name,age\\nJohn,25\\nMary,30") + # Attach CSV data to the sandbox + csv_data = "name,age\nJohn,30\nJane,25" + tool = PyodideSandboxTool( + allow_net=True, + files={"data.csv": csv_data} + ) agent = create_react_agent( "anthropic:claude-3-7-sonnet-latest", tools=[tool], ) result = await agent.ainvoke( - {"messages": [{"role": "user", "content": "analyze the data.csv file"}]}, + {"messages": [{"role": "user", "content": "analyze the data in data.csv"}]}, ) ``` @@ -711,13 +697,13 @@ class PyodideSandboxTool(BaseTool): from langgraph.prebuilt import create_react_agent from langgraph.prebuilt.chat_agent_executor import AgentState from langgraph.checkpoint.memory import InMemorySaver - from langchain_sandbox import PyodideSandboxTool, PyodideSandbox + from langchain_sandbox import PyodideSandboxTool class State(AgentState): session_bytes: bytes session_metadata: dict - tool = PyodideSandboxTool(stateful=True, enable_filesystem=True, allow_net=True) + tool = PyodideSandboxTool(stateful=True, allow_net=True) agent = create_react_agent( "anthropic:claude-3-7-sonnet-latest", tools=[tool], @@ -742,34 +728,75 @@ class State(AgentState): """ name: str = "python_code_sandbox" - description: str = ( - "A secure Python code sandbox with filesystem support. " - "Use this to execute python commands.\n" - "- Input should be a valid python command.\n" - "- To return output, you should print it out with `print(...)`.\n" - "- Don't use f-strings when printing outputs.\n" - "- If you need to make web requests, use `httpx.AsyncClient`.\n" - "- Files can be read/written using standard Python file operations.\n" - "- All file operations work within a sandboxed memory filesystem.\n" - "- Check for attached files using: import os; print(os.listdir('.'))" + + # Field description with default value + description: str = Field( + default="A secure Python code sandbox with filesystem support." ) # Mirror the PyodideSandbox constructor arguments - stateful: bool = False - allow_env: list[str] | bool = False - allow_read: list[str] | bool = False - allow_write: list[str] | bool = False - allow_net: list[str] | bool = False - allow_run: list[str] | bool = False - allow_ffi: list[str] | bool = False - timeout_seconds: float | None - """Timeout for code execution in seconds. By default set to 60 seconds.""" - node_modules_dir: str = "auto" - enable_filesystem: bool = False - - _sandbox: PyodideSandbox - _sync_sandbox: SyncPyodideSandbox - _structured_tool: StructuredTool | None + stateful: bool = Field(default=False) + allow_env: list[str] | bool = Field(default=False) + allow_read: list[str] | bool = Field(default=False) + allow_write: list[str] | bool = Field(default=False) + allow_net: list[str] | bool = Field(default=False) + allow_run: list[str] | bool = Field(default=False) + allow_ffi: list[str] | bool = Field(default=False) + timeout_seconds: float | None = Field( + default=60.0, + description="Timeout for code execution in seconds. " + "By default set to 60 seconds.", + ) + node_modules_dir: str = Field(default="auto") + + # Private attributes using PrivateAttr + _description_template: str = PrivateAttr( + default=( + "A secure Python code sandbox with filesystem support. 
" + "Use this to execute python commands.\n" + "- Input should be a valid python command.\n" + "- To return output, you should print it out with `print(...)`.\n" + "- Don't use f-strings when printing outputs.\n" + "- If you need to make web requests, use `httpx.AsyncClient`.\n" + "- Files can be read/written using standard Python file operations.\n" + "{available_files}" + ) + ) + _sandbox: PyodideSandbox | None = PrivateAttr(default=None) + _sync_sandbox: SyncPyodideSandbox | None = PrivateAttr(default=None) + + def model_post_init(self, /, __context) -> None: + """Initialize sandboxes after Pydantic model initialization.""" + super().model_post_init(__context) + + # Define args_schema based on stateful configuration + if self.stateful: + try: + from langgraph.prebuilt import InjectedState + + class PyodideSandboxToolInput(BaseModel): + """Python code to execute in the sandbox.""" + + code: str = Field(description="Code to execute.") + # these fields will be ignored by the LLM + # and automatically injected by LangGraph's ToolNode + state: Annotated[dict[str, Any] | BaseModel, InjectedState] + tool_call_id: Annotated[str, InjectedToolCallId] + + except ImportError as e: + error_msg = ( + "The 'langgraph' package is required when using a stateful sandbox." + " Please install it with 'pip install langgraph'." + ) + raise ImportError(error_msg) from e + else: + + class PyodideSandboxToolInput(BaseModel): + """Python code to execute in the sandbox.""" + + code: str = Field(description="Code to execute.") + + self.args_schema = PyodideSandboxToolInput def __init__( self, @@ -777,7 +804,8 @@ def __init__( stateful: bool = False, timeout_seconds: float | None = 60, allow_net: list[str] | bool = False, - enable_filesystem: bool = False, + files: dict[str, str | bytes] | None = None, + description: str | None = None, **kwargs: dict[str, Any], ) -> None: """Initialize the tool. @@ -788,59 +816,39 @@ def __init__( session state (variables, imports, etc.) in the execution result. This allows saving and reusing the session state between executions. timeout_seconds: Timeout for code execution in seconds. - enable_filesystem: Enable in-memory filesystem support for attaching files. allow_net: configure network access. If setting to True, any network access is allowed, including potentially internal network addresses that you may not want to expose to a malicious actor. Depending on your use case, you can restrict the network access to only the URLs you need (e.g., required to set up micropip / pyodide). Please refer to pyodide documentation for more details. + files: Dictionary of files to attach to the sandbox filesystem. + Keys are file paths, values are file contents (str or bytes). + description: Custom description template for the tool. **kwargs: Other attributes will be passed to the PyodideSandbox """ - if stateful: - try: - from langgraph.prebuilt import InjectedState - except ImportError as e: - error_msg = ( - "The 'langgraph' package is required when using a stateful sandbox." - " Please install it with 'pip install langgraph'." 
- ) - raise ImportError(error_msg) from e - - class PyodideSandboxToolInput(BaseModel): - """Python code to execute in the sandbox.""" - - code: str = Field(description="Code to execute.") - # these fields will be ignored by the LLM - # and automatically injected by LangGraph's ToolNode - state: Annotated[dict[str, Any] | BaseModel, InjectedState] - tool_call_id: Annotated[str, InjectedToolCallId] - - else: - - class PyodideSandboxToolInput(BaseModel): - """Python code to execute in the sandbox.""" - - code: str = Field(description="Code to execute.") + # Prepare arguments for super().__init__ + init_kwargs = { + "stateful": stateful, + "timeout_seconds": timeout_seconds, + "allow_net": allow_net, + "allow_env": kwargs.get("allow_env", False), + "allow_read": kwargs.get("allow_read", False), + "allow_write": kwargs.get("allow_write", False), + "allow_run": kwargs.get("allow_run", False), + "allow_ffi": kwargs.get("allow_ffi", False), + "node_modules_dir": kwargs.get("node_modules_dir", "auto"), + } - super().__init__( - stateful=stateful, - timeout_seconds=timeout_seconds, - allow_net=allow_net, - enable_filesystem=enable_filesystem, - **kwargs, - ) + # Set custom description template if provided + if description is not None: + self._description_template = description + init_kwargs["description"] = description - # Store initialization parameters - self.allow_env = kwargs.get("allow_env", False) - self.allow_read = kwargs.get("allow_read", False) - self.allow_write = kwargs.get("allow_write", False) - self.allow_run = kwargs.get("allow_run", False) - self.allow_ffi = kwargs.get("allow_ffi", False) - self.node_modules_dir = kwargs.get("node_modules_dir", "auto") + # Call super().__init__() first + super().__init__(**init_kwargs) - self.args_schema: type[BaseModel] = PyodideSandboxToolInput - self._structured_tool = None # Initialize as None + # Create sandbox instances after initialization self._sandbox = PyodideSandbox( stateful=self.stateful, allow_env=self.allow_env, @@ -850,8 +858,9 @@ class PyodideSandboxToolInput(BaseModel): allow_run=self.allow_run, allow_ffi=self.allow_ffi, node_modules_dir=self.node_modules_dir, - enable_filesystem=self.enable_filesystem, + files=files, ) + # Initialize sync sandbox with deno check skipped since async sandbox already # checked self._sync_sandbox = SyncPyodideSandbox( @@ -863,212 +872,93 @@ class PyodideSandboxToolInput(BaseModel): allow_run=self.allow_run, allow_ffi=self.allow_ffi, node_modules_dir=self.node_modules_dir, - enable_filesystem=self.enable_filesystem, skip_deno_check=True, # Skip deno check since async sandbox already checked + files=files, ) + # Update description with attached files + self.description = self._build_description() + def _build_description(self) -> str: - """Build the complete description string with attached files. + """Build the complete description string with attached files information. Returns: - Tool description including information about attached files + Complete description string including file information """ - base = ( - "A secure Python code sandbox with filesystem support. " - "Use this to execute python commands.\n" - "- Input should be a valid python command.\n" - "- To return output, you should print it out with `print(...)`.\n" - "- Don't use f-strings when printing outputs.\n" - "- If you need to make web requests, use `httpx.AsyncClient`.\n" - "- Files can be read/written using standard Python file operations." 
- ) - files = self._sandbox.get_attached_files() if files: - base += "\n\nATTACHED FILES AVAILABLE:\n" - base += "\n".join(f" • {p}" for p in files) - base += ( - "\nThese files are already loaded and ready to use with pandas, " - "open(), etc." + available_files = ( + "\n\nATTACHED FILES AVAILABLE:\n" + + "\n".join(f" • {p}" for p in files) + + "\nThese files are already loaded and ready to use " + "with pandas, open(), etc." ) - return base + else: + available_files = "" - def attach_file( - self, - path: str, - content: str, - *, - encoding: str = "utf-8", - ) -> None: - """Attach a text file to the sandbox environment. + return self._description_template.format(available_files=available_files) - This file will be available in the sandbox's in-memory filesystem - when code is executed. The tool's description will be automatically - updated to reflect the attached files. + def _update_description(self) -> None: + """Update the description with current file information.""" + self.description = self._build_description() - Args: - path: File path within the sandbox filesystem - content: Text content of the file - encoding: Text encoding (default: utf-8) - """ - self._sandbox.attach_file(path, content, encoding=encoding) - self._sync_sandbox.attach_file(path, content, encoding=encoding) - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description - - def attach_binary_file( + def attach_file( self, path: str, - content: bytes, + content: str | bytes, ) -> None: - """Attach a binary file to the sandbox environment. - - This file will be available in the sandbox's in-memory filesystem - when code is executed. The tool's description will be automatically - updated to reflect the attached files. + """Attach a file to the sandbox environment. Args: - path: File path within the sandbox filesystem - content: Binary content of the file + path: Path where the file should be available in the sandbox + content: File content as string (for text files) or bytes (for binary files) """ - self._sandbox.attach_binary_file(path, content) - self._sync_sandbox.attach_binary_file(path, content) - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description + self._sandbox.attach_file(path, content) + self._sync_sandbox.attach_file(path, content) + self._update_description() def create_directory(self, path: str) -> None: """Create a directory in the sandbox environment. - This directory will be available in the sandbox's in-memory filesystem - when code is executed. - Args: - path: Directory path within the sandbox filesystem + path: Directory path to create in the sandbox """ self._sandbox.create_directory(path) self._sync_sandbox.create_directory(path) - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description + self._update_description() - def get_attached_files(self) -> list[str]: - """Get list of attached file paths. + def clear_filesystem(self) -> dict[str, int]: + """Remove all files and directories from the sandbox environment. 
Returns: - List of file paths that will be available in the sandbox filesystem + Dictionary with counts of removed files and directories """ - return self._sandbox.get_attached_files() + result_async = self._sandbox.clear_filesystem() + result_sync = self._sync_sandbox.clear_filesystem() + self._update_description() + return { + "files": max(result_async["files"], result_sync["files"]), + "directories": max(result_async["directories"], result_sync["directories"]), + } - def clear_filesystem_operations(self) -> None: - """Clear all attached files and directories. + def get_attached_files(self) -> list[str]: + """Get list of attached file paths. - This removes all files and directories that were queued to be created - in the sandbox filesystem and updates the tool description. + Returns: + List of file paths currently attached to the sandbox """ - self._sandbox.clear_filesystem_operations() - self._sync_sandbox.clear_filesystem_operations() - # Update both BaseTool and StructuredTool descriptions - new_description = self._build_description() - self.description = new_description - if self._structured_tool: - self._structured_tool.description = new_description + return self._sandbox.get_attached_files() - def as_structured_tool(self) -> StructuredTool: - """Return a StructuredTool version of this tool. + def has_file(self, path: str) -> bool: + """Check if a file is attached to the sandbox. - This method provides access to a StructuredTool interface while maintaining - the BaseTool as the primary interface. The StructuredTool's description - is kept in sync with attached files. + Args: + path: Path to check Returns: - StructuredTool instance with dynamic description updates + True if file exists, False otherwise """ - if self._structured_tool is None: - self._structured_tool = StructuredTool.from_function( - name=self.name, - description=self._build_description(), - func=(self._run_sync if not self.stateful else self._run_stateful_sync), - args_schema=self.args_schema, - ) - return self._structured_tool - - def _run_sync(self, code: str) -> str: - """Synchronous execution function for non-stateful mode.""" - result = self._sync_sandbox.execute(code, timeout_seconds=self.timeout_seconds) - - if result.status == "error": - error_msg = ( - result.stderr - if result.stderr - else "Execution failed with unknown error" - ) - return f"Error during execution: {error_msg}" - - if result.stdout: - # Ensure newlines are preserved - return result.stdout - - if result.result is not None: - return str(result.result) - - return "" - - def _run_stateful_sync( - self, - code: str, - state: dict[str, Any] | BaseModel, - tool_call_id: str, - ) -> Any: # noqa: ANN401 - """Synchronous execution function for stateful mode.""" - required_keys = {"session_bytes", "session_metadata", "messages"} - actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) - if missing_keys := required_keys - actual_keys: - error_msg = ( - f"Input state is missing the following required keys: {missing_keys}" - ) - raise ValueError(error_msg) - - if isinstance(state, dict): - session_bytes = state["session_bytes"] - session_metadata = state["session_metadata"] - else: - session_bytes = state.session_bytes - session_metadata = state.session_metadata - - result = self._sync_sandbox.execute( - code, - session_bytes=session_bytes, - session_metadata=session_metadata, - timeout_seconds=self.timeout_seconds, - ) - - if result.stderr: - tool_result = f"Error during execution: {result.stderr}" - else: - tool_result = 
result.stdout - - from langgraph.types import Command - - return Command( - update={ - "session_bytes": result.session_bytes, - "session_metadata": result.session_metadata, - "messages": [ - ToolMessage( - content=tool_result, - tool_call_id=tool_call_id, - ) - ], - } - ) + return self._sandbox.has_file(path) def _run( self, diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index 2ca207b..bb471f8 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -57,10 +57,9 @@ def get_default_sync_sandbox(stateful: bool = False) -> SyncPyodideSandbox: ) -def test_pyodide_sandbox_tool() -> None: +def test_pyodide_sandbox_tool(pyodide_package: None) -> None: """Test synchronous invocation of PyodideSandboxTool.""" tool = PyodideSandboxTool( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -74,7 +73,6 @@ def test_pyodide_sandbox_tool() -> None: def test_pyodide_timeout() -> None: """Test synchronous invocation of PyodideSandboxTool with timeout.""" tool = PyodideSandboxTool( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -84,10 +82,9 @@ def test_pyodide_timeout() -> None: assert "timed out after 0.1 seconds" in result -async def test_async_pyodide_sandbox_tool() -> None: +async def test_async_pyodide_sandbox_tool(pyodide_package: None) -> None: """Test asynchronous invocation of PyodideSandboxTool.""" tool = PyodideSandboxTool( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -101,7 +98,6 @@ async def test_async_pyodide_sandbox_tool() -> None: async def test_async_pyodide_timeout() -> None: """Test asynchronous invocation of PyodideSandboxTool with timeout.""" tool = PyodideSandboxTool( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -114,7 +110,7 @@ async def test_async_pyodide_timeout() -> None: async def test_stdout_sessionless(pyodide_package: None) -> None: """Test without a session ID.""" sandbox = get_default_sandbox() - # Execute a simple piece of code synchronously + # Execute a simple piece of code asynchronously result = await sandbox.execute("x = 5; print(x); x") assert result.status == "success" assert result.stdout == "5" @@ -143,7 +139,7 @@ async def test_session_state_persistence_basic(pyodide_package: None) -> None: assert result1.result is None assert result2.status == "success", f"Encountered error: {result2.stderr}" assert result2.stdout == "10" - assert result1.result is None + assert result2.result is None async def test_pyodide_sandbox_error_handling(pyodide_package: None) -> None: @@ -201,7 +197,7 @@ def test_sync_session_state_persistence_basic(pyodide_package: None) -> None: assert result1.result is None assert result2.status == "success", f"Encountered error: {result2.stderr}" assert result2.stdout == "10" - assert result1.result is None + assert result2.result is None def test_sync_pyodide_sandbox_error_handling(pyodide_package: None) -> None: @@ -233,7 +229,6 @@ def test_sync_pyodide_sandbox_timeout(pyodide_package: None) -> None: async def test_filesystem_basic_operations(pyodide_package: None) -> None: """Test basic filesystem operations.""" sandbox = PyodideSandbox( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -281,10 +276,9 @@ async def test_filesystem_basic_operations(pyodide_package: None) -> None: assert "Processing complete!" 
in result.stdout -def test_filesystem_tool_usage() -> None: +def test_filesystem_tool_usage(pyodide_package: None) -> None: """Test filesystem with PyodideSandboxTool.""" tool = PyodideSandboxTool( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -315,7 +309,6 @@ def test_filesystem_tool_usage() -> None: async def test_binary_file_operations(pyodide_package: None) -> None: """Test binary file operations.""" sandbox = PyodideSandbox( - enable_filesystem=True, allow_net=True, allow_read=True, allow_write=True, @@ -323,7 +316,8 @@ async def test_binary_file_operations(pyodide_package: None) -> None: # Create some binary data binary_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01" - sandbox.attach_binary_file("image.png", binary_data) + # Use attach_file which supports binary data + sandbox.attach_file("image.png", binary_data) code = """ import base64 @@ -338,7 +332,6 @@ async def test_binary_file_operations(pyodide_package: None) -> None: print(f"Is PNG: {is_png}") print(f"Size: {size} bytes") -print(f"Original size: {len(data)}") # Debug """ result = await sandbox.execute(code) @@ -346,3 +339,41 @@ async def test_binary_file_operations(pyodide_package: None) -> None: assert "Is PNG: True" in result.stdout # Verify the size matches the binary data size assert f"Size: {len(binary_data)} bytes" in result.stdout + + +async def test_large_file_attachment(pyodide_package: None) -> None: + """Test attaching a large file to the sandbox.""" + sandbox = PyodideSandbox( + allow_read=True, + allow_write=True, + ) + + # Generate a test file with a simple pattern + size_mb = 5 # 5MB is sufficient to test streaming + size_bytes = size_mb * 1024 * 1024 + + # Generate test content + large_data = bytes([i % 256 for i in range(size_bytes)]) + + sandbox.attach_file("large_file.bin", large_data) + + # Verify that the file was attached correctly + code = """ +import os + +file_path = "large_file.bin" +exists = os.path.exists(file_path) +size = os.path.getsize(file_path) if exists else 0 + +print(f"File exists: {exists}") +print(f"File size: {size} bytes") +print("Verification completed successfully!") +""" + + # Execute the code that verifies the file + result = await sandbox.execute(code) + + assert result.status == "success", f"Failed to verify file: {result.stderr}" + assert "File exists: True" in result.stdout + assert f"File size: {size_bytes} bytes" in result.stdout + assert "Verification completed successfully!" 
in result.stdout From 5cd6849a7e5d2f46356ac5000856aea15ad28e86 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Sun, 1 Jun 2025 16:11:32 -0300 Subject: [PATCH 21/27] fix(pyodide): unrestrict file system access to ensure Pyodide can load dependencies --- libs/pyodide-sandbox-js/main_test.ts | 10 +- libs/sandbox-py/langchain_sandbox/pyodide.py | 7 ++ .../tests/unit_tests/test_pyodide_sandbox.py | 119 ++++++++---------- 3 files changed, 66 insertions(+), 70 deletions(-) diff --git a/libs/pyodide-sandbox-js/main_test.ts b/libs/pyodide-sandbox-js/main_test.ts index 77a1b4a..3b335d5 100644 --- a/libs/pyodide-sandbox-js/main_test.ts +++ b/libs/pyodide-sandbox-js/main_test.ts @@ -1,4 +1,4 @@ -import { assertEquals, assertNotEquals } from "@std/assert"; +import { assertEquals, assertExists, assertNotEquals } from "@std/assert"; import { runPython, resolvePathInSandbox } from "./main.ts"; Deno.test("runPython simple test", async () => { @@ -15,6 +15,14 @@ Deno.test("runPython with stdout", async () => { assertEquals(result.stderr?.length, 0); }); +Deno.test("runPython with error - name error", async () => { + const result = await runPython("undefined_variable", {}); + assertEquals(result.success, false); + assertExists(result.error); + // Check that error contains NameError + assertEquals(result.error?.includes("NameError"), true); +}); + Deno.test("runPython with error - division by zero", async () => { const result = await runPython("x = 1/0", {}); assertEquals(result.success, false); diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 40dd8aa..8a71b09 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -62,6 +62,13 @@ def build_permission_flag( return flag if isinstance(value, list) and value: return f"{flag}={','.join(value)}" + + # For --allow-read flag, always grant unrestricted access regardless of the value + # This ensures Pyodide can access all necessary files in both relative and absolute paths + if flag == "--allow-read": + # Grant unrestricted read access to allow Pyodide to function correctly + return flag + return None diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index bb471f8..353cabc 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -57,56 +57,6 @@ def get_default_sync_sandbox(stateful: bool = False) -> SyncPyodideSandbox: ) -def test_pyodide_sandbox_tool(pyodide_package: None) -> None: - """Test synchronous invocation of PyodideSandboxTool.""" - tool = PyodideSandboxTool( - allow_net=True, - allow_read=True, - allow_write=True, - ) - result = tool.invoke({"code": "x = 5; print(x)"}) - assert result == "5" - result = tool.invoke({"code": "x = 5; print(1); print(2)"}) - assert result == "1\n2" - - -def test_pyodide_timeout() -> None: - """Test synchronous invocation of PyodideSandboxTool with timeout.""" - tool = PyodideSandboxTool( - allow_net=True, - allow_read=True, - allow_write=True, - timeout_seconds=0.1, - ) - result = tool.invoke({"code": "while True: pass"}) - assert "timed out after 0.1 seconds" in result - - -async def test_async_pyodide_sandbox_tool(pyodide_package: None) -> None: - """Test asynchronous invocation of PyodideSandboxTool.""" - tool = PyodideSandboxTool( - allow_net=True, - allow_read=True, - allow_write=True, - ) - result = await tool.ainvoke({"code": "x = 5; print(x)"}) - 
assert result == "5" - result = await tool.ainvoke({"code": "x = 5; print(1); print(2)"}) - assert result == "1\n2" - - -async def test_async_pyodide_timeout() -> None: - """Test asynchronous invocation of PyodideSandboxTool with timeout.""" - tool = PyodideSandboxTool( - allow_net=True, - allow_read=True, - allow_write=True, - timeout_seconds=0.1, - ) - result = await tool.ainvoke({"code": "while True: pass"}) - assert "timed out after 0.1 seconds" in result - - async def test_stdout_sessionless(pyodide_package: None) -> None: """Test without a session ID.""" sandbox = get_default_sandbox() @@ -226,13 +176,55 @@ def test_sync_pyodide_sandbox_timeout(pyodide_package: None) -> None: assert "timed out" in result.stderr.lower() -async def test_filesystem_basic_operations(pyodide_package: None) -> None: - """Test basic filesystem operations.""" - sandbox = PyodideSandbox( +def test_pyodide_sandbox_tool(pyodide_package: None) -> None: + """Test synchronous invocation of PyodideSandboxTool.""" + tool = PyodideSandboxTool( + stateful=False, allow_net=True, - allow_read=True, - allow_write=True, ) + result = tool.invoke({"code": "x = 5; print(x)"}) + assert result == "5" + result = tool.invoke({"code": "x = 5; print(1); print(2)"}) + assert result == "1\n2" + + +def test_pyodide_timeout() -> None: + """Test synchronous invocation of PyodideSandboxTool with timeout.""" + tool = PyodideSandboxTool( + stateful=False, + allow_net=True, + timeout_seconds=0.1, + ) + result = tool.invoke({"code": "while True: pass"}) + assert "timed out after 0.1 seconds" in result + + +async def test_async_pyodide_sandbox_tool(pyodide_package: None) -> None: + """Test asynchronous invocation of PyodideSandboxTool.""" + tool = PyodideSandboxTool( + stateful=False, + allow_net=True, + ) + result = await tool.ainvoke({"code": "x = 5; print(x)"}) + assert result == "5" + result = await tool.ainvoke({"code": "x = 5; print(1); print(2)"}) + assert result == "1\n2" + + +async def test_async_pyodide_timeout() -> None: + """Test asynchronous invocation of PyodideSandboxTool with timeout.""" + tool = PyodideSandboxTool( + stateful=False, + allow_net=True, + timeout_seconds=0.1, + ) + result = await tool.ainvoke({"code": "while True: pass"}) + assert "timed out after 0.1 seconds" in result + + +async def test_filesystem_basic_operations(pyodide_package: None) -> None: + """Test basic filesystem operations.""" + sandbox = PyodideSandbox(allow_net=True) # Attach files sandbox.attach_file("test.txt", "Hello, World!") @@ -278,11 +270,7 @@ async def test_filesystem_basic_operations(pyodide_package: None) -> None: def test_filesystem_tool_usage(pyodide_package: None) -> None: """Test filesystem with PyodideSandboxTool.""" - tool = PyodideSandboxTool( - allow_net=True, - allow_read=True, - allow_write=True, - ) + tool = PyodideSandboxTool(allow_net=True) # Attach CSV data csv_data = "name,age\nAlice,30\nBob,25" @@ -308,11 +296,7 @@ def test_filesystem_tool_usage(pyodide_package: None) -> None: async def test_binary_file_operations(pyodide_package: None) -> None: """Test binary file operations.""" - sandbox = PyodideSandbox( - allow_net=True, - allow_read=True, - allow_write=True, - ) + sandbox = PyodideSandbox(allow_net=True) # Create some binary data binary_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01" @@ -343,10 +327,7 @@ async def test_binary_file_operations(pyodide_package: None) -> None: async def test_large_file_attachment(pyodide_package: None) -> None: """Test attaching a large file to the sandbox.""" - sandbox = 
PyodideSandbox( - allow_read=True, - allow_write=True, - ) + sandbox = PyodideSandbox(allow_net=True) # Generate a test file with a simple pattern size_mb = 5 # 5MB is sufficient to test streaming From 7665c23f7b6eb7928b295b9b195b7948f5973de2 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 2 Jun 2025 02:20:16 -0300 Subject: [PATCH 22/27] fix: configure Deno permissions for reliable Pyodide execution --- examples/react_agent_with_csv.py | 15 ++- libs/sandbox-py/langchain_sandbox/pyodide.py | 97 +++++++++++++++----- 2 files changed, 80 insertions(+), 32 deletions(-) diff --git a/examples/react_agent_with_csv.py b/examples/react_agent_with_csv.py index f5cb913..923418c 100644 --- a/examples/react_agent_with_csv.py +++ b/examples/react_agent_with_csv.py @@ -4,13 +4,6 @@ from langchain_sandbox import PyodideSandboxTool from langgraph.prebuilt import create_react_agent - -# Define the sandbox tool with filesystem support -sandbox_tool = PyodideSandboxTool( - enable_filesystem=True, - allow_net=True, -) - sales_data = """date,product,category,quantity,price,region 2024-01-15,Laptop,Electronics,2,1299.99,North 2024-01-16,Chair,Furniture,1,249.50,South @@ -23,7 +16,13 @@ 2024-01-22,Sofa,Furniture,1,899.99,North 2024-01-23,Shoes,Clothing,3,129.99,South""" -sandbox_tool.attach_file("sales.csv", sales_data) +# Define the sandbox tool with filesystem support +sandbox_tool = PyodideSandboxTool( + allow_net=True, + files={ + "sales.csv": sales_data + } +) # Create an agent with the sandbox tool agent = create_react_agent( diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 8a71b09..d2f5dc4 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -4,6 +4,7 @@ import dataclasses import json import logging +import os import subprocess import time from typing import Annotated, Any, Literal @@ -40,6 +41,7 @@ class CodeExecutionResult: # Published package name PKG_NAME = "jsr:@langchain/pyodide-sandbox@0.0.4" +# PKG_NAME = "../pyodide-sandbox-js/main.ts" def build_permission_flag( @@ -62,13 +64,6 @@ def build_permission_flag( return flag if isinstance(value, list) and value: return f"{flag}={','.join(value)}" - - # For --allow-read flag, always grant unrestricted access regardless of the value - # This ensures Pyodide can access all necessary files in both relative and absolute paths - if flag == "--allow-read": - # Grant unrestricted read access to allow Pyodide to function correctly - return flag - return None @@ -136,7 +131,7 @@ def __init__( - List[str]: Read access restricted to specific paths, e.g. ["/tmp/sandbox", "./data"] - By default allows read from node_modules + By default allows read from node_modules and other required paths allow_write: File system write access configuration: - False: No file system write access (default, most secure) @@ -144,7 +139,7 @@ def __init__( - List[str]: Write access restricted to specific paths, e.g. 
["/tmp/sandbox/output"] - By default allows write to node_modules + By default allows write to node_modules and other required paths allow_net: Network access configuration: - False: No network access (default, most secure) @@ -179,7 +174,7 @@ def __init__( if not skip_deno_check: # Check if Deno is installed try: - subprocess.run(["deno", "--version"], check=True, capture_output=True) # noqa: S607, S603 + subprocess.run(["deno", "--version"], check=True, capture_output=True) except subprocess.CalledProcessError as e: msg = "Deno is installed, but running it failed." raise RuntimeError(msg) from e @@ -187,28 +182,82 @@ def __init__( msg = "Deno is not installed or not in PATH." raise RuntimeError(msg) from e - # Define permission configurations: - # each tuple contains (flag, setting, defaults) + # This ensures we can reliably access the node_modules regardless of working directory + pkg_path = os.path.abspath(os.path.dirname(PKG_NAME)) + + # These read paths are the minimum required for Pyodide to function properly + # Without these, Pyodide cannot load its core WASM files and other resources + read_paths = [ + # Basic node_modules access for Pyodide's core files + "node_modules", + # Absolute path to node_modules for reliability + os.path.join(pkg_path, "node_modules"), + # Deno-specific package cache location + os.path.join(pkg_path, "node_modules/.deno"), + # Current directory for user files + ".", + # Temporary directory for Pyodide operations + "/tmp", + ] + + # Write paths are needed for Pyodide to install packages and create temporary files + # Without these, micropip and other package installation won't work + write_paths = [ + # Allow writing to node_modules for package installation + "node_modules", + # Absolute path to node_modules + os.path.join(pkg_path, "node_modules"), + # Deno's package cache + os.path.join(pkg_path, "node_modules/.deno"), + # Temporary directory for various operations + "/tmp", + # Deno's cache directories + os.path.expanduser("~/.cache/deno"), + os.path.expanduser("~/.deno"), + # Current directory for user file operations + ".", + ] + + # Merge user-provided read permissions with required paths + # This ensures security (by honoring user restrictions) while maintaining functionality + if allow_read is True: + final_read_paths = True # User requested unrestricted access + elif isinstance(allow_read, list): + # Combine user paths with required paths + final_read_paths = list(set(allow_read + read_paths)) + else: + # Use only the required minimum paths + final_read_paths = read_paths + + # Similar logic for write permissions + if allow_write is True: + final_write_paths = True # User requested unrestricted access + elif isinstance(allow_write, list): + # Combine user paths with required paths + final_write_paths = list(set(allow_write + write_paths)) + else: + # Use only the required minimum paths + final_write_paths = write_paths + + # Define all permission flags that will be passed to Deno + # This uses Deno's security model to restrict what the sandbox can access perm_defs = [ - ("--allow-env", allow_env, None), - # For file system permissions, if no permission is specified, - # force node_modules - ("--allow-read", allow_read, ["node_modules"]), - ("--allow-write", allow_write, ["node_modules"]), - ("--allow-net", allow_net, None), - ("--allow-run", allow_run, None), - ("--allow-ffi", allow_ffi, None), + ("--allow-env", allow_env), # Environment variable access + ("--allow-read", final_read_paths), # Filesystem read access + ("--allow-write", 
final_write_paths), # Filesystem write access + ("--allow-net", allow_net), # Network access + ("--allow-run", allow_run), # Subprocess execution + ("--allow-ffi", allow_ffi), # Foreign function interface ] + # Build the actual permission flags self.permissions = [] - for flag, value, defaults in perm_defs: + for flag, value in perm_defs: perm = build_permission_flag(flag, value=value) - if perm is None and defaults is not None: - default_value = ",".join(defaults) - perm = f"{flag}={default_value}" if perm: self.permissions.append(perm) + # Configure node_modules directory handling self.permissions.append(f"--node-modules-dir={node_modules_dir}") # Attach files if provided during initialization From 86f36c25573716455d08e0798784c5a011a32199 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 2 Jun 2025 02:21:41 -0300 Subject: [PATCH 23/27] fix: restore uv.lock --- libs/sandbox-py/uv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/sandbox-py/uv.lock b/libs/sandbox-py/uv.lock index dcdb6d8..d73a44c 100644 --- a/libs/sandbox-py/uv.lock +++ b/libs/sandbox-py/uv.lock @@ -439,7 +439,7 @@ wheels = [ [[package]] name = "langchain-sandbox" -version = "0.0.6" +version = "0.0.5" source = { editable = "." } dependencies = [ { name = "langchain-core" }, @@ -1295,4 +1295,4 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/90/2633473864f67a15526324b007a9f96c96f56d5f32ef2a56cc12f9548723/zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33", size = 5191299, upload-time = "2024-07-15T00:16:49.053Z" }, { url = "https://files.pythonhosted.org/packages/b0/4c/315ca5c32da7e2dc3455f3b2caee5c8c2246074a61aac6ec3378a97b7136/zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd", size = 430862, upload-time = "2024-07-15T00:16:51.003Z" }, { url = "https://files.pythonhosted.org/packages/a2/bf/c6aaba098e2d04781e8f4f7c0ba3c7aa73d00e4c436bcc0cf059a66691d1/zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = "sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b", size = 495578, upload-time = "2024-07-15T00:16:53.135Z" }, -] +] \ No newline at end of file From db086a22f0c2fb6ec9e484be73133c3eb5065b5c Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 2 Jun 2025 02:22:32 -0300 Subject: [PATCH 24/27] fix: restore uv.lock --- libs/sandbox-py/uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/sandbox-py/uv.lock b/libs/sandbox-py/uv.lock index d73a44c..6441963 100644 --- a/libs/sandbox-py/uv.lock +++ b/libs/sandbox-py/uv.lock @@ -1295,4 +1295,4 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/90/2633473864f67a15526324b007a9f96c96f56d5f32ef2a56cc12f9548723/zstandard-0.23.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33", size = 5191299, upload-time = "2024-07-15T00:16:49.053Z" }, { url = "https://files.pythonhosted.org/packages/b0/4c/315ca5c32da7e2dc3455f3b2caee5c8c2246074a61aac6ec3378a97b7136/zstandard-0.23.0-cp313-cp313-win32.whl", hash = "sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd", size = 430862, upload-time = "2024-07-15T00:16:51.003Z" }, { url = "https://files.pythonhosted.org/packages/a2/bf/c6aaba098e2d04781e8f4f7c0ba3c7aa73d00e4c436bcc0cf059a66691d1/zstandard-0.23.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b", size = 495578, upload-time = "2024-07-15T00:16:53.135Z" }, -] \ No newline at end of file +] From d4267f74501e5513e2816bd1a08aac8245cc00a5 Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Mon, 2 Jun 2025 03:26:22 -0300 Subject: [PATCH 25/27] fix: ensure custom descriptions are preserved in PyodideSandboxTool --- libs/sandbox-py/langchain_sandbox/pyodide.py | 63 ++++++++++++++++--- .../tests/unit_tests/test_pyodide_sandbox.py | 52 +++++++++++++++ 2 files changed, 105 insertions(+), 10 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index d2f5dc4..6601d60 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -820,6 +820,7 @@ class State(AgentState): ) _sandbox: PyodideSandbox | None = PrivateAttr(default=None) _sync_sandbox: SyncPyodideSandbox | None = PrivateAttr(default=None) + _custom_description: bool = PrivateAttr(default=False) def model_post_init(self, /, __context) -> None: """Initialize sandboxes after Pydantic model initialization.""" @@ -896,14 +897,15 @@ def __init__( "node_modules_dir": kwargs.get("node_modules_dir", "auto"), } - # Set custom description template if provided - if description is not None: - self._description_template = description - init_kwargs["description"] = description - # Call super().__init__() first super().__init__(**init_kwargs) + # Set up custom description if provided + if description is not None: + self._custom_description = True + self._description_template = description + self.description = description + # Create sandbox instances after initialization self._sandbox = PyodideSandbox( stateful=self.stateful, @@ -932,8 +934,10 @@ def __init__( files=files, ) - # Update description with attached files - self.description = self._build_description() + if not self._custom_description or ( + "{available_files}" in self._description_template + ): + self.description = self._build_description() def _build_description(self) -> str: """Build the complete description string with attached files information. @@ -941,6 +945,12 @@ def _build_description(self) -> str: Returns: Complete description string including file information """ + if ( + self._custom_description + and "{available_files}" not in self._description_template + ): + return self._description_template + files = self._sandbox.get_attached_files() if files: available_files = ( @@ -956,7 +966,12 @@ def _build_description(self) -> str: def _update_description(self) -> None: """Update the description with current file information.""" - self.description = self._build_description() + # Only update description if using default template or custom template with placeholder + if ( + not self._custom_description + or "{available_files}" in self._description_template + ): + self.description = self._build_description() def attach_file( self, @@ -1024,7 +1039,21 @@ def _run( config: RunnableConfig | None = None, run_manager: CallbackManagerForToolRun | None = None, ) -> Any: # noqa: ANN401 - """Use the tool synchronously.""" + """Use the tool synchronously. 
+ + Args: + code: The code to execute in the sandbox + state: State object containing session information (required for stateful mode) + tool_call_id: ID of the tool call for message creation + config: Configuration for the tool execution + run_manager: Callback manager for the tool run + + Returns: + Tool execution result or LangGraph Command in stateful mode + + Raises: + ValueError: If required state keys are missing in stateful mode + """ if self.stateful: required_keys = {"session_bytes", "session_metadata", "messages"} actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) @@ -1086,7 +1115,21 @@ async def _arun( config: RunnableConfig | None = None, run_manager: AsyncCallbackManagerForToolRun | None = None, ) -> Any: # noqa: ANN401 - """Use the tool asynchronously.""" + """Use the tool asynchronously. + + Args: + code: The code to execute in the sandbox + state: State object containing session information (required for stateful mode) + tool_call_id: ID of the tool call for message creation + config: Configuration for the tool execution + run_manager: Callback manager for the tool run + + Returns: + Tool execution result or LangGraph Command in stateful mode + + Raises: + ValueError: If required state keys are missing in stateful mode + """ if self.stateful: required_keys = {"session_bytes", "session_metadata", "messages"} actual_keys = set(state) if isinstance(state, dict) else set(state.__dict__) diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py index 353cabc..dbcfabf 100644 --- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py +++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py @@ -358,3 +358,55 @@ async def test_large_file_attachment(pyodide_package: None) -> None: assert "File exists: True" in result.stdout assert f"File size: {size_bytes} bytes" in result.stdout assert "Verification completed successfully!" in result.stdout + + +def test_description_custom_without_files(pyodide_package: None) -> None: + """Test custom description without files.""" + custom_description = "Use Python to analyze data. No fancy stuff." 
+ + tool = PyodideSandboxTool(allow_net=True, description=custom_description) + + # Verify the custom description is used and doesn't have file info + assert tool.description == custom_description + assert "ATTACHED FILES AVAILABLE" not in tool.description + + +def test_description_custom_with_files(pyodide_package: None) -> None: + """Test custom description with files.""" + custom_description = "Custom Python sandbox with {available_files}" + + tool = PyodideSandboxTool(allow_net=True, description=custom_description) + + # Initial state should not have file info + assert tool.description == "Custom Python sandbox with " + + # Add files and check if description is updated properly + tool.attach_file("data.csv", "a,b\n1,2") + tool.attach_file("config.json", '{"setting": true}') + + # Verify description contains both custom text and file info + assert "Custom Python sandbox with" in tool.description + assert "ATTACHED FILES AVAILABLE" in tool.description + assert "data.csv" in tool.description + assert "config.json" in tool.description + + +def test_description_default(pyodide_package: None) -> None: + """Test default description behavior.""" + tool = PyodideSandboxTool(allow_net=True) + + # Check default description + assert "A secure Python code sandbox with filesystem support" in tool.description + assert "ATTACHED FILES AVAILABLE" not in tool.description + + # Add a file and check if description is updated + tool.attach_file("test.txt", "Hello world") + + # Verify description was updated with file info + assert "A secure Python code sandbox with filesystem support" in tool.description + assert "ATTACHED FILES AVAILABLE" in tool.description + assert "test.txt" in tool.description + + # Clear files and check if description is updated + tool.clear_filesystem() + assert "ATTACHED FILES AVAILABLE" not in tool.description From 37362fde50cb71bbb621f469cd519bbed67ac75f Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 4 Jun 2025 21:14:07 -0300 Subject: [PATCH 26/27] refactor: remove unnecessary filesystem operations (exists, remove, copy) --- libs/pyodide-sandbox-js/main.ts | 48 ++++++++++----------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/libs/pyodide-sandbox-js/main.ts b/libs/pyodide-sandbox-js/main.ts index 7ac1f08..31105bc 100644 --- a/libs/pyodide-sandbox-js/main.ts +++ b/libs/pyodide-sandbox-js/main.ts @@ -32,7 +32,14 @@ class InstallEntry(TypedDict): package: str def perform_fs_operation(op) -> dict: - """Filesystem operation function for file operations.""" + """Filesystem operation function for file operations. 
+ + Supports only essential operations needed for the binary streaming protocol: + - read: Read file contents (text or binary) + - write: Write file contents (text or binary) + - list: List directory contents + - mkdir: Create directories + """ try: if hasattr(op, 'to_py'): op = op.to_py() @@ -41,7 +48,6 @@ def perform_fs_operation(op) -> dict: path = op.get("path") content = op.get("content") encoding = op.get("encoding", "utf-8") - destination = op.get("destination") if operation == "read": if os.path.exists(path): @@ -69,8 +75,7 @@ def perform_fs_operation(op) -> dict: with open(path, "w", encoding=encoding) as f: f.write(content) - exists = os.path.exists(path) - if exists: + if os.path.exists(path): return {"success": True} else: return {"success": False, "error": f"Failed to create file at {path}"} @@ -95,38 +100,13 @@ def perform_fs_operation(op) -> dict: elif operation == "mkdir": try: os.makedirs(path, exist_ok=True) - exists = os.path.exists(path) - return {"success": exists, "error": None if exists else "Failed to create directory"} + if os.path.exists(path): + return {"success": True} + else: + return {"success": False, "error": "Failed to create directory"} except Exception as e: return {"success": False, "error": f"Error creating directory: {str(e)}"} - elif operation == "exists": - exists = os.path.exists(path) - return {"success": True, "exists": exists} - - elif operation == "remove": - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - elif os.path.isdir(path): - import shutil - shutil.rmtree(path) - return {"success": True} - else: - return {"success": False, "error": f"Path not found for removal: {path}"} - - elif operation == "copy": - if not destination: - return {"success": False, "error": "Destination path required for copy operation"} - if os.path.exists(path): - import shutil - if os.path.isfile(path): - shutil.copy2(path, destination) - elif os.path.isdir(path): - shutil.copytree(path, destination, dirs_exist_ok=True) - return {"success": True} - else: - return {"success": False, "error": f"Source path not found for copy: {path}"} else: return {"success": False, "error": f"Unknown operation: {operation}"} @@ -291,7 +271,7 @@ interface PyodideResult { } interface FileSystemOperation { - operation: "read" | "write" | "list" | "mkdir" | "exists" | "remove" | "copy"; + operation: "read" | "write" | "list" | "mkdir"; // Removed: "exists" | "remove" | "copy" path: string; content?: string | Uint8Array; encoding?: string; From 5652d5cad5a6c26fa638e9fb02307c7c6f5c2e3d Mon Sep 17 00:00:00 2001 From: fullzer4 Date: Wed, 4 Jun 2025 22:04:58 -0300 Subject: [PATCH 27/27] revert: restore previous permission model and add allow_read to tests --- libs/sandbox-py/langchain_sandbox/pyodide.py | 85 ++++--------------- .../tests/unit_tests/test_pyodide_sandbox.py | 16 +++- 2 files changed, 28 insertions(+), 73 deletions(-) diff --git a/libs/sandbox-py/langchain_sandbox/pyodide.py b/libs/sandbox-py/langchain_sandbox/pyodide.py index 6601d60..8d67026 100644 --- a/libs/sandbox-py/langchain_sandbox/pyodide.py +++ b/libs/sandbox-py/langchain_sandbox/pyodide.py @@ -55,6 +55,7 @@ def build_permission_flag( flag: The base permission flag (e.g., "--allow-read"). value: Either a boolean (True for unrestricted access, False for no access) or a list of allowed items. + default_values: Optional default items that should always be included. 
     Returns:
         A string with the permission flag and items, or None if no permission should
@@ -174,7 +175,7 @@ def __init__(
         if not skip_deno_check:
             # Check if Deno is installed
             try:
-                subprocess.run(["deno", "--version"], check=True, capture_output=True)
+                subprocess.run(["deno", "--version"], check=True, capture_output=True)  # noqa: S607, S603
             except subprocess.CalledProcessError as e:
                 msg = "Deno is installed, but running it failed."
                 raise RuntimeError(msg) from e
@@ -182,82 +183,28 @@ def __init__(
             msg = "Deno is not installed or not in PATH."
             raise RuntimeError(msg) from e
 
-        # This ensures we can reliably access the node_modules regardless of working directory
-        pkg_path = os.path.abspath(os.path.dirname(PKG_NAME))
-
-        # These read paths are the minimum required for Pyodide to function properly
-        # Without these, Pyodide cannot load its core WASM files and other resources
-        read_paths = [
-            # Basic node_modules access for Pyodide's core files
-            "node_modules",
-            # Absolute path to node_modules for reliability
-            os.path.join(pkg_path, "node_modules"),
-            # Deno-specific package cache location
-            os.path.join(pkg_path, "node_modules/.deno"),
-            # Current directory for user files
-            ".",
-            # Temporary directory for Pyodide operations
-            "/tmp",
-        ]
-
-        # Write paths are needed for Pyodide to install packages and create temporary files
-        # Without these, micropip and other package installation won't work
-        write_paths = [
-            # Allow writing to node_modules for package installation
-            "node_modules",
-            # Absolute path to node_modules
-            os.path.join(pkg_path, "node_modules"),
-            # Deno's package cache
-            os.path.join(pkg_path, "node_modules/.deno"),
-            # Temporary directory for various operations
-            "/tmp",
-            # Deno's cache directories
-            os.path.expanduser("~/.cache/deno"),
-            os.path.expanduser("~/.deno"),
-            # Current directory for user file operations
-            ".",
-        ]
-
-        # Merge user-provided read permissions with required paths
-        # This ensures security (by honoring user restrictions) while maintaining functionality
-        if allow_read is True:
-            final_read_paths = True  # User requested unrestricted access
-        elif isinstance(allow_read, list):
-            # Combine user paths with required paths
-            final_read_paths = list(set(allow_read + read_paths))
-        else:
-            # Use only the required minimum paths
-            final_read_paths = read_paths
-
-        # Similar logic for write permissions
-        if allow_write is True:
-            final_write_paths = True  # User requested unrestricted access
-        elif isinstance(allow_write, list):
-            # Combine user paths with required paths
-            final_write_paths = list(set(allow_write + write_paths))
-        else:
-            # Use only the required minimum paths
-            final_write_paths = write_paths
-
-        # Define all permission flags that will be passed to Deno
-        # This uses Deno's security model to restrict what the sandbox can access
+        # Define permission configurations:
+        # each tuple contains (flag, setting, defaults)
         perm_defs = [
-            ("--allow-env", allow_env),  # Environment variable access
-            ("--allow-read", final_read_paths),  # Filesystem read access
-            ("--allow-write", final_write_paths),  # Filesystem write access
-            ("--allow-net", allow_net),  # Network access
-            ("--allow-run", allow_run),  # Subprocess execution
-            ("--allow-ffi", allow_ffi),  # Foreign function interface
+            ("--allow-env", allow_env, None),
+            # For file system permissions, if no permission is specified,
+            # force node_modules
+            ("--allow-read", allow_read, ["node_modules"]),
+            ("--allow-write", allow_write, ["node_modules"]),
+            ("--allow-net", allow_net, None),
+            ("--allow-run", allow_run, None),
+            ("--allow-ffi", allow_ffi, None),
         ]
 
-        # Build the actual permission flags
         self.permissions = []
-        for flag, value in perm_defs:
+        for flag, value, defaults in perm_defs:
             perm = build_permission_flag(flag, value=value)
+            if perm is None and defaults is not None:
+                default_value = ",".join(defaults)
+                perm = f"{flag}={default_value}"
             if perm:
                 self.permissions.append(perm)
 
-        # Configure node_modules directory handling
         self.permissions.append(f"--node-modules-dir={node_modules_dir}")
 
         # Attach files if provided during initialization
diff --git a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py
index dbcfabf..a85e506 100644
--- a/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py
+++ b/libs/sandbox-py/tests/unit_tests/test_pyodide_sandbox.py
@@ -178,9 +178,11 @@ def test_sync_pyodide_sandbox_timeout(pyodide_package: None) -> None:
 
 def test_pyodide_sandbox_tool(pyodide_package: None) -> None:
     """Test synchronous invocation of PyodideSandboxTool."""
+    # allow_read=True is required for Deno to access Pyodide WASM files
     tool = PyodideSandboxTool(
         stateful=False,
         allow_net=True,
+        allow_read=True,
     )
     result = tool.invoke({"code": "x = 5; print(x)"})
     assert result == "5"
@@ -201,9 +203,11 @@ async def test_async_pyodide_sandbox_tool(pyodide_package: None) -> None:
     """Test asynchronous invocation of PyodideSandboxTool."""
+    # allow_read=True is required for Deno to access Pyodide WASM files
     tool = PyodideSandboxTool(
         stateful=False,
         allow_net=True,
+        allow_read=True,
     )
     result = await tool.ainvoke({"code": "x = 5; print(x)"})
     assert result == "5"
@@ -224,7 +228,8 @@ async def test_filesystem_basic_operations(pyodide_package: None) -> None:
     """Test basic filesystem operations."""
-    sandbox = PyodideSandbox(allow_net=True)
+    # allow_read=True is required for Deno to access Pyodide WASM files
+    sandbox = PyodideSandbox(allow_net=True, allow_read=True)
 
     # Attach files
     sandbox.attach_file("test.txt", "Hello, World!")
@@ -270,7 +275,8 @@ def test_filesystem_tool_usage(pyodide_package: None) -> None:
     """Test filesystem with PyodideSandboxTool."""
-    tool = PyodideSandboxTool(allow_net=True)
+    # allow_read=True is required for Deno to access Pyodide WASM files
+    tool = PyodideSandboxTool(allow_net=True, allow_read=True)
 
     # Attach CSV data
     csv_data = "name,age\nAlice,30\nBob,25"
@@ -296,7 +302,8 @@ async def test_binary_file_operations(pyodide_package: None) -> None:
     """Test binary file operations."""
-    sandbox = PyodideSandbox(allow_net=True)
+    # allow_read=True is required for Deno to access Pyodide WASM files
+    sandbox = PyodideSandbox(allow_net=True, allow_read=True)
 
     # Create some binary data
     binary_data = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01"
@@ -327,7 +334,8 @@ async def test_large_file_attachment(pyodide_package: None) -> None:
     """Test attaching a large file to the sandbox."""
-    sandbox = PyodideSandbox(allow_net=True)
+    # allow_read=True is required for Deno to access Pyodide WASM files
+    sandbox = PyodideSandbox(allow_net=True, allow_read=True)
 
     # Generate a test file with a simple pattern
     size_mb = 5  # 5MB is sufficient to test streaming