initial commit
This commit is contained in:
550
venv/Lib/site-packages/langsmith/sandbox/README.md
Normal file
550
venv/Lib/site-packages/langsmith/sandbox/README.md
Normal file
@@ -0,0 +1,550 @@
|
||||
# LangSmith Sandbox
|
||||
|
||||
Sandboxed code execution for LangSmith. Run untrusted code safely in isolated containers.
|
||||
|
||||
> ⚠️ **Warning**: This module is experimental. Features and APIs may change, and breaking changes are expected as we iterate.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```python
|
||||
from langsmith.sandbox import SandboxClient
|
||||
|
||||
# Client uses LANGSMITH_ENDPOINT and LANGSMITH_API_KEY from environment
|
||||
client = SandboxClient()
|
||||
|
||||
# First, create a template (defines the container image)
|
||||
client.create_template(
|
||||
name="python-sandbox",
|
||||
image="python:3.12-slim",
|
||||
)
|
||||
|
||||
# Now create a sandbox from the template and run code
|
||||
with client.sandbox(template_name="python-sandbox") as sb:
|
||||
result = sb.run("python -c 'print(2 + 2)'")
|
||||
print(result.stdout) # "4\n"
|
||||
print(result.success) # True
|
||||
|
||||
# Or create a sandbox to keep
|
||||
sb = client.create_sandbox(template_name="python-sandbox")
|
||||
result = sb.run("python -c 'print(2 + 2)'")
|
||||
client.delete_sandbox(sb.name) # Don't forget to clean up when done
|
||||
|
||||
# Or use an existing sandbox by name
|
||||
sb = client.get_sandbox(name="your-sandbox")
|
||||
result = sb.run("python -c 'print(2 + 2)'")
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
The sandbox module works out of the box for basic command execution (HTTP). For
|
||||
**real-time output** (streaming, callbacks, and `timeout=0`), install the
|
||||
optional dependency:
|
||||
|
||||
```bash
|
||||
pip install 'langsmith[sandbox]'
|
||||
```
|
||||
|
||||
This pulls in the `websockets` package. Without it, `sb.run()` falls back to
|
||||
HTTP automatically.
|
||||
|
||||
## Configuration
|
||||
|
||||
The client automatically uses LangSmith environment variables:
|
||||
|
||||
```python
|
||||
from langsmith.sandbox import SandboxClient
|
||||
|
||||
# Uses LANGSMITH_ENDPOINT and LANGSMITH_API_KEY
|
||||
client = SandboxClient()
|
||||
|
||||
# Or configure explicitly
|
||||
client = SandboxClient(
|
||||
api_endpoint="https://api.smith.langchain.com/v2/sandboxes",
|
||||
api_key="your-api-key",
|
||||
timeout=30.0,
|
||||
)
|
||||
```
|
||||
|
||||
## Running Commands
|
||||
|
||||
```python
|
||||
# Assuming you've created a template called "my-sandbox"
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
# Run a command
|
||||
result = sb.run("echo 'Hello, World!'")
|
||||
|
||||
print(result.stdout) # "Hello, World!\n"
|
||||
print(result.stderr) # ""
|
||||
print(result.exit_code) # 0
|
||||
print(result.success) # True
|
||||
|
||||
# Commands that fail return non-zero exit codes
|
||||
result = sb.run("exit 1")
|
||||
print(result.success) # False
|
||||
print(result.exit_code) # 1
|
||||
```
|
||||
|
||||
## Streaming Output
|
||||
|
||||
For long-running commands, you can stream output in real time. This requires
|
||||
the `websockets` package (`pip install 'langsmith[sandbox]'`).
|
||||
|
||||
### Callbacks
|
||||
|
||||
The simplest way to get real-time output. Blocks until the command completes.
|
||||
|
||||
```python
|
||||
import sys
|
||||
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
result = sb.run(
|
||||
"make build",
|
||||
timeout=600,
|
||||
on_stdout=lambda s: print(s, end=""),
|
||||
on_stderr=lambda s: print(s, end="", file=sys.stderr),
|
||||
)
|
||||
print(f"\nBuild {'succeeded' if result.success else 'failed'}")
|
||||
```
|
||||
|
||||
### Streaming with CommandHandle
|
||||
|
||||
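For full control, pass `wait=False` to get a `CommandHandle`: it exposes the command ID and process ID, tags every chunk with its stream (`stdout` or `stderr`), and supports `kill()` and reconnection.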
|
||||
|
||||
```python
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
handle = sb.run("make build", timeout=600, wait=False)
|
||||
|
||||
print(f"Command ID: {handle.command_id}")
|
||||
|
||||
for chunk in handle:
|
||||
prefix = "OUT" if chunk.stream == "stdout" else "ERR"
|
||||
print(f"[{prefix}] {chunk.data}", end="")
|
||||
|
||||
result = handle.result
|
||||
print(f"\nExit code: {result.exit_code}")
|
||||
```
|
||||
|
||||
### Killing a Running Command
|
||||
|
||||
```python
|
||||
import threading
|
||||
import time
|
||||
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
handle = sb.run("sleep 3600", timeout=7200, wait=False)
|
||||
|
||||
# Kill after 10 seconds from another thread
|
||||
def kill_after(h, seconds):
|
||||
time.sleep(seconds)
|
||||
h.kill()
|
||||
|
||||
threading.Thread(target=kill_after, args=(handle, 10)).start()
|
||||
|
||||
for chunk in handle:
|
||||
print(chunk.data, end="")
|
||||
|
||||
result = handle.result
|
||||
print(f"Exit code: {result.exit_code}") # non-zero (killed)
|
||||
```
|
||||
|
||||
### Sending Stdin Input
|
||||
|
||||
```python
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
handle = sb.run(
|
||||
"python -c 'name = input(\"Name: \"); print(f\"Hello {name}\")'",
|
||||
timeout=30,
|
||||
wait=False,
|
||||
)
|
||||
|
||||
for chunk in handle:
|
||||
if "Name:" in chunk.data:
|
||||
handle.send_input("World\n")
|
||||
print(chunk.data, end="")
|
||||
|
||||
result = handle.result
|
||||
```
|
||||
|
||||
### Auto-Reconnect
|
||||
|
||||
`CommandHandle` (returned by `sb.run(wait=False)`) automatically
|
||||
reconnects on transient disconnects — hot-reloads, network blips, etc. No user
|
||||
code needed:
|
||||
|
||||
```python
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
handle = sb.run("make build", timeout=600, wait=False)
|
||||
|
||||
# Auto-reconnects on transient errors (hot-reload, network blips)
|
||||
for chunk in handle:
|
||||
print(chunk.data, end="")
|
||||
|
||||
result = handle.result
|
||||
```
|
||||
|
||||
For manual reconnection across process restarts:
|
||||
|
||||
```python
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
handle = sb.run("make build", timeout=600, wait=False)
|
||||
command_id = handle.command_id
|
||||
|
||||
# ... later, possibly in a different process ...
|
||||
|
||||
handle = sb.reconnect(command_id)
|
||||
for chunk in handle:
|
||||
print(chunk.data, end="")
|
||||
result = handle.result
|
||||
```
|
||||
|
||||
### No Timeout (`timeout=0`)
|
||||
|
||||
With WebSocket enabled, you can set `timeout=0` to let a command run
|
||||
indefinitely with no server-side deadline. This works with both `wait=False`
|
||||
and callbacks. Useful for long-lived processes like dev servers, file watchers,
|
||||
or background tasks that you control via `kill()`.
|
||||
|
||||
```python
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
handle = sb.run("python server.py", timeout=0, wait=False)
|
||||
|
||||
for chunk in handle:
|
||||
print(chunk.data, end="")
|
||||
if "Ready" in chunk.data:
|
||||
break # server is up, do other work
|
||||
|
||||
handle.kill() # stop when done
|
||||
```
|
||||
|
||||
> **Note:** `timeout=0` requires WebSocket support
|
||||
> (`pip install 'langsmith[sandbox]'`). Without WebSocket, `run()` falls
|
||||
> back to HTTP which has its own request-level timeout.
|
||||
|
||||
## File Operations
|
||||
|
||||
Read and write files in the sandbox:
|
||||
|
||||
```python
|
||||
# Assuming you've created a Python template
|
||||
with client.sandbox(template_name="my-python") as sb:
|
||||
# Write a file
|
||||
sb.write("/app/script.py", "print('Hello from file!')")
|
||||
|
||||
# Run the script
|
||||
result = sb.run("python /app/script.py")
|
||||
print(result.stdout) # "Hello from file!\n"
|
||||
|
||||
# Read a file (returns bytes)
|
||||
content = sb.read("/app/script.py")
|
||||
print(content.decode()) # "print('Hello from file!')"
|
||||
|
||||
# Write binary files
|
||||
sb.write("/app/data.bin", b"\x00\x01\x02\x03")
|
||||
```
|
||||
|
||||
## Templates
|
||||
|
||||
Templates define the container image and resources for sandboxes. **You must create a template before you can create sandboxes.**
|
||||
|
||||
```python
|
||||
# Create a template (required before creating sandboxes)
|
||||
template = client.create_template(
|
||||
name="my-python-env",
|
||||
image="python:3.12-slim", # Any Docker image
|
||||
cpu="1", # CPU limit (default: "500m")
|
||||
memory="1Gi", # Memory limit (default: "512Mi")
|
||||
)
|
||||
|
||||
# Now you can create sandboxes from this template
|
||||
with client.sandbox(template_name="my-python-env") as sb:
|
||||
result = sb.run("python --version")
|
||||
|
||||
# List all templates
|
||||
templates = client.list_templates()
|
||||
|
||||
# Get a specific template
|
||||
template = client.get_template("my-python-env")
|
||||
|
||||
# Update a template's name
|
||||
client.update_template("my-python-env", new_name="python-env-v2")
|
||||
|
||||
# Delete a template (fails if sandboxes or pools are using it)
|
||||
client.delete_template("my-python-env")
|
||||
```
|
||||
|
||||
### Common Template Images
|
||||
|
||||
```python
|
||||
# Python
|
||||
client.create_template(name="python", image="python:3.12-slim")
|
||||
|
||||
# Node.js
|
||||
client.create_template(name="node", image="node:20-slim")
|
||||
|
||||
# Ubuntu (general purpose)
|
||||
client.create_template(name="ubuntu", image="ubuntu:24.04")
|
||||
```
|
||||
|
||||
## Persistent Volumes
|
||||
|
||||
Use volumes to persist data across sandbox sessions:
|
||||
|
||||
```python
|
||||
from langsmith.sandbox import VolumeMountSpec
|
||||
|
||||
# Create a volume
|
||||
volume = client.create_volume(name="my-data", size="1Gi")
|
||||
|
||||
# Create a template with the volume mounted
|
||||
template = client.create_template(
|
||||
name="stateful-sandbox",
|
||||
image="python:3.12-slim",
|
||||
volume_mounts=[
|
||||
VolumeMountSpec(volume_name="my-data", mount_path="/data")
|
||||
],
|
||||
)
|
||||
|
||||
# Data written to /data persists across sandbox sessions
|
||||
with client.sandbox(template_name="stateful-sandbox") as sb:
|
||||
sb.write("/data/state.txt", "persistent data")
|
||||
|
||||
# Later, in a new sandbox...
|
||||
with client.sandbox(template_name="stateful-sandbox") as sb:
|
||||
content = sb.read("/data/state.txt")
|
||||
print(content.decode()) # "persistent data"
|
||||
```
|
||||
|
||||
## Pools (Pre-warmed Sandboxes)
|
||||
|
||||
Pools pre-provision sandboxes for faster startup:
|
||||
|
||||
```python
|
||||
# First create a template (without volumes - pools don't support volumes)
|
||||
client.create_template(name="fast-python", image="python:3.12-slim")
|
||||
|
||||
# Create a pool with 2 warm sandboxes
|
||||
pool = client.create_pool(
|
||||
name="python-pool",
|
||||
template_name="fast-python",
|
||||
replicas=2,
|
||||
)
|
||||
|
||||
# Sandboxes from pooled templates start faster
|
||||
with client.sandbox(template_name="fast-python") as sb:
|
||||
result = sb.run("python --version")
|
||||
|
||||
# Scale the pool
|
||||
client.update_pool("python-pool", replicas=3)
|
||||
|
||||
# Delete the pool
|
||||
client.delete_pool("python-pool")
|
||||
```
|
||||
|
||||
> **Note:** Templates with volume mounts cannot be used in pools.
|
||||
|
||||
## Reusing Existing Sandboxes
|
||||
|
||||
Get a sandbox that's already running:
|
||||
|
||||
```python
|
||||
# Create a sandbox (requires explicit cleanup)
|
||||
sb = client.create_sandbox(template_name="my-template")
|
||||
print(sb.name) # e.g., "sandbox-abc123"
|
||||
|
||||
# Later, get the same sandbox
|
||||
sb = client.get_sandbox("sandbox-abc123")
|
||||
result = sb.run("echo 'Still running!'")
|
||||
|
||||
# Clean up when done
|
||||
client.delete_sandbox("sandbox-abc123")
|
||||
```
|
||||
|
||||
## Async Sandbox Creation
|
||||
|
||||
By default, `create_sandbox()` blocks until the sandbox is ready. For
|
||||
non-blocking creation, pass `wait_for_ready=False`:
|
||||
|
||||
```python
|
||||
# Returns immediately with status="provisioning"
|
||||
sb = client.create_sandbox(template_name="my-template", wait_for_ready=False)
|
||||
print(sb.status) # "provisioning"
|
||||
|
||||
# Poll until ready using the lightweight status endpoint
|
||||
sb = client.wait_for_sandbox(sb.name, timeout=120, poll_interval=1.0)
|
||||
print(sb.status) # "ready"
|
||||
|
||||
# Now the sandbox is usable
|
||||
result = sb.run("echo hello")
|
||||
```
|
||||
|
||||
You can also poll manually for more control:
|
||||
|
||||
```python
|
||||
import time

sb = client.create_sandbox(template_name="my-template", wait_for_ready=False)
|
||||
|
||||
while True:
|
||||
status = client.get_sandbox_status(sb.name)
|
||||
if status.status == "ready":
|
||||
sb = client.get_sandbox(sb.name)
|
||||
break
|
||||
if status.status == "failed":
|
||||
print(f"Failed: {status.status_message}")
|
||||
break
|
||||
time.sleep(1)
|
||||
```
|
||||
|
||||
> **Note:** Operations like `run()`, `write()`, and `read()` will raise
|
||||
> `SandboxNotReadyError` if called on a sandbox that isn't ready yet.
|
||||
|
||||
## Async Support
|
||||
|
||||
Full async support for all operations:
|
||||
|
||||
```python
|
||||
from langsmith.sandbox import AsyncSandboxClient
|
||||
|
||||
async def main():
|
||||
async with AsyncSandboxClient() as client:
|
||||
# Create a template first
|
||||
await client.create_template(name="async-python", image="python:3.12-slim")
|
||||
|
||||
# Use the template
|
||||
async with await client.sandbox(template_name="async-python") as sb:
|
||||
result = await sb.run("python -c 'print(1 + 1)'")
|
||||
print(result.stdout) # "2\n"
|
||||
|
||||
await sb.write("/app/test.txt", "async content")
|
||||
content = await sb.read("/app/test.txt")
|
||||
print(content.decode())
|
||||
```
|
||||
|
||||
### Async Streaming
|
||||
|
||||
```python
|
||||
async with await client.sandbox(template_name="async-python") as sb:
|
||||
handle = await sb.run("make build", timeout=600, wait=False)
|
||||
|
||||
async for chunk in handle:
|
||||
print(chunk.data, end="")
|
||||
|
||||
result = await handle.result
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
The module provides type-based exceptions with a `resource_type` attribute for specific handling:
|
||||
|
||||
```python
|
||||
from langsmith.sandbox import (
|
||||
SandboxClientError, # Base exception for all sandbox errors
|
||||
ResourceCreationError, # Resource provisioning failed (check resource_type, error_type)
|
||||
ResourceNotFoundError, # Resource doesn't exist (check resource_type)
|
||||
ResourceTimeoutError, # Operation timed out (check resource_type)
|
||||
SandboxNotReadyError, # Sandbox not ready for operations yet
|
||||
SandboxConnectionError, # Network/WebSocket error
|
||||
CommandTimeoutError, # Command exceeded its timeout (extends SandboxOperationError)
|
||||
QuotaExceededError, # Quota limit reached
|
||||
)
|
||||
|
||||
try:
|
||||
with client.sandbox(template_name="my-sandbox") as sb:
|
||||
result = sb.run("sleep 999", timeout=10)
|
||||
except CommandTimeoutError as e:
|
||||
print(f"Command timed out: {e}")
|
||||
except ResourceCreationError as e:
|
||||
print(f"{e.resource_type} creation failed: {e}")
|
||||
except ResourceNotFoundError as e:
|
||||
print(f"{e.resource_type} not found: {e}")
|
||||
except ResourceTimeoutError as e:
|
||||
print(f"Timeout waiting for {e.resource_type}: {e}")
|
||||
except SandboxConnectionError as e:
|
||||
print(f"Connection error: {e}")
|
||||
except SandboxClientError as e:
|
||||
print(f"Error: {e}")
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
### SandboxClient
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `sandbox(template_name, ...)` | Create a sandbox (auto-deleted on context exit) |
|
||||
| `create_sandbox(template_name, *, wait_for_ready=True, ...)` | Create a sandbox (requires explicit delete). Pass `wait_for_ready=False` for async creation. |
|
||||
| `get_sandbox(name)` | Get an existing sandbox by name |
|
||||
| `get_sandbox_status(name)` | Get lightweight provisioning status (`ResourceStatus`) |
|
||||
| `wait_for_sandbox(name, *, timeout=120, poll_interval=1.0)` | Poll until sandbox is ready or failed |
|
||||
| `list_sandboxes()` | List all sandboxes |
|
||||
| `update_sandbox(name, *, new_name)` | Update a sandbox's display name |
|
||||
| `delete_sandbox(name)` | Delete a sandbox |
|
||||
| `create_template(name, image, ...)` | Create a template |
|
||||
| `list_templates()` | List all templates |
|
||||
| `get_template(name)` | Get template by name |
|
||||
| `update_template(name, *, new_name)` | Update a template's display name |
|
||||
| `delete_template(name)` | Delete a template |
|
||||
| `create_volume(name, size)` | Create a persistent volume |
|
||||
| `list_volumes()` | List all volumes |
|
||||
| `update_volume(name, *, new_name, size)` | Update a volume's name or size |
|
||||
| `delete_volume(name)` | Delete a volume |
|
||||
| `create_pool(name, template_name, replicas)` | Create a pool |
|
||||
| `list_pools()` | List all pools |
|
||||
| `update_pool(name, *, replicas, new_name)` | Update pool replicas or name |
|
||||
| `delete_pool(name)` | Delete a pool |
|
||||
|
||||
### Sandbox
|
||||
|
||||
| Property | Description |
|
||||
|----------|-------------|
|
||||
| `name` | Display name |
|
||||
| `template_name` | Template used to create this sandbox |
|
||||
| `status` | Lifecycle status: `"provisioning"`, `"ready"`, or `"failed"` |
|
||||
| `status_message` | Human-readable details when status is `"failed"`, `None` otherwise |
|
||||
| `dataplane_url` | URL for runtime operations (only functional when status is `"ready"`) |
|
||||
| `id` | Unique identifier (UUID) |
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `run(command, *, timeout=60, on_stdout=None, on_stderr=None, wait=True)` | Execute a shell command. Returns `ExecutionResult` or `CommandHandle` (when `wait=False`). |
|
||||
| `reconnect(command_id, *, stdout_offset=0, stderr_offset=0)` | Reconnect to a running command. Returns `CommandHandle`. |
|
||||
| `write(path, content)` | Write file (str or bytes) |
|
||||
| `read(path)` | Read file (returns bytes) |
|
||||
|
||||
### ExecutionResult
|
||||
|
||||
| Property | Description |
|
||||
|----------|-------------|
|
||||
| `stdout` | Standard output (str) |
|
||||
| `stderr` | Standard error (str) |
|
||||
| `exit_code` | Exit code (int) |
|
||||
| `success` | True if exit_code == 0 |
|
||||
|
||||
### ResourceStatus
|
||||
|
||||
Returned by `client.get_sandbox_status()`.
|
||||
|
||||
| Property | Description |
|
||||
|----------|-------------|
|
||||
| `status` | Lifecycle status: `"provisioning"`, `"ready"`, or `"failed"` |
|
||||
| `status_message` | Human-readable details when `"failed"`, `None` otherwise |
|
||||
|
||||
### CommandHandle
|
||||
|
||||
Returned by `sb.run(wait=False)`. Iterable, yielding `OutputChunk` objects.
|
||||
|
||||
| Property / Method | Description |
|
||||
|-------------------|-------------|
|
||||
| `command_id` | Server-assigned command ID |
|
||||
| `pid` | Process ID on the sandbox |
|
||||
| `result` | Final `ExecutionResult` (blocks until complete) |
|
||||
| `kill()` | Send SIGKILL to the running command |
|
||||
| `send_input(data)` | Write string data to the command's stdin |
|
||||
| `reconnect()` | Reconnect from last known offsets |
|
||||
|
||||
### OutputChunk
|
||||
|
||||
| Property | Description |
|
||||
|----------|-------------|
|
||||
| `stream` | `"stdout"` or `"stderr"` |
|
||||
| `data` | Text content of this chunk (str) |
|
||||
| `offset` | Byte offset within the stream (int) |
|
||||
110
venv/Lib/site-packages/langsmith/sandbox/__init__.py
Normal file
110
venv/Lib/site-packages/langsmith/sandbox/__init__.py
Normal file
@@ -0,0 +1,110 @@
|
||||
"""LangSmith Sandbox Module.
|
||||
|
||||
This module provides sandboxed code execution capabilities through the
|
||||
LangSmith Sandbox API.
|
||||
|
||||
Example:
|
||||
from langsmith.sandbox import SandboxClient
|
||||
|
||||
# Uses LANGSMITH_ENDPOINT and LANGSMITH_API_KEY from environment
|
||||
client = SandboxClient()
|
||||
|
||||
with client.sandbox(template_name="python-sandbox") as sb:
|
||||
result = sb.run("python --version")
|
||||
print(result.stdout)
|
||||
|
||||
# Or async:
|
||||
from langsmith.sandbox import AsyncSandboxClient
|
||||
|
||||
async with AsyncSandboxClient() as client:
|
||||
async with await client.sandbox(template_name="python-sandbox") as sb:
|
||||
result = await sb.run("python --version")
|
||||
print(result.stdout)
|
||||
"""
|
||||
|
||||
from langsmith.sandbox._async_client import AsyncSandboxClient
|
||||
from langsmith.sandbox._async_sandbox import AsyncSandbox
|
||||
from langsmith.sandbox._client import SandboxClient
|
||||
from langsmith.sandbox._exceptions import (
|
||||
CommandTimeoutError,
|
||||
DataplaneNotConfiguredError,
|
||||
QuotaExceededError,
|
||||
ResourceAlreadyExistsError,
|
||||
ResourceCreationError,
|
||||
ResourceInUseError,
|
||||
ResourceNameConflictError,
|
||||
ResourceNotFoundError,
|
||||
ResourceTimeoutError,
|
||||
SandboxAPIError,
|
||||
SandboxAuthenticationError,
|
||||
SandboxClientError,
|
||||
SandboxConnectionError,
|
||||
SandboxNotReadyError,
|
||||
SandboxOperationError,
|
||||
SandboxServerReloadError,
|
||||
ValidationError,
|
||||
)
|
||||
from langsmith.sandbox._models import (
|
||||
AsyncCommandHandle,
|
||||
CommandHandle,
|
||||
ExecutionResult,
|
||||
OutputChunk,
|
||||
Pool,
|
||||
ResourceSpec,
|
||||
ResourceStatus,
|
||||
SandboxTemplate,
|
||||
Volume,
|
||||
VolumeMountSpec,
|
||||
)
|
||||
from langsmith.sandbox._sandbox import Sandbox
|
||||
|
||||
__all__ = [
|
||||
# Main classes
|
||||
"SandboxClient",
|
||||
"AsyncSandboxClient",
|
||||
"Sandbox",
|
||||
"AsyncSandbox",
|
||||
# Models
|
||||
"SandboxTemplate",
|
||||
"ResourceStatus",
|
||||
"ResourceSpec",
|
||||
"ExecutionResult",
|
||||
"Volume",
|
||||
"VolumeMountSpec",
|
||||
"Pool",
|
||||
# WebSocket streaming models
|
||||
"CommandHandle",
|
||||
"AsyncCommandHandle",
|
||||
"OutputChunk",
|
||||
# Base and connection errors
|
||||
"SandboxClientError",
|
||||
"SandboxAPIError",
|
||||
"SandboxAuthenticationError",
|
||||
"SandboxConnectionError",
|
||||
"SandboxServerReloadError",
|
||||
# Resource errors (type-based with resource_type attribute)
|
||||
"ResourceCreationError",
|
||||
"ResourceNotFoundError",
|
||||
"ResourceTimeoutError",
|
||||
"ResourceInUseError",
|
||||
"ResourceAlreadyExistsError",
|
||||
"ResourceNameConflictError",
|
||||
# Validation and quota errors
|
||||
"ValidationError",
|
||||
"QuotaExceededError",
|
||||
# Sandbox-specific errors
|
||||
"SandboxNotReadyError",
|
||||
"SandboxOperationError",
|
||||
"CommandTimeoutError",
|
||||
"DataplaneNotConfiguredError",
|
||||
]
|
||||
|
||||
# Announce the package's alpha status whenever it is imported.
import warnings

warnings.warn(
    (
        "langsmith.sandbox is in alpha. "
        "This feature is experimental, and breaking changes are expected."
    ),
    category=FutureWarning,
    # stacklevel=2 so the warning is reported against the caller's frame
    # rather than this file.
    stacklevel=2,
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
950
venv/Lib/site-packages/langsmith/sandbox/_async_client.py
Normal file
950
venv/Lib/site-packages/langsmith/sandbox/_async_client.py
Normal file
@@ -0,0 +1,950 @@
|
||||
"""Async SandboxClient class for interacting with the sandbox server API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from langsmith import utils as ls_utils
|
||||
from langsmith.sandbox._async_sandbox import AsyncSandbox
|
||||
from langsmith.sandbox._exceptions import (
|
||||
ResourceCreationError,
|
||||
ResourceInUseError,
|
||||
ResourceNameConflictError,
|
||||
ResourceNotFoundError,
|
||||
ResourceTimeoutError,
|
||||
SandboxAPIError,
|
||||
ValidationError,
|
||||
)
|
||||
from langsmith.sandbox._helpers import (
|
||||
handle_client_http_error,
|
||||
handle_pool_error,
|
||||
handle_sandbox_creation_error,
|
||||
handle_volume_creation_error,
|
||||
parse_error_response,
|
||||
)
|
||||
from langsmith.sandbox._models import (
|
||||
Pool,
|
||||
ResourceStatus,
|
||||
SandboxTemplate,
|
||||
Volume,
|
||||
VolumeMountSpec,
|
||||
)
|
||||
from langsmith.sandbox._transport import AsyncRetryTransport
|
||||
|
||||
|
||||
def _get_default_api_endpoint() -> str:
    """Build the default sandbox API URL from the environment.

    The base URL is read through ``ls_utils.get_env_var("ENDPOINT", ...)``
    (LANGSMITH_ENDPOINT or LANGCHAIN_ENDPOINT) and falls back to
    ``https://api.smith.langchain.com`` when it is not set.

    Returns:
        The base URL without trailing slashes, followed by ``/v2/sandboxes``.
    """
    root = ls_utils.get_env_var("ENDPOINT", default="https://api.smith.langchain.com")
    # Strip trailing slashes so the joined URL never contains "//v2".
    return root.rstrip("/") + "/v2/sandboxes"
|
||||
|
||||
|
||||
def _get_default_api_key() -> Optional[str]:
    """Return the API key found via ``ls_utils.get_env_var("API_KEY")``.

    Returns:
        Whatever the environment lookup yields; per the return annotation this
        may be ``None``.
    """
    env_key = ls_utils.get_env_var("API_KEY")
    return env_key
|
||||
|
||||
|
||||
class AsyncSandboxClient:
|
||||
"""Async client for interacting with the Sandbox Server API.
|
||||
|
||||
This client provides an async interface for managing sandboxes and templates.
|
||||
|
||||
Example:
|
||||
# Uses LANGSMITH_ENDPOINT and LANGSMITH_API_KEY from environment
|
||||
async with AsyncSandboxClient() as client:
|
||||
# Create a sandbox and run commands
|
||||
async with await client.sandbox(template_name="python-sandbox") as sandbox:
|
||||
result = await sandbox.run("python --version")
|
||||
print(result.stdout)
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    *,
    api_endpoint: Optional[str] = None,
    timeout: float = 10.0,
    api_key: Optional[str] = None,
    max_retries: int = 3,
):
    """Set up the HTTP client used for all sandbox API calls.

    Args:
        api_endpoint: Full URL of the sandbox API endpoint. When omitted (or
            empty) it is derived from the LANGSMITH_ENDPOINT environment
            variable.
        timeout: Default HTTP timeout in seconds.
        api_key: API key for authentication. When omitted (or empty) the
            LANGSMITH_API_KEY environment variable is used.
        max_retries: Retry budget handed to ``AsyncRetryTransport`` for
            transient errors (502, 503, 504), rate limits (429), and
            connection failures. Set to 0 to disable retries. Default: 3.
    """
    endpoint = api_endpoint or _get_default_api_endpoint()
    # Stored without trailing slashes; request URLs are built by appending
    # "/<resource>" to it.
    self._base_url = endpoint.rstrip("/")

    self._api_key = api_key or _get_default_api_key()

    # The auth header is only attached when a key was actually resolved.
    auth_headers: dict[str, str] = (
        {"X-Api-Key": self._api_key} if self._api_key else {}
    )

    self._http = httpx.AsyncClient(
        transport=AsyncRetryTransport(max_retries=max_retries),
        timeout=timeout,
        headers=auth_headers,
    )
|
||||
|
||||
async def aclose(self) -> None:
|
||||
"""Close the async HTTP client."""
|
||||
await self._http.aclose()
|
||||
|
||||
def __del__(self) -> None:
|
||||
"""Best-effort cleanup of the async HTTP client on garbage collection.
|
||||
|
||||
If an event loop is running, schedules ``aclose()`` as a task.
|
||||
Otherwise the underlying sockets will be closed by the GC.
|
||||
For deterministic cleanup, use ``async with`` or ``await aclose()``.
|
||||
"""
|
||||
try:
|
||||
if not self._http.is_closed:
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
if not loop.is_closed():
|
||||
loop.create_task(self.aclose())
|
||||
except RuntimeError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def __aenter__(self) -> AsyncSandboxClient:
|
||||
"""Enter async context manager."""
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
self,
|
||||
exc_type: Optional[type],
|
||||
exc_val: Optional[BaseException],
|
||||
exc_tb: Optional[Any],
|
||||
) -> None:
|
||||
"""Exit async context manager."""
|
||||
await self.aclose()
|
||||
|
||||
# ========================================================================
|
||||
# Volume Operations
|
||||
# ========================================================================
|
||||
|
||||
async def create_volume(
    self,
    name: str,
    size: str,
    *,
    timeout: int = 60,
) -> Volume:
    """Create a persistent volume and wait for it to become ready.

    The volume can afterwards be referenced from templates.

    Args:
        name: Volume name.
        size: Storage size (e.g., "1Gi", "10Gi").
        timeout: Seconds the server should wait for the volume to become
            ready (min: 5, max: 300).

    Returns:
        The created Volume.

    Raises:
        SandboxClientError: HTTP status errors are translated by
            ``handle_volume_creation_error`` (defined elsewhere), presumably
            into ``ResourceCreationError`` when provisioning fails and
            ``ResourceTimeoutError`` when the volume is not ready within
            ``timeout``. NOTE(review): confirm against the helper.
    """
    endpoint = f"{self._base_url}/volumes"
    request_body = {
        "name": name,
        "size": size,
        "wait_for_ready": True,
        "timeout": timeout,
    }

    try:
        # The HTTP timeout is 30s longer than the wait requested from the
        # server, presumably so the server-side wait can finish first.
        resp = await self._http.post(
            endpoint,
            json=request_body,
            timeout=timeout + 30,
        )
        resp.raise_for_status()
        return Volume.from_dict(resp.json())
    except httpx.HTTPStatusError as exc:
        handle_volume_creation_error(exc)
        raise  # pragma: no cover
|
||||
|
||||
async def get_volume(self, name: str) -> Volume:
    """Fetch a single volume by name.

    Args:
        name: Volume name.

    Returns:
        The matching Volume.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        SandboxClientError: Other HTTP status errors are delegated to
            ``handle_client_http_error`` (defined elsewhere).
    """
    endpoint = f"{self._base_url}/volumes/{name}"

    try:
        resp = await self._http.get(endpoint)
        resp.raise_for_status()
        return Volume.from_dict(resp.json())
    except httpx.HTTPStatusError as exc:
        not_found = exc.response.status_code == 404
        if not_found:
            raise ResourceNotFoundError(
                f"Volume '{name}' not found", resource_type="volume"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover
|
||||
|
||||
async def list_volumes(self) -> list[Volume]:
    """List all volumes.

    Returns:
        One Volume per entry under the ``"volumes"`` key of the response;
        an empty list when that key is absent.

    Raises:
        SandboxAPIError: If the listing endpoint itself answers 404, which
            points at a wrong ``api_endpoint``.
        SandboxClientError: Other HTTP status errors are delegated to
            ``handle_client_http_error`` (defined elsewhere).
    """
    endpoint = f"{self._base_url}/volumes"

    try:
        resp = await self._http.get(endpoint)
        resp.raise_for_status()
        raw_volumes = resp.json().get("volumes", [])
        return [Volume.from_dict(item) for item in raw_volumes]
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            # A 404 on a collection URL means the base URL is wrong, not
            # that a resource is missing.
            raise SandboxAPIError(
                f"API endpoint not found: {endpoint}. "
                f"Check that api_endpoint is correct."
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover
|
||||
|
||||
async def delete_volume(self, name: str) -> None:
|
||||
"""Delete a volume.
|
||||
|
||||
Args:
|
||||
name: Volume name.
|
||||
|
||||
Raises:
|
||||
ResourceNotFoundError: If volume not found.
|
||||
ResourceInUseError: If volume is referenced by templates.
|
||||
SandboxClientError: For other errors.
|
||||
"""
|
||||
url = f"{self._base_url}/volumes/{name}"
|
||||
|
||||
try:
|
||||
response = await self._http.delete(url)
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
if e.response.status_code == 404:
|
||||
raise ResourceNotFoundError(
|
||||
f"Volume '{name}' not found", resource_type="volume"
|
||||
) from e
|
||||
if e.response.status_code == 409:
|
||||
data = parse_error_response(e)
|
||||
raise ResourceInUseError(data["message"], resource_type="volume") from e
|
||||
handle_client_http_error(e)
|
||||
|
||||
async def update_volume(
    self,
    name: str,
    *,
    new_name: Optional[str] = None,
    size: Optional[str] = None,
) -> Volume:
    """Update a volume's name and/or size.

    You can update the display name, size, or both in a single request.
    Only storage size increases are allowed (storage backend limitation).
    When neither ``new_name`` nor ``size`` is given, no PATCH is sent and
    the current volume is fetched via ``get_volume`` instead.

    Args:
        name: Current volume name.
        new_name: New display name (optional).
        size: New storage size (must be >= current size). Optional.

    Returns:
        Updated Volume.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        ValidationError: If the server answers 400 (e.g. a storage decrease
            was attempted); raised with ``error_type="VolumeResize"``.
        ResourceNameConflictError: If the server answers 409 (new_name is
            already in use).

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/volumes/{name}"
    payload: dict[str, Any] = {}
    if new_name is not None:
        payload["name"] = new_name
    if size is not None:
        payload["size"] = size

    if not payload:
        # Nothing to update, just return the current volume
        return await self.get_volume(name)

    try:
        response = await self._http.patch(url, json=payload)
        response.raise_for_status()
        return Volume.from_dict(response.json())
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Volume '{name}' not found", resource_type="volume"
            ) from e
        if e.response.status_code == 400:
            # Every 400 on this endpoint is reported as a resize validation
            # failure, using the server-provided message.
            data = parse_error_response(e)
            raise ValidationError(data["message"], error_type="VolumeResize") from e
        if e.response.status_code == 409:
            data = parse_error_response(e)
            raise ResourceNameConflictError(
                data["message"], resource_type="volume"
            ) from e
        handle_client_http_error(e)
        raise  # pragma: no cover
|
||||
|
||||
# ========================================================================
|
||||
# Template Operations
|
||||
# ========================================================================
|
||||
|
||||
async def create_template(
    self,
    name: str,
    image: str,
    *,
    cpu: str = "500m",
    memory: str = "512Mi",
    storage: Optional[str] = None,
    volume_mounts: Optional[list[VolumeMountSpec]] = None,
) -> SandboxTemplate:
    """Register a new SandboxTemplate with the server.

    The request carries only the container image, the resource limits and
    the optional volume mounts.

    Args:
        name: Template name.
        image: Container image (e.g., "python:3.12-slim").
        cpu: CPU limit (e.g., "500m", "1", "2"). Default: "500m".
        memory: Memory limit (e.g., "256Mi", "1Gi"). Default: "512Mi".
        storage: Ephemeral storage limit (e.g., "1Gi"). Omitted from the
            request when falsy.
        volume_mounts: Volumes to mount in the sandbox. Omitted from the
            request when falsy.

    Returns:
        The created SandboxTemplate, parsed from the response body.

    HTTP status errors are passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/templates"

    resources: dict[str, Any] = {"cpu": cpu, "memory": memory}
    if storage:
        resources["storage"] = storage

    payload: dict[str, Any] = {
        "name": name,
        "image": image,
        "resources": resources,
    }
    if volume_mounts:
        mounts = []
        for mount in volume_mounts:
            mounts.append(
                {"volume_name": mount.volume_name, "mount_path": mount.mount_path}
            )
        payload["volume_mounts"] = mounts

    try:
        resp = await self._http.post(url, json=payload)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return SandboxTemplate.from_dict(resp.json())
|
||||
|
||||
async def get_template(self, name: str) -> SandboxTemplate:
    """Look up a single SandboxTemplate by its name.

    Args:
        name: Template name.

    Returns:
        The SandboxTemplate parsed from the response body.

    Raises:
        ResourceNotFoundError: If the server answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/templates/{name}"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Template '{name}' not found", resource_type="template"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return SandboxTemplate.from_dict(resp.json())
|
||||
|
||||
async def list_templates(self) -> list[SandboxTemplate]:
    """Fetch every SandboxTemplate returned by the ``/templates`` endpoint.

    Returns:
        SandboxTemplate objects built from the ``"templates"`` entry of the
        response body; an empty list when that entry is missing.

    Raises:
        SandboxAPIError: If the endpoint itself answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/templates"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise SandboxAPIError(
                f"API endpoint not found: {url}. "
                f"Check that api_endpoint is correct."
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    body = resp.json()
    return [SandboxTemplate.from_dict(item) for item in body.get("templates", [])]
|
||||
|
||||
async def update_template(self, name: str, *, new_name: str) -> SandboxTemplate:
    """Rename a template.

    Args:
        name: Current template name.
        new_name: New display name.

    Returns:
        The updated SandboxTemplate, parsed from the response body.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        ResourceNameConflictError: If the server answers 409; the message
            comes from the parsed error response.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/templates/{name}"

    try:
        resp = await self._http.patch(url, json={"name": new_name})
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        status_code = exc.response.status_code
        if status_code == 404:
            raise ResourceNotFoundError(
                f"Template '{name}' not found", resource_type="template"
            ) from exc
        if status_code == 409:
            details = parse_error_response(exc)
            raise ResourceNameConflictError(
                details["message"], resource_type="template"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return SandboxTemplate.from_dict(resp.json())
|
||||
|
||||
async def delete_template(self, name: str) -> None:
    """Delete a SandboxTemplate.

    Args:
        name: Template name.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        ResourceInUseError: If the server answers 409 (template is
            referenced by sandboxes or pools); the message comes from the
            parsed error response.
        httpx.HTTPStatusError: Re-raised unchanged if
            ``handle_client_http_error`` returns without raising.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/templates/{name}"

    try:
        response = await self._http.delete(url)
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Template '{name}' not found", resource_type="template"
            ) from e
        if e.response.status_code == 409:
            data = parse_error_response(e)
            raise ResourceInUseError(
                data["message"], resource_type="template"
            ) from e
        handle_client_http_error(e)
        # Match the sibling methods: a failed delete must never fall through
        # and look like success if the handler ever returns.
        raise  # pragma: no cover
|
||||
|
||||
# ========================================================================
|
||||
# Pool Operations
|
||||
# ========================================================================
|
||||
|
||||
async def create_pool(
    self,
    name: str,
    template_name: str,
    replicas: int,
    *,
    timeout: int = 30,
) -> Pool:
    """Create a new Sandbox Pool and wait for it to become ready.

    Pools pre-provision sandboxes from a template for faster startup. The
    request always sets ``wait_for_ready`` to True.

    Args:
        name: Pool name (lowercase letters, numbers, hyphens; max 63 chars).
        template_name: Name of the SandboxTemplate to use (no volume mounts).
        replicas: Number of sandboxes to pre-provision (1-100).
        timeout: Timeout in seconds when waiting for ready (10-600).

    The limits above are not validated by this method.

    Returns:
        The created Pool, parsed from the response body.

    HTTP status errors are passed to ``handle_pool_error`` (defined
    elsewhere). The original documentation lists ResourceNotFoundError,
    ValidationError, ResourceAlreadyExistsError, ResourceTimeoutError,
    SandboxQuotaExceededError and SandboxClientError as possible outcomes.
    """
    url = f"{self._base_url}/pools"

    request_body: dict[str, Any] = {
        "name": name,
        "template_name": template_name,
        "replicas": replicas,
        "wait_for_ready": True,
        "timeout": timeout,
    }

    try:
        # The HTTP timeout is 30 seconds longer than the timeout sent in
        # the request body.
        resp = await self._http.post(url, json=request_body, timeout=timeout + 30)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_pool_error(exc)
        raise  # pragma: no cover

    return Pool.from_dict(resp.json())
|
||||
|
||||
async def get_pool(self, name: str) -> Pool:
    """Look up a single Pool by its name.

    Args:
        name: Pool name.

    Returns:
        The Pool parsed from the response body.

    Raises:
        ResourceNotFoundError: If the server answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/pools/{name}"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Pool '{name}' not found", resource_type="pool"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return Pool.from_dict(resp.json())
|
||||
|
||||
async def list_pools(self) -> list[Pool]:
    """Fetch every Pool returned by the ``/pools`` endpoint.

    Returns:
        Pool objects built from the ``"pools"`` entry of the response body;
        an empty list when that entry is missing.

    Raises:
        SandboxAPIError: If the endpoint itself answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/pools"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise SandboxAPIError(
                f"API endpoint not found: {url}. "
                f"Check that api_endpoint is correct."
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    body = resp.json()
    return [Pool.from_dict(item) for item in body.get("pools", [])]
|
||||
|
||||
async def update_pool(
    self,
    name: str,
    *,
    new_name: Optional[str] = None,
    replicas: Optional[int] = None,
) -> Pool:
    """Change a Pool's display name and/or replica count.

    Either field may be sent alone or together. When neither is given, no
    PATCH is sent and the current pool is fetched via ``get_pool`` instead.

    Args:
        name: Current pool name.
        new_name: New display name (optional).
        replicas: New number of replicas (0-100). Set to 0 to pause.
            The range is not validated by this method.

    Returns:
        The updated Pool, parsed from the response body.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        ResourceNameConflictError: If the server answers 409; the message
            comes from the parsed error response.

    Any other HTTP status error is passed to ``handle_pool_error`` (defined
    elsewhere). The original documentation lists ValidationError,
    SandboxQuotaExceededError and SandboxClientError as possible outcomes.
    """
    url = f"{self._base_url}/pools/{name}"

    changes: dict[str, Any] = {}
    if new_name is not None:
        changes["name"] = new_name
    if replicas is not None:
        changes["replicas"] = replicas

    if len(changes) == 0:
        # No fields supplied: hand back the pool as it currently is.
        return await self.get_pool(name)

    try:
        resp = await self._http.patch(url, json=changes)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        status_code = exc.response.status_code
        if status_code == 404:
            raise ResourceNotFoundError(
                f"Pool '{name}' not found", resource_type="pool"
            ) from exc
        if status_code == 409:
            details = parse_error_response(exc)
            raise ResourceNameConflictError(
                details["message"], resource_type="pool"
            ) from exc
        handle_pool_error(exc)
        raise  # pragma: no cover

    return Pool.from_dict(resp.json())
|
||||
|
||||
async def delete_pool(self, name: str) -> None:
    """Delete a Pool.

    This will terminate all sandboxes in the pool.

    Args:
        name: Pool name.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        httpx.HTTPStatusError: Re-raised unchanged if
            ``handle_client_http_error`` returns without raising.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/pools/{name}"

    try:
        response = await self._http.delete(url)
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Pool '{name}' not found", resource_type="pool"
            ) from e
        handle_client_http_error(e)
        # Match the sibling methods: a failed delete must never fall through
        # and look like success if the handler ever returns.
        raise  # pragma: no cover
|
||||
|
||||
# ========================================================================
|
||||
# Sandbox Operations
|
||||
# ========================================================================
|
||||
|
||||
async def sandbox(
    self,
    template_name: str,
    *,
    name: Optional[str] = None,
    timeout: int = 30,
) -> AsyncSandbox:
    """Create a sandbox that is flagged for automatic deletion.

    Delegates to ``create_sandbox()`` and then sets ``_auto_delete`` on the
    result, so using the returned object as an async context manager
    deletes the sandbox on exit:

        async with await client.sandbox(template_name="my-template") as sandbox:
            result = await sandbox.run("echo hello")

    For sandboxes with manual lifecycle management, use create_sandbox().

    Args:
        template_name: Name of the SandboxTemplate to use.
        name: Optional sandbox name (auto-generated if not provided).
        timeout: Timeout in seconds when waiting for ready.

    Returns:
        AsyncSandbox instance with auto-delete enabled.

    Raises:
        Whatever ``create_sandbox()`` raises.
    """
    created = await self.create_sandbox(
        template_name=template_name,
        name=name,
        timeout=timeout,
    )
    # create_sandbox() builds the object with auto_delete=False; flip it here.
    created._auto_delete = True
    return created
|
||||
|
||||
async def create_sandbox(
    self,
    template_name: str,
    *,
    name: Optional[str] = None,
    timeout: int = 30,
    wait_for_ready: bool = True,
) -> AsyncSandbox:
    """Create a new Sandbox without automatic cleanup.

    The returned object is built with ``auto_delete=False``. Use
    delete_sandbox() for cleanup, or use sandbox() for automatic cleanup
    with a context manager.

    Args:
        template_name: Name of the SandboxTemplate to use.
        name: Optional sandbox name; only sent when truthy.
        timeout: Timeout in seconds when waiting for ready (only sent when
            wait_for_ready=True).
        wait_for_ready: If True (default), block until sandbox is ready.
            If False, return immediately with status "provisioning". Use
            get_sandbox_status() or wait_for_sandbox() to poll for readiness.

    Returns:
        Created AsyncSandbox. When wait_for_ready=False, the sandbox will
        have status="provisioning" and cannot be used for operations until
        ready.

    HTTP status errors are passed to ``handle_sandbox_creation_error``
    (defined elsewhere). The original documentation lists
    ResourceTimeoutError, ResourceCreationError and SandboxClientError as
    possible outcomes.
    """
    url = f"{self._base_url}/boxes"

    request_body: dict[str, Any] = {
        "template_name": template_name,
        "wait_for_ready": wait_for_ready,
    }
    # Without waiting, a fixed 30s HTTP timeout applies; when waiting, the
    # HTTP timeout is 30 seconds longer than the timeout sent in the body.
    http_timeout = 30
    if wait_for_ready:
        request_body["timeout"] = timeout
        http_timeout = timeout + 30
    if name:
        request_body["name"] = name

    try:
        resp = await self._http.post(url, json=request_body, timeout=http_timeout)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_sandbox_creation_error(exc)
        raise  # pragma: no cover

    return AsyncSandbox.from_dict(resp.json(), client=self, auto_delete=False)
|
||||
|
||||
async def get_sandbox(self, name: str) -> AsyncSandbox:
    """Look up a single Sandbox by its name.

    The returned object is built with ``auto_delete=False``. Use
    delete_sandbox() for cleanup.

    Args:
        name: Sandbox name.

    Returns:
        The AsyncSandbox parsed from the response body.

    Raises:
        ResourceNotFoundError: If the server answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/boxes/{name}"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Sandbox '{name}' not found", resource_type="sandbox"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return AsyncSandbox.from_dict(resp.json(), client=self, auto_delete=False)
|
||||
|
||||
async def list_sandboxes(self) -> list[AsyncSandbox]:
    """Fetch every Sandbox returned by the ``/boxes`` endpoint.

    Returns:
        AsyncSandbox objects (each with ``auto_delete=False``) built from
        the ``"sandboxes"`` entry of the response body; an empty list when
        that entry is missing.

    Raises:
        SandboxAPIError: If the endpoint itself answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/boxes"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise SandboxAPIError(
                f"API endpoint not found: {url}. "
                f"Check that api_endpoint is correct."
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    body = resp.json()
    found = []
    for entry in body.get("sandboxes", []):
        found.append(AsyncSandbox.from_dict(entry, client=self, auto_delete=False))
    return found
|
||||
|
||||
async def update_sandbox(self, name: str, *, new_name: str) -> AsyncSandbox:
    """Rename a sandbox.

    Args:
        name: Current sandbox name.
        new_name: New display name.

    Returns:
        The updated AsyncSandbox (built with ``auto_delete=False``).

    Raises:
        ResourceNotFoundError: If the server answers 404.
        ResourceNameConflictError: If the server answers 409.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/boxes/{name}"

    try:
        resp = await self._http.patch(url, json={"name": new_name})
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        status_code = exc.response.status_code
        if status_code == 404:
            raise ResourceNotFoundError(
                f"Sandbox '{name}' not found", resource_type="sandbox"
            ) from exc
        if status_code == 409:
            # The conflict message is built locally here rather than taken
            # from the response body.
            raise ResourceNameConflictError(
                f"Sandbox name '{new_name}' already in use",
                resource_type="sandbox",
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return AsyncSandbox.from_dict(resp.json(), client=self, auto_delete=False)
|
||||
|
||||
async def delete_sandbox(self, name: str) -> None:
    """Delete a Sandbox.

    Args:
        name: Sandbox name.

    Raises:
        ResourceNotFoundError: If the server answers 404.
        httpx.HTTPStatusError: Re-raised unchanged if
            ``handle_client_http_error`` returns without raising.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/boxes/{name}"

    try:
        response = await self._http.delete(url)
        response.raise_for_status()
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Sandbox '{name}' not found", resource_type="sandbox"
            ) from e
        handle_client_http_error(e)
        # Match the sibling methods: a failed delete must never fall through
        # and look like success if the handler ever returns.
        raise  # pragma: no cover
|
||||
|
||||
async def get_sandbox_status(self, name: str) -> ResourceStatus:
    """Fetch the provisioning status of a sandbox.

    Queries the dedicated ``/boxes/{name}/status`` endpoint, which the
    original documentation describes as a lightweight endpoint for
    high-frequency polling that returns only the status fields.

    Args:
        name: Sandbox name.

    Returns:
        ResourceStatus parsed from the response body.

    Raises:
        ResourceNotFoundError: If the server answers 404.

    Any other HTTP status error is passed to ``handle_client_http_error``.
    """
    url = f"{self._base_url}/boxes/{name}/status"

    try:
        resp = await self._http.get(url)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Sandbox '{name}' not found", resource_type="sandbox"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return ResourceStatus.from_dict(resp.json())
|
||||
|
||||
async def wait_for_sandbox(
    self,
    name: str,
    *,
    timeout: int = 120,
    poll_interval: float = 1.0,
) -> AsyncSandbox:
    """Poll a sandbox until it is "ready" or "failed".

    Each iteration calls ``get_sandbox_status()``; once the status is
    "ready" the full sandbox is fetched with ``get_sandbox()``.

    Args:
        name: Sandbox name.
        timeout: Maximum time to wait in seconds.
        poll_interval: Time between status checks in seconds.

    Returns:
        AsyncSandbox in "ready" status.

    Raises:
        ResourceCreationError: If sandbox status becomes "failed".
        ResourceTimeoutError: If the deadline passes before the sandbox is
            "ready" or "failed".

    Errors from ``get_sandbox_status()`` / ``get_sandbox()`` propagate.
    """
    import time

    give_up_at = time.monotonic() + timeout
    while True:
        current = await self.get_sandbox_status(name)
        state = current.status

        if state == "ready":
            return await self.get_sandbox(name)
        if state == "failed":
            raise ResourceCreationError(
                current.status_message or "Sandbox provisioning failed",
                resource_type="sandbox",
            )

        time_left = give_up_at - time.monotonic()
        if time_left <= 0:
            raise ResourceTimeoutError(
                f"Sandbox '{name}' not ready after {timeout}s",
                resource_type="sandbox",
                last_status=state,
            )
        # Never sleep past the deadline.
        await asyncio.sleep(min(poll_interval, time_left))
|
||||
446
venv/Lib/site-packages/langsmith/sandbox/_async_sandbox.py
Normal file
446
venv/Lib/site-packages/langsmith/sandbox/_async_sandbox.py
Normal file
@@ -0,0 +1,446 @@
|
||||
"""AsyncSandbox class for async sandbox operations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, overload
|
||||
|
||||
import httpx
|
||||
|
||||
from langsmith.sandbox._exceptions import (
|
||||
DataplaneNotConfiguredError,
|
||||
ResourceNotFoundError,
|
||||
SandboxConnectionError,
|
||||
SandboxNotReadyError,
|
||||
)
|
||||
from langsmith.sandbox._helpers import handle_sandbox_http_error
|
||||
from langsmith.sandbox._models import (
|
||||
AsyncCommandHandle,
|
||||
ExecutionResult,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langsmith.sandbox._async_client import AsyncSandboxClient
|
||||
|
||||
|
||||
@dataclass
|
||||
class AsyncSandbox:
|
||||
"""Represents an active sandbox for running commands and file operations async.
|
||||
|
||||
This class is typically obtained from AsyncSandboxClient.sandbox() and supports
|
||||
the async context manager protocol for automatic cleanup.
|
||||
|
||||
Attributes:
|
||||
name: Display name (can be updated).
|
||||
template_name: Name of the template used to create this sandbox.
|
||||
dataplane_url: URL for data plane operations (file I/O, command execution).
|
||||
Only functional when status is "ready".
|
||||
id: Unique identifier (UUID). Remains constant even if name changes.
|
||||
May be None for resources created before ID support was added.
|
||||
status: Sandbox lifecycle status. One of "provisioning", "ready", "failed".
|
||||
status_message: Human-readable details when status is "failed", None otherwise.
|
||||
created_at: Timestamp when the sandbox was created.
|
||||
updated_at: Timestamp when the sandbox was last updated.
|
||||
|
||||
Example:
|
||||
async with await client.sandbox(template_name="python-sandbox") as sandbox:
|
||||
result = await sandbox.run("python --version")
|
||||
print(result.stdout)
|
||||
"""
|
||||
|
||||
# Data fields (from API response)
|
||||
name: str
|
||||
template_name: str
|
||||
dataplane_url: Optional[str] = None
|
||||
id: Optional[str] = None
|
||||
status: str = "ready"
|
||||
status_message: Optional[str] = None
|
||||
created_at: Optional[str] = None
|
||||
updated_at: Optional[str] = None
|
||||
|
||||
# Internal fields (not from API)
|
||||
_client: AsyncSandboxClient = field(repr=False, default=None) # type: ignore
|
||||
_auto_delete: bool = field(repr=False, default=True)
|
||||
|
||||
@classmethod
|
||||
def from_dict(
|
||||
cls,
|
||||
data: dict[str, Any],
|
||||
client: AsyncSandboxClient,
|
||||
auto_delete: bool = True,
|
||||
) -> AsyncSandbox:
|
||||
"""Create an AsyncSandbox from API response dict.
|
||||
|
||||
Args:
|
||||
data: API response dictionary containing sandbox data.
|
||||
client: Parent AsyncSandboxClient for operations.
|
||||
auto_delete: Whether to delete the sandbox on context exit.
|
||||
|
||||
Returns:
|
||||
AsyncSandbox instance.
|
||||
"""
|
||||
return cls(
|
||||
name=data.get("name", ""),
|
||||
template_name=data.get("template_name", ""),
|
||||
dataplane_url=data.get("dataplane_url"),
|
||||
id=data.get("id"),
|
||||
status=data.get("status", "ready"),
|
||||
status_message=data.get("status_message"),
|
||||
created_at=data.get("created_at"),
|
||||
updated_at=data.get("updated_at"),
|
||||
_client=client,
|
||||
_auto_delete=auto_delete,
|
||||
)
|
||||
|
||||
async def __aenter__(self) -> AsyncSandbox:
|
||||
"""Enter async context manager."""
|
||||
return self
|
||||
|
||||
async def __aexit__(
|
||||
self,
|
||||
exc_type: Optional[type],
|
||||
exc_val: Optional[BaseException],
|
||||
exc_tb: Optional[Any],
|
||||
) -> None:
|
||||
"""Exit async context manager, optionally deleting the sandbox."""
|
||||
if self._auto_delete:
|
||||
try:
|
||||
await self._client.delete_sandbox(self.name)
|
||||
except Exception:
|
||||
# Don't raise on cleanup errors
|
||||
pass
|
||||
|
||||
def _require_dataplane_url(self) -> str:
|
||||
"""Validate and return the dataplane URL.
|
||||
|
||||
Returns:
|
||||
The dataplane URL.
|
||||
|
||||
Raises:
|
||||
SandboxNotReadyError: If sandbox status is not "ready".
|
||||
DataplaneNotConfiguredError: If dataplane_url is not configured.
|
||||
"""
|
||||
if self.status != "ready":
|
||||
raise SandboxNotReadyError(
|
||||
f"Sandbox '{self.name}' is not ready (status: {self.status}). "
|
||||
"Wait for status 'ready' before running operations."
|
||||
)
|
||||
if not self.dataplane_url:
|
||||
raise DataplaneNotConfiguredError(
|
||||
f"Sandbox '{self.name}' does not have a dataplane_url configured. "
|
||||
"Runtime operations require a dataplane URL."
|
||||
)
|
||||
return self.dataplane_url
|
||||
|
||||
@overload
|
||||
async def run(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int = ...,
|
||||
env: Optional[dict[str, str]] = ...,
|
||||
cwd: Optional[str] = ...,
|
||||
shell: str = ...,
|
||||
on_stdout: Optional[Callable[[str], Any]] = ...,
|
||||
on_stderr: Optional[Callable[[str], Any]] = ...,
|
||||
wait: Literal[True] = ...,
|
||||
) -> ExecutionResult: ...
|
||||
|
||||
@overload
|
||||
async def run(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int = ...,
|
||||
env: Optional[dict[str, str]] = ...,
|
||||
cwd: Optional[str] = ...,
|
||||
shell: str = ...,
|
||||
on_stdout: Optional[Callable[[str], Any]] = ...,
|
||||
on_stderr: Optional[Callable[[str], Any]] = ...,
|
||||
wait: Literal[False],
|
||||
) -> AsyncCommandHandle: ...
|
||||
|
||||
async def run(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int = 60,
|
||||
env: Optional[dict[str, str]] = None,
|
||||
cwd: Optional[str] = None,
|
||||
shell: str = "/bin/bash",
|
||||
on_stdout: Optional[Callable[[str], Any]] = None,
|
||||
on_stderr: Optional[Callable[[str], Any]] = None,
|
||||
wait: bool = True,
|
||||
) -> Union[ExecutionResult, AsyncCommandHandle]:
|
||||
"""Execute a command in the sandbox asynchronously.
|
||||
|
||||
Args:
|
||||
command: Shell command to execute.
|
||||
timeout: Command timeout in seconds.
|
||||
env: Environment variables to set for the command.
|
||||
cwd: Working directory for command execution. If None, uses sandbox default.
|
||||
shell: Shell to use for command execution. Defaults to "/bin/bash".
|
||||
on_stdout: Callback invoked with each stdout chunk as it arrives.
|
||||
Blocks until the command completes and returns ExecutionResult.
|
||||
Cannot be combined with wait=False.
|
||||
on_stderr: Callback invoked with each stderr chunk as it arrives.
|
||||
Blocks until the command completes and returns ExecutionResult.
|
||||
Cannot be combined with wait=False.
|
||||
wait: If True (default), block until the command completes and
|
||||
return ExecutionResult. If False, return an
|
||||
AsyncCommandHandle immediately for streaming output,
|
||||
kill, stdin input, and reconnection. Cannot be combined
|
||||
with on_stdout/on_stderr callbacks.
|
||||
|
||||
Returns:
|
||||
ExecutionResult when wait=True (default).
|
||||
AsyncCommandHandle when wait=False.
|
||||
|
||||
Raises:
|
||||
ValueError: If wait=False is combined with callbacks.
|
||||
DataplaneNotConfiguredError: If dataplane_url is not configured.
|
||||
SandboxOperationError: If command execution fails.
|
||||
CommandTimeoutError: If command exceeds its timeout.
|
||||
SandboxConnectionError: If connection to sandbox fails after retries.
|
||||
SandboxNotReadyError: If sandbox is not ready.
|
||||
SandboxClientError: For other errors.
|
||||
"""
|
||||
if not wait and (on_stdout or on_stderr):
|
||||
raise ValueError(
|
||||
"Cannot combine wait=False with on_stdout/on_stderr callbacks. "
|
||||
"Use wait=False and iterate the CommandHandle, or use callbacks."
|
||||
)
|
||||
|
||||
self._require_dataplane_url()
|
||||
|
||||
use_ws = not wait or on_stdout or on_stderr
|
||||
if use_ws:
|
||||
return await self._run_ws(
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
wait=wait,
|
||||
on_stdout=on_stdout,
|
||||
on_stderr=on_stderr,
|
||||
)
|
||||
|
||||
# Catch broad exceptions so that unexpected WS failures (e.g. version
|
||||
# incompatibilities) don't break users who don't need WS features.
|
||||
try:
|
||||
return await self._run_ws(
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
wait=True,
|
||||
on_stdout=None,
|
||||
on_stderr=None,
|
||||
)
|
||||
except (SandboxConnectionError, ImportError, OSError, TypeError):
|
||||
return await self._run_http(
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
)
|
||||
|
||||
async def _run_ws(
    self,
    command: str,
    *,
    timeout: int,
    env: Optional[dict[str, str]],
    cwd: Optional[str],
    shell: str,
    wait: bool,
    on_stdout: Optional[Callable[[str], Any]],
    on_stderr: Optional[Callable[[str], Any]],
) -> Union[ExecutionResult, AsyncCommandHandle]:
    """Run a command over the WebSocket /execute/ws transport.

    Opens the stream with ``run_ws_stream_async``, wraps it in an
    AsyncCommandHandle and waits for it to start. With ``wait=True`` the
    handle's result is awaited and returned; otherwise the handle itself
    is returned.
    """
    # Imported lazily, so an ImportError surfaces here at call time.
    from langsmith.sandbox._ws_execute import run_ws_stream_async

    endpoint = self._require_dataplane_url()
    token = self._client._api_key

    stream, controller = await run_ws_stream_async(
        endpoint,
        token,
        command,
        timeout=timeout,
        env=env,
        cwd=cwd,
        shell=shell,
        on_stdout=on_stdout,
        on_stderr=on_stderr,
    )

    cmd_handle = AsyncCommandHandle(stream, controller, self)
    await cmd_handle._ensure_started()

    if wait:
        return await cmd_handle.result
    return cmd_handle
|
||||
|
||||
async def _run_http(
    self,
    command: str,
    *,
    timeout: int,
    env: Optional[dict[str, str]],
    cwd: Optional[str],
    shell: str,
) -> ExecutionResult:
    """Run a command through the HTTP ``POST /execute`` endpoint.

    Args:
        command: Command to execute.
        timeout: Command timeout sent in the payload; the HTTP request
            itself is given ``timeout + 10``.
        env: Optional environment variables; omitted from the payload
            when None.
        cwd: Optional working directory; omitted from the payload when
            None.
        shell: Shell used to run the command.

    Returns:
        An ExecutionResult built from the JSON response, defaulting to
        empty output and exit code -1 for missing fields.
    """
    endpoint = f"{self._require_dataplane_url()}/execute"

    body: dict[str, Any] = {
        "command": command,
        "timeout": timeout,
        "shell": shell,
    }
    # Optional fields are only sent when the caller supplied them.
    for field, value in (("env", env), ("cwd", cwd)):
        if value is not None:
            body[field] = value

    try:
        resp = await self._client._http.post(
            endpoint, json=body, timeout=timeout + 10
        )
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_sandbox_http_error(exc)
        raise  # pragma: no cover

    parsed = resp.json()
    return ExecutionResult(
        stdout=parsed.get("stdout", ""),
        stderr=parsed.get("stderr", ""),
        exit_code=parsed.get("exit_code", -1),
    )
|
||||
|
||||
async def reconnect(
    self,
    command_id: str,
    *,
    stdout_offset: int = 0,
    stderr_offset: int = 0,
) -> AsyncCommandHandle:
    """Re-attach to a command that is running or recently finished.

    Output resumes from the supplied byte offsets; anything produced
    while the client was disconnected is replayed from the server's
    ring buffer.

    Args:
        command_id: The command ID from handle.command_id.
        stdout_offset: Byte offset to resume stdout from (default: 0).
        stderr_offset: Byte offset to resume stderr from (default: 0).

    Returns:
        An AsyncCommandHandle for the command.

    Raises:
        SandboxOperationError: If command_id is not found or session expired.
        SandboxConnectionError: If connection to sandbox fails after retries.
    """
    # Function-scope import: the WebSocket helper is loaded on demand.
    from langsmith.sandbox._ws_execute import reconnect_ws_stream_async

    offsets = {
        "stdout_offset": stdout_offset,
        "stderr_offset": stderr_offset,
    }
    stream, controller = await reconnect_ws_stream_async(
        self._require_dataplane_url(),
        self._client._api_key,
        command_id,
        **offsets,
    )
    return AsyncCommandHandle(
        stream,
        controller,
        self,
        command_id=command_id,
        **offsets,
    )
|
||||
|
||||
async def write(
    self,
    path: str,
    content: Union[str, bytes],
    *,
    timeout: int = 60,
) -> None:
    """Upload content to a file inside the sandbox.

    Args:
        path: Target file path in the sandbox.
        content: File content; a str is UTF-8 encoded before upload.
        timeout: Request timeout in seconds.

    Raises:
        DataplaneNotConfiguredError: If dataplane_url is not configured.
        SandboxOperationError: If file write fails.
        SandboxConnectionError: If connection to sandbox fails after retries.
        SandboxNotReadyError: If sandbox is not ready.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._require_dataplane_url()}/upload"

    # The multipart upload needs bytes, so text is encoded first.
    raw = content.encode("utf-8") if isinstance(content, str) else content
    multipart = {"file": ("file", raw)}

    try:
        resp = await self._client._http.post(
            endpoint, params={"path": path}, files=multipart, timeout=timeout
        )
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_sandbox_http_error(exc)
|
||||
|
||||
async def read(self, path: str, *, timeout: int = 60) -> bytes:
    """Download a file from the sandbox.

    Args:
        path: File path to read. Supports both absolute paths
            (e.g., /tmp/file.txt) and relative paths (resolved from
            /home/user/).
        timeout: Request timeout in seconds.

    Returns:
        File contents as bytes.

    Raises:
        DataplaneNotConfiguredError: If dataplane_url is not configured.
        ResourceNotFoundError: If the file doesn't exist.
        SandboxOperationError: If file read fails.
        SandboxConnectionError: If connection to sandbox fails after retries.
        SandboxNotReadyError: If sandbox is not ready.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._require_dataplane_url()}/download"

    try:
        resp = await self._client._http.get(
            endpoint, params={"path": path}, timeout=timeout
        )
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_sandbox_http_error(exc)
            # Re-raise if the handler returned, so the declared return
            # type still holds.
            raise  # pragma: no cover
        raise ResourceNotFoundError(
            f"File '{path}' not found in sandbox '{self.name}'",
            resource_type="file",
        ) from exc

    return resp.content
|
||||
938
venv/Lib/site-packages/langsmith/sandbox/_client.py
Normal file
938
venv/Lib/site-packages/langsmith/sandbox/_client.py
Normal file
@@ -0,0 +1,938 @@
|
||||
"""Main SandboxClient class for interacting with the sandbox server API."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from langsmith import utils as ls_utils
|
||||
from langsmith.sandbox._exceptions import (
|
||||
ResourceCreationError,
|
||||
ResourceInUseError,
|
||||
ResourceNameConflictError,
|
||||
ResourceNotFoundError,
|
||||
ResourceTimeoutError,
|
||||
SandboxAPIError,
|
||||
ValidationError,
|
||||
)
|
||||
from langsmith.sandbox._helpers import (
|
||||
handle_client_http_error,
|
||||
handle_pool_error,
|
||||
handle_sandbox_creation_error,
|
||||
handle_volume_creation_error,
|
||||
parse_error_response,
|
||||
)
|
||||
from langsmith.sandbox._models import (
|
||||
Pool,
|
||||
ResourceStatus,
|
||||
SandboxTemplate,
|
||||
Volume,
|
||||
VolumeMountSpec,
|
||||
)
|
||||
from langsmith.sandbox._sandbox import Sandbox
|
||||
from langsmith.sandbox._transport import RetryTransport
|
||||
|
||||
|
||||
def _get_default_api_endpoint() -> str:
    """Build the default sandbox API endpoint from the environment.

    The base URL comes from the ``ENDPOINT`` environment lookup (i.e.
    LANGSMITH_ENDPOINT or LANGCHAIN_ENDPOINT), falling back to the public
    LangSmith API, with ``/v2/sandboxes`` appended.
    """
    root = ls_utils.get_env_var(
        "ENDPOINT", default="https://api.smith.langchain.com"
    )
    # Trailing slashes are dropped so the joined URL has no double slash.
    return root.rstrip("/") + "/v2/sandboxes"
|
||||
|
||||
|
||||
def _get_default_api_key() -> Optional[str]:
    """Look up the default API key via the ``API_KEY`` environment lookup."""
    env_key = ls_utils.get_env_var("API_KEY")
    return env_key
|
||||
|
||||
|
||||
class SandboxClient:
|
||||
"""Client for interacting with the Sandbox Server API.
|
||||
|
||||
This client provides a simple interface for managing sandboxes and templates.
|
||||
|
||||
Example:
|
||||
# Uses LANGSMITH_ENDPOINT and LANGSMITH_API_KEY from environment
|
||||
client = SandboxClient()
|
||||
|
||||
# Or with explicit configuration
|
||||
client = SandboxClient(
|
||||
api_endpoint="https://api.smith.langchain.com/v2/sandboxes",
|
||||
api_key="your-api-key",
|
||||
)
|
||||
|
||||
# Create a sandbox and run commands
|
||||
with client.sandbox(template_name="python-sandbox") as sandbox:
|
||||
result = sandbox.run("python --version")
|
||||
print(result.stdout)
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    *,
    api_endpoint: Optional[str] = None,
    timeout: float = 10.0,
    api_key: Optional[str] = None,
    max_retries: int = 3,
):
    """Set up the client's base URL, credentials and HTTP transport.

    Args:
        api_endpoint: Full URL of the sandbox API endpoint. If not provided,
            derived from LANGSMITH_ENDPOINT environment variable.
        timeout: Default HTTP timeout in seconds.
        api_key: API key for authentication. If not provided, uses
            LANGSMITH_API_KEY environment variable.
        max_retries: Maximum number of retries for transient errors (502,
            503, 504), rate limits (429), and connection failures. Set to
            0 to disable retries. Default: 3.
    """
    endpoint = api_endpoint if api_endpoint else _get_default_api_endpoint()
    self._base_url = endpoint.rstrip("/")

    key = api_key if api_key else _get_default_api_key()
    self._api_key = key

    # The auth header is only attached when a key is actually available.
    auth_headers: dict[str, str] = {"X-Api-Key": key} if key else {}

    self._http = httpx.Client(
        transport=RetryTransport(max_retries=max_retries),
        timeout=timeout,
        headers=auth_headers,
    )
|
||||
|
||||
def close(self) -> None:
    """Shut down the underlying HTTP client."""
    http_client = self._http
    http_client.close()
|
||||
|
||||
def __del__(self) -> None:
    """Best-effort close of the HTTP client during garbage collection."""
    try:
        http_client = self._http
        if http_client.is_closed:
            return
        http_client.close()
    except Exception:
        # Deliberately silent: any failure while finalizing (including a
        # missing ``_http`` attribute) is ignored.
        pass
|
||||
|
||||
def __enter__(self) -> SandboxClient:
|
||||
"""Enter context manager."""
|
||||
return self
|
||||
|
||||
def __exit__(
    self,
    exc_type: Optional[type],
    exc_val: Optional[BaseException],
    exc_tb: Optional[Any],
) -> None:
    """Leave the ``with`` block by closing the client.

    The exception arguments are not inspected, and None is returned, so
    any in-flight exception keeps propagating.
    """
    self.close()
    return None
|
||||
|
||||
# ========================================================================
|
||||
# Volume Operations
|
||||
# ========================================================================
|
||||
|
||||
def create_volume(
    self,
    name: str,
    size: str,
    *,
    timeout: int = 60,
) -> Volume:
    """Provision a new persistent volume and wait for it to be ready.

    The volume can afterwards be referenced in templates.

    Args:
        name: Volume name.
        size: Storage size (e.g., "1Gi", "10Gi").
        timeout: Timeout in seconds when waiting for ready (min: 5,
            max: 300). The HTTP request is given ``timeout + 30``.

    Returns:
        Created Volume.

    Raises:
        VolumeProvisioningError: If volume provisioning fails.
        ResourceTimeoutError: If volume doesn't become ready within timeout.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/volumes"
    body = {
        "name": name,
        "size": size,
        "wait_for_ready": True,
        "timeout": timeout,
    }

    try:
        resp = self._http.post(endpoint, json=body, timeout=timeout + 30)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_volume_creation_error(exc)
        raise  # pragma: no cover

    return Volume.from_dict(resp.json())
|
||||
|
||||
def get_volume(self, name: str) -> Volume:
    """Fetch a single volume by name.

    Args:
        name: Volume name.

    Returns:
        Volume.

    Raises:
        ResourceNotFoundError: If volume not found.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/volumes/{name}"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        raise ResourceNotFoundError(
            f"Volume '{name}' not found", resource_type="volume"
        ) from exc

    return Volume.from_dict(resp.json())
|
||||
|
||||
def list_volumes(self) -> list[Volume]:
    """Return every volume reported by the API.

    Returns:
        List of Volumes (empty when the response has no "volumes" key).
    """
    endpoint = f"{self._base_url}/volumes"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        # A 404 on a collection URL points at a misconfigured endpoint.
        raise SandboxAPIError(
            f"API endpoint not found: {endpoint}. "
            "Check that api_endpoint is correct."
        ) from exc

    volumes: list[Volume] = []
    for entry in resp.json().get("volumes", []):
        volumes.append(Volume.from_dict(entry))
    return volumes
|
||||
|
||||
def delete_volume(self, name: str) -> None:
    """Remove a volume.

    Args:
        name: Volume name.

    Raises:
        ResourceNotFoundError: If volume not found.
        ResourceInUseError: If volume is referenced by templates.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/volumes/{name}"

    try:
        self._http.delete(endpoint).raise_for_status()
    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        if status == 404:
            raise ResourceNotFoundError(
                f"Volume '{name}' not found", resource_type="volume"
            ) from exc
        if status == 409:
            # The conflict message is taken from the server's error body.
            details = parse_error_response(exc)
            raise ResourceInUseError(
                details["message"], resource_type="volume"
            ) from exc
        handle_client_http_error(exc)
|
||||
|
||||
def update_volume(
    self,
    name: str,
    *,
    new_name: Optional[str] = None,
    size: Optional[str] = None,
) -> Volume:
    """Change a volume's name and/or size.

    The display name, the size, or both can be updated in one request.
    Only storage size increases are allowed (storage backend limitation).
    When neither field is given, no PATCH is sent and the current volume
    is fetched and returned instead.

    Args:
        name: Current volume name.
        new_name: New display name (optional).
        size: New storage size (must be >= current size). Optional.

    Returns:
        Updated Volume.

    Raises:
        ResourceNotFoundError: If volume not found.
        VolumeResizeError: If storage decrease attempted.
        ResourceNameConflictError: If new_name is already in use.
        SandboxQuotaExceededError: If storage quota would be exceeded.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/volumes/{name}"

    # Only explicitly supplied fields are sent.
    changes: dict[str, Any] = {
        field: value
        for field, value in (("name", new_name), ("size", size))
        if value is not None
    }
    if not changes:
        return self.get_volume(name)

    try:
        resp = self._http.patch(endpoint, json=changes)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        if status == 404:
            raise ResourceNotFoundError(
                f"Volume '{name}' not found", resource_type="volume"
            ) from exc
        if status == 400:
            details = parse_error_response(exc)
            raise ValidationError(
                details["message"], error_type="VolumeResize"
            ) from exc
        if status == 409:
            details = parse_error_response(exc)
            raise ResourceNameConflictError(
                details["message"], resource_type="volume"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return Volume.from_dict(resp.json())
|
||||
|
||||
# ========================================================================
|
||||
# Template Operations
|
||||
# ========================================================================
|
||||
|
||||
def create_template(
    self,
    name: str,
    image: str,
    *,
    cpu: str = "500m",
    memory: str = "512Mi",
    storage: Optional[str] = None,
    volume_mounts: Optional[list[VolumeMountSpec]] = None,
) -> SandboxTemplate:
    """Register a new SandboxTemplate.

    Only the container image, resource limits, and volume mounts can be
    configured. All other container details are handled by the server.

    Args:
        name: Template name.
        image: Container image (e.g., "python:3.12-slim").
        cpu: CPU limit (e.g., "500m", "1", "2"). Default: "500m".
        memory: Memory limit (e.g., "256Mi", "1Gi"). Default: "512Mi".
        storage: Ephemeral storage limit (e.g., "1Gi"). Optional.
        volume_mounts: List of volumes to mount in the sandbox. Optional.

    Returns:
        Created SandboxTemplate.

    Raises:
        SandboxClientError: If creation fails.
    """
    endpoint = f"{self._base_url}/templates"

    resources: dict[str, Any] = {"cpu": cpu, "memory": memory}
    body: dict[str, Any] = {
        "name": name,
        "image": image,
        "resources": resources,
    }
    # Falsy values (None or empty) are left out of the request entirely.
    if storage:
        resources["storage"] = storage
    if volume_mounts:
        mounts = []
        for mount in volume_mounts:
            mounts.append(
                {
                    "volume_name": mount.volume_name,
                    "mount_path": mount.mount_path,
                }
            )
        body["volume_mounts"] = mounts

    try:
        resp = self._http.post(endpoint, json=body)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return SandboxTemplate.from_dict(resp.json())
|
||||
|
||||
def get_template(self, name: str) -> SandboxTemplate:
    """Fetch a single SandboxTemplate by name.

    Args:
        name: Template name.

    Returns:
        SandboxTemplate.

    Raises:
        ResourceNotFoundError: If template not found.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/templates/{name}"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        raise ResourceNotFoundError(
            f"Template '{name}' not found", resource_type="template"
        ) from exc

    return SandboxTemplate.from_dict(resp.json())
|
||||
|
||||
def list_templates(self) -> list[SandboxTemplate]:
    """Return every SandboxTemplate reported by the API.

    Returns:
        List of SandboxTemplates (empty when the response has no
        "templates" key).
    """
    endpoint = f"{self._base_url}/templates"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        # A 404 on a collection URL points at a misconfigured endpoint.
        raise SandboxAPIError(
            f"API endpoint not found: {endpoint}. "
            "Check that api_endpoint is correct."
        ) from exc

    templates: list[SandboxTemplate] = []
    for entry in resp.json().get("templates", []):
        templates.append(SandboxTemplate.from_dict(entry))
    return templates
|
||||
|
||||
def update_template(self, name: str, *, new_name: str) -> SandboxTemplate:
    """Rename a template (display name only).

    Args:
        name: Current template name.
        new_name: New display name.

    Returns:
        Updated SandboxTemplate.

    Raises:
        ResourceNotFoundError: If template not found.
        ResourceNameConflictError: If new_name is already in use.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/templates/{name}"

    try:
        resp = self._http.patch(endpoint, json={"name": new_name})
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        if status == 404:
            raise ResourceNotFoundError(
                f"Template '{name}' not found", resource_type="template"
            ) from exc
        if status == 409:
            details = parse_error_response(exc)
            raise ResourceNameConflictError(
                details["message"], resource_type="template"
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover

    return SandboxTemplate.from_dict(resp.json())
|
||||
|
||||
def delete_template(self, name: str) -> None:
    """Remove a SandboxTemplate.

    Args:
        name: Template name.

    Raises:
        ResourceNotFoundError: If template not found.
        ResourceInUseError: If template is referenced by sandboxes or pools.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/templates/{name}"

    try:
        self._http.delete(endpoint).raise_for_status()
    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        if status == 404:
            raise ResourceNotFoundError(
                f"Template '{name}' not found", resource_type="template"
            ) from exc
        if status == 409:
            # The conflict message is taken from the server's error body.
            details = parse_error_response(exc)
            raise ResourceInUseError(
                details["message"], resource_type="template"
            ) from exc
        handle_client_http_error(exc)
|
||||
|
||||
# ========================================================================
|
||||
# Pool Operations
|
||||
# ========================================================================
|
||||
|
||||
def create_pool(
    self,
    name: str,
    template_name: str,
    replicas: int,
    *,
    timeout: int = 30,
) -> Pool:
    """Create a Sandbox Pool and wait for it to become ready.

    Pools pre-provision sandboxes from a template for faster startup.

    Args:
        name: Pool name (lowercase letters, numbers, hyphens; max 63 chars).
        template_name: Name of the SandboxTemplate to use (no volume mounts).
        replicas: Number of sandboxes to pre-provision (1-100).
        timeout: Timeout in seconds when waiting for ready (10-600). The
            HTTP request is given ``timeout + 30``.

    Returns:
        Created Pool.

    Raises:
        ResourceNotFoundError: If template not found.
        ValidationError: If template has volumes attached.
        ResourceAlreadyExistsError: If pool with this name already exists.
        ResourceTimeoutError: If pool doesn't reach ready state within timeout.
        SandboxQuotaExceededError: If organization quota is exceeded.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/pools"
    body: dict[str, Any] = {
        "name": name,
        "template_name": template_name,
        "replicas": replicas,
        "wait_for_ready": True,
        "timeout": timeout,
    }
    request_timeout = timeout + 30

    try:
        resp = self._http.post(endpoint, json=body, timeout=request_timeout)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_pool_error(exc)
        raise  # pragma: no cover

    return Pool.from_dict(resp.json())
|
||||
|
||||
def get_pool(self, name: str) -> Pool:
    """Fetch a single Pool by name.

    Args:
        name: Pool name.

    Returns:
        Pool.

    Raises:
        ResourceNotFoundError: If pool not found.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/pools/{name}"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        raise ResourceNotFoundError(
            f"Pool '{name}' not found", resource_type="pool"
        ) from exc

    return Pool.from_dict(resp.json())
|
||||
|
||||
def list_pools(self) -> list[Pool]:
    """Return every Pool reported by the API.

    Returns:
        List of Pools (empty when the response has no "pools" key).
    """
    endpoint = f"{self._base_url}/pools"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        # A 404 on a collection URL points at a misconfigured endpoint.
        raise SandboxAPIError(
            f"API endpoint not found: {endpoint}. "
            "Check that api_endpoint is correct."
        ) from exc

    pools: list[Pool] = []
    for entry in resp.json().get("pools", []):
        pools.append(Pool.from_dict(entry))
    return pools
|
||||
|
||||
def update_pool(
    self,
    name: str,
    *,
    new_name: Optional[str] = None,
    replicas: Optional[int] = None,
) -> Pool:
    """Change a Pool's name and/or replica count.

    The display name, the replica count, or both can be updated. The
    template reference cannot be changed after creation. When neither
    field is given, no PATCH is sent and the current pool is fetched and
    returned instead.

    Args:
        name: Current pool name.
        new_name: New display name (optional).
        replicas: New number of replicas (0-100). Set to 0 to pause.

    Returns:
        Updated Pool.

    Raises:
        ResourceNotFoundError: If pool not found.
        ValidationError: If template was deleted.
        ResourceNameConflictError: If new_name is already in use.
        SandboxQuotaExceededError: If quota exceeded when scaling up.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/pools/{name}"

    # Only explicitly supplied fields are sent (replicas=0 is kept).
    changes: dict[str, Any] = {
        field: value
        for field, value in (("name", new_name), ("replicas", replicas))
        if value is not None
    }
    if not changes:
        return self.get_pool(name)

    try:
        resp = self._http.patch(endpoint, json=changes)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        status = exc.response.status_code
        if status == 404:
            raise ResourceNotFoundError(
                f"Pool '{name}' not found", resource_type="pool"
            ) from exc
        if status == 409:
            details = parse_error_response(exc)
            raise ResourceNameConflictError(
                details["message"], resource_type="pool"
            ) from exc
        handle_pool_error(exc)
        raise  # pragma: no cover

    return Pool.from_dict(resp.json())
|
||||
|
||||
def delete_pool(self, name: str) -> None:
    """Remove a Pool.

    This will terminate all sandboxes in the pool.

    Args:
        name: Pool name.

    Raises:
        ResourceNotFoundError: If pool not found.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/pools/{name}"

    try:
        self._http.delete(endpoint).raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            return
        raise ResourceNotFoundError(
            f"Pool '{name}' not found", resource_type="pool"
        ) from exc
|
||||
|
||||
# ========================================================================
|
||||
# Sandbox Operations
|
||||
# ========================================================================
|
||||
|
||||
def sandbox(
    self,
    template_name: str,
    *,
    name: Optional[str] = None,
    timeout: int = 30,
) -> Sandbox:
    """Create a sandbox flagged for automatic deletion.

    This is the primary method for creating sandboxes. Use it as a
    context manager for automatic cleanup:

        with client.sandbox(template_name="my-template") as sandbox:
            result = sandbox.run("echo hello")

    The sandbox is automatically deleted when exiting the context manager.
    For sandboxes with manual lifecycle management, use create_sandbox().

    Args:
        template_name: Name of the SandboxTemplate to use.
        name: Optional sandbox name (auto-generated if not provided).
        timeout: Timeout in seconds when waiting for ready.

    Returns:
        Sandbox instance.

    Raises:
        ResourceTimeoutError: If timeout waiting for sandbox to be ready.
        ResourceCreationError: If sandbox creation fails.
        SandboxClientError: For other errors.
    """
    created = self.create_sandbox(
        template_name=template_name, name=name, timeout=timeout
    )
    # create_sandbox() builds the instance with auto_delete=False; flip
    # the flag here.
    created._auto_delete = True
    return created
|
||||
|
||||
def create_sandbox(
    self,
    template_name: str,
    *,
    name: Optional[str] = None,
    timeout: int = 30,
    wait_for_ready: bool = True,
) -> Sandbox:
    """Create a new Sandbox with manual lifecycle management.

    The sandbox is NOT automatically deleted. Use delete_sandbox() for
    cleanup, or use sandbox() for automatic cleanup with a context manager.

    Args:
        template_name: Name of the SandboxTemplate to use.
        name: Optional sandbox name (auto-generated if not provided).
        timeout: Timeout in seconds when waiting for ready (only used when
            wait_for_ready=True).
        wait_for_ready: If True (default), block until sandbox is ready.
            If False, return immediately with status "provisioning". Use
            get_sandbox_status() or wait_for_sandbox() to poll for
            readiness.

    Returns:
        Created Sandbox. When wait_for_ready=False, the sandbox will have
        status="provisioning" and cannot be used for operations until ready.

    Raises:
        ResourceTimeoutError: If timeout waiting for sandbox to be ready.
        ResourceCreationError: If sandbox creation fails.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/boxes"

    body: dict[str, Any] = {
        "template_name": template_name,
        "wait_for_ready": wait_for_ready,
    }
    if wait_for_ready:
        body["timeout"] = timeout
        # Blocking creation gets extra HTTP headroom on top of the
        # readiness timeout.
        request_timeout = timeout + 30
    else:
        request_timeout = 30
    if name:
        body["name"] = name

    try:
        resp = self._http.post(endpoint, json=body, timeout=request_timeout)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        handle_sandbox_creation_error(exc)
        raise  # pragma: no cover

    return Sandbox.from_dict(resp.json(), client=self, auto_delete=False)
|
||||
|
||||
def get_sandbox(self, name: str) -> Sandbox:
    """Fetch a single Sandbox by name.

    The sandbox is NOT automatically deleted. Use delete_sandbox() for
    cleanup.

    Args:
        name: Sandbox name.

    Returns:
        Sandbox.

    Raises:
        ResourceNotFoundError: If sandbox not found.
        SandboxClientError: For other errors.
    """
    endpoint = f"{self._base_url}/boxes/{name}"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        raise ResourceNotFoundError(
            f"Sandbox '{name}' not found", resource_type="sandbox"
        ) from exc

    return Sandbox.from_dict(resp.json(), client=self, auto_delete=False)
|
||||
|
||||
def list_sandboxes(self) -> list[Sandbox]:
    """Return every Sandbox reported by the API.

    Returns:
        List of Sandboxes (empty when the response has no "sandboxes"
        key), each bound to this client with auto_delete=False.
    """
    endpoint = f"{self._base_url}/boxes"

    try:
        resp = self._http.get(endpoint)
        resp.raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        # A 404 on a collection URL points at a misconfigured endpoint.
        raise SandboxAPIError(
            f"API endpoint not found: {endpoint}. "
            "Check that api_endpoint is correct."
        ) from exc

    boxes: list[Sandbox] = []
    for entry in resp.json().get("sandboxes", []):
        boxes.append(Sandbox.from_dict(entry, client=self, auto_delete=False))
    return boxes
|
||||
|
||||
def update_sandbox(self, name: str, *, new_name: str) -> Sandbox:
    """Rename a sandbox.

    Sends a PATCH for the sandbox currently called ``name`` with
    ``{"name": new_name}`` as the JSON body.

    Args:
        name: Current sandbox name.
        new_name: New display name.

    Returns:
        Updated Sandbox (built with ``auto_delete=False``).

    Raises:
        ResourceNotFoundError: If the API answers 404.
        ResourceNameConflictError: If the API answers 409 (new_name in use).
        SandboxClientError: For other errors.
    """
    try:
        resp = self._http.patch(
            f"{self._base_url}/boxes/{name}", json={"name": new_name}
        )
        resp.raise_for_status()
        return Sandbox.from_dict(resp.json(), client=self, auto_delete=False)
    except httpx.HTTPStatusError as exc:
        code = exc.response.status_code
        if code == 404:
            raise ResourceNotFoundError(
                f"Sandbox '{name}' not found", resource_type="sandbox"
            ) from exc
        if code == 409:
            raise ResourceNameConflictError(
                f"Sandbox name '{new_name}' already in use",
                resource_type="sandbox",
            ) from exc
        handle_client_http_error(exc)
        raise  # pragma: no cover
|
||||
|
||||
def delete_sandbox(self, name: str) -> None:
    """Delete a sandbox by name.

    Args:
        name: Sandbox name.

    Raises:
        ResourceNotFoundError: If the API answers 404 for this name.
        SandboxClientError: For other errors.
    """
    try:
        self._http.delete(f"{self._base_url}/boxes/{name}").raise_for_status()
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code == 404:
            raise ResourceNotFoundError(
                f"Sandbox '{name}' not found", resource_type="sandbox"
            ) from exc
        handle_client_http_error(exc)
|
||||
|
||||
def get_sandbox_status(self, name: str) -> ResourceStatus:
    """Fetch only the provisioning status of a sandbox.

    Queries the ``/status`` sub-resource, a lightweight endpoint meant for
    high-frequency polling while a sandbox is being provisioned. It carries
    the status fields only, not the full sandbox data.

    Args:
        name: Sandbox name.

    Returns:
        ResourceStatus with status and status_message.

    Raises:
        ResourceNotFoundError: If the API answers 404 for this name.
        SandboxClientError: For other errors.
    """
    try:
        resp = self._http.get(f"{self._base_url}/boxes/{name}/status")
        resp.raise_for_status()
        return ResourceStatus.from_dict(resp.json())
    except httpx.HTTPStatusError as exc:
        if exc.response.status_code != 404:
            handle_client_http_error(exc)
            raise  # pragma: no cover
        raise ResourceNotFoundError(
            f"Sandbox '{name}' not found", resource_type="sandbox"
        ) from exc
|
||||
|
||||
def wait_for_sandbox(
    self,
    name: str,
    *,
    timeout: int = 120,
    poll_interval: float = 1.0,
) -> Sandbox:
    """Block until a sandbox is "ready", fails, or the timeout elapses.

    Polls ``get_sandbox_status()`` and, once the status is "ready", fetches
    the full sandbox with ``get_sandbox()``.

    Args:
        name: Sandbox name.
        timeout: Maximum time to wait in seconds.
        poll_interval: Time between status checks in seconds.

    Returns:
        Sandbox in "ready" status.

    Raises:
        ResourceCreationError: If sandbox status becomes "failed".
        ResourceTimeoutError: If the deadline passes before "ready"/"failed".
        ResourceNotFoundError: If sandbox not found.
        SandboxClientError: For other errors.
    """
    import time

    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    give_up_at = time.monotonic() + timeout
    while True:
        current = self.get_sandbox_status(name)
        state = current.status
        if state == "ready":
            return self.get_sandbox(name)
        if state == "failed":
            raise ResourceCreationError(
                current.status_message or "Sandbox provisioning failed",
                resource_type="sandbox",
            )

        time_left = give_up_at - time.monotonic()
        if time_left > 0:
            # Never sleep past the deadline.
            time.sleep(min(poll_interval, time_left))
            continue

        raise ResourceTimeoutError(
            f"Sandbox '{name}' not ready after {timeout}s",
            resource_type="sandbox",
            last_status=state,
        )
|
||||
286
venv/Lib/site-packages/langsmith/sandbox/_exceptions.py
Normal file
286
venv/Lib/site-packages/langsmith/sandbox/_exceptions.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""Custom exceptions for the sandbox client.
|
||||
|
||||
All sandbox exceptions extend LangSmithError for unified error handling.
|
||||
The exceptions are organized by error type rather than resource type,
|
||||
with a resource_type attribute for specific handling when needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langsmith.utils import LangSmithError
|
||||
|
||||
|
||||
class SandboxClientError(LangSmithError):
    """Common base class for all errors raised by the sandbox client."""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Connection and Authentication Errors
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SandboxAPIError(SandboxClientError):
    """The API endpoint answered with an unexpected error.

    A typical cause is a wrong URL or path.
    """
|
||||
|
||||
|
||||
class SandboxAuthenticationError(SandboxClientError):
    """Authentication failed because the API key is invalid or missing."""
|
||||
|
||||
|
||||
class SandboxConnectionError(SandboxClientError):
    """The connection to the sandbox server could not be made or was lost."""
|
||||
|
||||
|
||||
class SandboxServerReloadError(SandboxConnectionError):
    """The server closed the connection with a 1001 "Going Away" frame.

    That close code signals a server hot-reload rather than a genuine
    connection failure; the command keeps running on the server.

    Because this subclasses SandboxConnectionError, the auto-reconnect logic
    in CommandHandle catches it together with every other connection error.
    The separate type exists to pick the retry strategy:
    SandboxServerReloadError reconnects immediately (no backoff), whereas
    any other SandboxConnectionError uses exponential backoff.

    This exception is handled internally, so users typically never see it.
    """
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Resource Errors (type-based, with resource_type attribute)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class ResourceNotFoundError(SandboxClientError):
    """A requested resource does not exist.

    Attributes:
        resource_type: Type of resource (sandbox, template, volume, pool, file).
    """

    def __init__(self, message: str, resource_type: Optional[str] = None):
        """Store the message and the type of resource that was missing."""
        self.resource_type = resource_type
        super().__init__(message)
|
||||
|
||||
|
||||
class ResourceTimeoutError(SandboxClientError):
    """An operation did not finish within its time limit.

    Attributes:
        resource_type: Type of resource (sandbox, volume, pool).
        last_status: The last known status before timeout (for sandboxes).
    """

    def __init__(
        self,
        message: str,
        resource_type: Optional[str] = None,
        last_status: Optional[str] = None,
    ):
        """Store the message, resource type and last observed status."""
        self.resource_type = resource_type
        self.last_status = last_status
        super().__init__(message)

    def __str__(self) -> str:
        """Return the message, with the last status appended when known."""
        text = super().__str__()
        if not self.last_status:
            return text
        return f"{text} (last_status: {self.last_status})"
|
||||
|
||||
|
||||
class ResourceInUseError(SandboxClientError):
    """A resource could not be deleted because it is still in use.

    Attributes:
        resource_type: Type of resource (template, volume).
    """

    def __init__(self, message: str, resource_type: Optional[str] = None):
        """Store the message and the type of resource that is in use."""
        self.resource_type = resource_type
        super().__init__(message)
|
||||
|
||||
|
||||
class ResourceAlreadyExistsError(SandboxClientError):
    """A resource could not be created because it already exists.

    Attributes:
        resource_type: Type of resource (e.g., pool).
    """

    def __init__(self, message: str, resource_type: Optional[str] = None):
        """Store the message and the type of resource that already exists."""
        self.resource_type = resource_type
        super().__init__(message)
|
||||
|
||||
|
||||
class ResourceNameConflictError(SandboxClientError):
    """A rename failed because the requested name is already taken.

    Attributes:
        resource_type: Type of resource (volume, template, pool, sandbox).
    """

    def __init__(self, message: str, resource_type: Optional[str] = None):
        """Store the message and the type of resource being renamed."""
        self.resource_type = resource_type
        super().__init__(message)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Validation and Quota Errors
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class ValidationError(SandboxClientError):
    """A request was rejected by validation.

    Covers, among others:
    - Resource values exceeding server-defined limits (CPU, memory, storage)
    - Invalid resource units
    - Invalid name formats
    - Pool validation failures (e.g., template has volumes)

    Attributes:
        field: The field that failed validation (e.g., "cpu", "memory").
        details: List of validation error details from the API.
        error_type: Machine-readable error type from the API.
    """

    def __init__(
        self,
        message: str,
        field: Optional[str] = None,
        details: Optional[list[dict]] = None,
        error_type: Optional[str] = None,
    ):
        """Store the message plus the validation metadata."""
        super().__init__(message)
        self.field = field
        self.error_type = error_type
        # A missing (or empty) details argument is normalized to a new list.
        self.details = details if details else []
|
||||
|
||||
|
||||
class QuotaExceededError(SandboxClientError):
    """An organization quota limit was exceeded.

    Users should contact support@langchain.dev to increase quotas.

    Attributes:
        quota_type: Type of quota exceeded (e.g., "sandbox_count", "cpu").
    """

    def __init__(self, message: str, quota_type: Optional[str] = None):
        """Store the message and which quota was exceeded."""
        self.quota_type = quota_type
        super().__init__(message)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Resource Creation Errors
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class ResourceCreationError(SandboxClientError):
    """Provisioning of a resource failed.

    Attributes:
        resource_type: Type of resource (sandbox, volume, pool).
        error_type: Machine-readable error type (ImagePull, CrashLoop,
            SandboxConfig, Unschedulable, VolumeProvisioning).
    """

    def __init__(
        self,
        message: str,
        resource_type: Optional[str] = None,
        error_type: Optional[str] = None,
    ):
        """Store the message, resource type and error type."""
        self.resource_type = resource_type
        self.error_type = error_type
        super().__init__(message)

    def __str__(self) -> str:
        """Return the message, suffixed with ``[error_type]`` when set."""
        text = super().__str__()
        if not self.error_type:
            return text
        return f"{text} [{self.error_type}]"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sandbox Operation Errors (runtime errors during sandbox interaction)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class DataplaneNotConfiguredError(SandboxClientError):
    """No dataplane_url is available for the sandbox.

    This happens when the sandbox-router URL is not configured for the cluster.
    """
|
||||
|
||||
|
||||
class SandboxNotReadyError(SandboxClientError):
    """An interaction was attempted with a sandbox that is not ready yet."""
|
||||
|
||||
|
||||
class SandboxOperationError(SandboxClientError):
    """A sandbox operation (run, read, write) failed.

    Attributes:
        operation: The operation that failed (command, read, write).
        error_type: Machine-readable error type from the API.
    """

    def __init__(
        self,
        message: str,
        operation: Optional[str] = None,
        error_type: Optional[str] = None,
    ):
        """Store the message, the failed operation and the error type."""
        self.operation = operation
        self.error_type = error_type
        super().__init__(message)

    def __str__(self) -> str:
        """Return the message, suffixed with ``[error_type]`` when set."""
        text = super().__str__()
        if not self.error_type:
            return text
        return f"{text} [{self.error_type}]"
|
||||
|
||||
|
||||
class CommandTimeoutError(SandboxOperationError):
    """A command ran longer than its timeout.

    Always reports ``operation="command"`` and ``error_type="CommandTimeout"``.

    Attributes:
        timeout: The timeout value in seconds that was exceeded.
    """

    def __init__(self, message: str, timeout: Optional[int] = None):
        """Store the message and the timeout that was exceeded."""
        self.timeout = timeout
        super().__init__(message, operation="command", error_type="CommandTimeout")
|
||||
368
venv/Lib/site-packages/langsmith/sandbox/_helpers.py
Normal file
368
venv/Lib/site-packages/langsmith/sandbox/_helpers.py
Normal file
@@ -0,0 +1,368 @@
|
||||
"""Shared helper functions for error handling.
|
||||
|
||||
These functions are used by both sync and async clients to parse error responses
|
||||
and raise appropriate exceptions. They contain no I/O operations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from langsmith.sandbox._exceptions import (
|
||||
QuotaExceededError,
|
||||
ResourceAlreadyExistsError,
|
||||
ResourceCreationError,
|
||||
ResourceNotFoundError,
|
||||
ResourceTimeoutError,
|
||||
SandboxAPIError,
|
||||
SandboxAuthenticationError,
|
||||
SandboxClientError,
|
||||
SandboxConnectionError,
|
||||
SandboxNotReadyError,
|
||||
SandboxOperationError,
|
||||
ValidationError,
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# Error Response Parsing
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def parse_error_response(error: httpx.HTTPStatusError) -> dict[str, Any]:
    """Parse an API error response into an error type and a message.

    Recognized shapes of the ``detail`` field in the JSON body:

    - Standardized: ``{"detail": {"error": "...", "message": "..."}}``
    - Pydantic validation errors:
      ``{"detail": [{"loc": [...], "msg": "..."}]}``
    - Anything else (typically a plain string).

    This function never raises. Any failure to read or interpret the body
    falls back to ``str(error)`` as the message.

    Args:
        error: The HTTP status error whose response body is parsed.

    Returns:
        A dict with:
        - error_type: The error type (e.g., "ImagePull", "CrashLoop"), or
          None when the body does not provide one.
        - message: Human-readable error message. Always a ``str``, because
          callers pass it to string operations such as ``.lower()``.
    """
    fallback = str(error)
    try:
        detail = error.response.json().get("detail")

        # Standardized format: {"detail": {"error": "...", "message": "..."}}
        if isinstance(detail, dict):
            # Truthiness test rather than a .get() default: a default would
            # not cover a "message" key that is present but null or empty.
            message = detail.get("message")
            return {
                "error_type": detail.get("error"),
                "message": str(message) if message else fallback,
            }

        # Pydantic validation error format: {"detail": [{"loc": [...], "msg": "..."}]}
        if isinstance(detail, list) and detail:
            messages = [str(d.get("msg", d)) for d in detail if isinstance(d, dict)]
            return {
                "error_type": None,
                "message": "; ".join(messages) if messages else fallback,
            }

        # Fallback for plain string detail; other scalars are stringified.
        return {"error_type": None, "message": str(detail) if detail else fallback}
    except Exception:
        # Deliberate best effort: this runs while handling an HTTP error, so
        # an unparsable body must not replace that error with a parsing one.
        return {"error_type": None, "message": fallback}
|
||||
|
||||
|
||||
def parse_error_response_simple(error: httpx.HTTPStatusError) -> dict[str, Any]:
    """Parse an error response (simplified version for sandbox operations).

    Only the dict form ``{"detail": {"error": "...", "message": "..."}}`` is
    interpreted. Any other non-empty ``detail`` is stringified and used as
    the message. This function never raises.

    Args:
        error: The HTTP status error whose response body is parsed.

    Returns:
        A dict with:
        - error_type: The error type, or None when not provided.
        - message: Human-readable error message. Always a ``str``; falls
          back to ``str(error)`` when the body gives nothing usable.
    """
    fallback = str(error)
    try:
        detail = error.response.json().get("detail")

        if isinstance(detail, dict):
            # Truthiness test rather than a .get() default, so that a
            # null or empty "message" also falls back.
            message = detail.get("message")
            return {
                "error_type": detail.get("error"),
                "message": str(message) if message else fallback,
            }

        # Stringify so a list or scalar detail never leaks out as the message.
        return {"error_type": None, "message": str(detail) if detail else fallback}
    except Exception:
        # Deliberate best effort: never mask the HTTP error being handled.
        return {"error_type": None, "message": fallback}
|
||||
|
||||
|
||||
def parse_validation_error(error: httpx.HTTPStatusError) -> list[dict]:
    """Parse a Pydantic validation error response.

    Args:
        error: The HTTP status error whose response body is parsed.

    Returns:
        A list of validation error details, each a dict that may contain:
        - loc: Location of the error (e.g., ["body", "resources", "cpu"])
        - msg: Human-readable error message
        - type: Error type (e.g., "value_error")

        Entries of ``detail`` that are not dicts are dropped, so callers can
        safely call ``.get()`` on every element. Returns an empty list when
        the body is not JSON, is not a JSON object, or ``detail`` is not a
        list.
    """
    try:
        detail = error.response.json().get("detail", [])
    except Exception:
        # Best effort: an unreadable body simply means "no details".
        return []
    if not isinstance(detail, list):
        return []
    return [entry for entry in detail if isinstance(entry, dict)]
|
||||
|
||||
|
||||
def extract_quota_type(message: str) -> Optional[str]:
    """Guess which quota an error message refers to.

    Matching is case-insensitive keyword search; the first rule that matches
    wins, in this order: sandbox count, cpu, memory, volume count, storage.

    Args:
        message: Error message text from the API.

    Returns:
        One of "sandbox_count", "cpu", "memory", "volume_count", "storage",
        or None when no keyword matches.
    """
    text = message.lower()
    # The two "count" quotas need a resource word plus "count" or "limit".
    mentions_cap = "count" in text or "limit" in text

    if mentions_cap and "sandbox" in text:
        return "sandbox_count"
    for keyword in ("cpu", "memory"):
        if keyword in text:
            return keyword
    if mentions_cap and "volume" in text:
        return "volume_count"
    return "storage" if "storage" in text else None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Client Error Handlers
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def raise_creation_error(
    data: dict[str, Any],
    error: httpx.HTTPStatusError,
    resource_type: str = "sandbox",
) -> None:
    """Raise ResourceCreationError carrying the API's error_type.

    The error_type names the specific failure reason:
    - ImagePull: Image pull failed
    - CrashLoop: Container crashed during startup
    - SandboxConfig: Configuration error
    - Unschedulable: Cannot be scheduled

    Args:
        data: Parsed error response; ``"message"`` and ``"error_type"`` are
            read from it.
        error: The originating HTTP error, chained as the cause.
        resource_type: Type of resource whose creation failed.

    Raises:
        ResourceCreationError: Always.
    """
    # Used only when the parsed response carries no "message" key.
    default_message = f"{resource_type.title()} creation failed"
    raise ResourceCreationError(
        data.get("message", default_message),
        resource_type=resource_type,
        error_type=data.get("error_type"),
    ) from error
|
||||
|
||||
|
||||
def handle_sandbox_creation_error(error: httpx.HTTPStatusError) -> None:
    """Handle HTTP errors specific to sandbox creation.

    Maps API error responses to specific exception types:
    - 408: ResourceTimeoutError (sandbox didn't become ready in time)
    - 422: ValidationError (bad input) or ResourceCreationError (runtime)
    - 429: QuotaExceededError (org limits exceeded)
    - 503: ResourceCreationError (no resources available)
    - Other: Falls through to generic error handling

    Args:
        error: The HTTP status error returned by the create call.

    Raises:
        ResourceTimeoutError, ValidationError, ResourceCreationError,
        QuotaExceededError: As listed above.
        SandboxClientError: (or a subclass) for any other status, raised by
            ``handle_client_http_error``.
    """
    status = error.response.status_code
    data = parse_error_response(error)
    message = data["message"]

    if status == 408:
        # Timeout - the message contains the last known status.
        raise ResourceTimeoutError(message, resource_type="sandbox") from error
    elif status == 422:
        # Tell a Pydantic validation error (bad input) apart from a creation
        # error. Non-dict entries are ignored so that a malformed body cannot
        # raise AttributeError here and hide the real HTTP error.
        details = [d for d in parse_validation_error(error) if isinstance(d, dict)]
        if any(d.get("type") == "value_error" for d in details):
            # Bad input (e.g. exceeds server limits). The offending field is
            # the last element of the first detail's "loc" path, when that
            # path is a non-empty sequence.
            loc = details[0].get("loc")
            field = loc[-1] if isinstance(loc, (list, tuple)) and loc else None
            raise ValidationError(
                message=message,
                field=field,
                details=details,
            ) from error
        else:
            # Sandbox creation failed (runtime error like image pull failure).
            raise_creation_error(data, error)
    elif status == 429:
        # Organization quota exceeded.
        raise QuotaExceededError(
            message=message,
            quota_type=extract_quota_type(message),
        ) from error
    elif status == 503:
        # Service Unavailable - scheduling failed.
        raise ResourceCreationError(
            message,
            resource_type="sandbox",
            error_type=data.get("error_type") or "Unschedulable",
        ) from error
    else:
        # Fall through to generic handling.
        handle_client_http_error(error)
|
||||
|
||||
|
||||
def handle_volume_creation_error(error: httpx.HTTPStatusError) -> None:
    """Handle HTTP errors specific to volume creation.

    Maps API error responses to specific exception types:
    - 503: ResourceCreationError (provisioning failed)
    - 504: ResourceTimeoutError (volume didn't become ready in time)
    - Other: Falls through to generic error handling

    Args:
        error: The HTTP status error returned by the create call.
    """
    status = error.response.status_code
    data = parse_error_response(error)

    if status not in (503, 504):
        # Not volume-specific: use the generic mapping.
        handle_client_http_error(error)
        return

    message = data["message"]
    if status == 504:
        # Timeout - volume didn't become ready in time.
        raise ResourceTimeoutError(message, resource_type="volume") from error

    # 503: provisioning failed (invalid storage class, quota exceeded).
    raise ResourceCreationError(
        message,
        resource_type="volume",
        error_type="VolumeProvisioning",
    ) from error
|
||||
|
||||
|
||||
def handle_pool_error(error: httpx.HTTPStatusError) -> None:
    """Handle HTTP errors specific to pool creation/update.

    Maps API error responses to specific exception types:
    - 400: ResourceNotFoundError (error type "TemplateNotFound") or
      ValidationError (error type "ValidationError", template has volumes)
    - 409: ResourceAlreadyExistsError
    - 429: QuotaExceededError (org limits exceeded)
    - 504: ResourceTimeoutError (timeout waiting for ready replicas)
    - Other (including any other 400): generic error handling

    Args:
        error: The HTTP status error returned by the pool call.
    """
    status = error.response.status_code
    data = parse_error_response(error)
    error_type = data.get("error_type")

    # A 400 is refined by the API's error type; unknown types fall through.
    if status == 400 and error_type == "TemplateNotFound":
        raise ResourceNotFoundError(
            data["message"], resource_type="template"
        ) from error
    if status == 400 and error_type == "ValidationError":
        # Template has volumes attached.
        raise ValidationError(data["message"], error_type=error_type) from error
    if status == 409:
        # Pool already exists.
        raise ResourceAlreadyExistsError(
            data["message"], resource_type="pool"
        ) from error
    if status == 429:
        # Organization quota exceeded.
        raise QuotaExceededError(
            message=data["message"],
            quota_type=extract_quota_type(data["message"]),
        ) from error
    if status == 504:
        # Timeout waiting for pool to be ready.
        raise ResourceTimeoutError(data["message"], resource_type="pool") from error

    # Everything else goes to the generic mapping.
    handle_client_http_error(error)
|
||||
|
||||
|
||||
def handle_client_http_error(error: httpx.HTTPStatusError) -> None:
    """Handle HTTP errors and raise appropriate exceptions (for client operations).

    Mapping by status code:
    - 401 / 403: SandboxAuthenticationError
    - 404: ResourceNotFoundError
    - 422: ValidationError (``field`` is the last element of the first
      detail's ``loc`` path, or None when unavailable)
    - 429: QuotaExceededError
    - 502 with error type "ConnectionError": SandboxConnectionError
    - 500: SandboxAPIError
    - Anything else: SandboxClientError

    Args:
        error: The HTTP status error to translate.

    Raises:
        SandboxClientError: Always (one of the subclasses above or the base
            class); this function never returns normally.
    """
    data = parse_error_response(error)
    message = data["message"]
    error_type = data.get("error_type")
    status = error.response.status_code

    if status in (401, 403):
        raise SandboxAuthenticationError(message) from error
    if status == 404:
        raise ResourceNotFoundError(message) from error

    # Handle validation errors (invalid resource values, formats, etc.)
    if status == 422:
        # Keep only dict entries and tolerate a missing, null or empty "loc",
        # so a malformed body cannot raise AttributeError/IndexError here and
        # hide the real HTTP error.
        details = [d for d in parse_validation_error(error) if isinstance(d, dict)]
        loc = details[0].get("loc") if details else None
        field = loc[-1] if isinstance(loc, (list, tuple)) and loc else None
        raise ValidationError(
            message=message,
            field=field,
            details=details,
        ) from error

    # Handle quota exceeded errors (org limits)
    if status == 429:
        raise QuotaExceededError(
            message=message,
            quota_type=extract_quota_type(message),
        ) from error

    if status == 502 and error_type == "ConnectionError":
        raise SandboxConnectionError(message) from error
    if status == 500:
        raise SandboxAPIError(message) from error
    raise SandboxClientError(message) from error
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sandbox Operation Error Handlers
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def handle_sandbox_http_error(error: httpx.HTTPStatusError) -> None:
    """Handle HTTP errors for sandbox operations (run, read, write).

    Checks are applied in this order; the first match is raised:
    - WriteError -> SandboxOperationError (operation="write")
    - ReadError -> SandboxOperationError (operation="read")
    - CommandError -> SandboxOperationError (operation="command")
    - 403 -> SandboxOperationError (error_type="PermissionDenied")
    - ConnectionError (502) -> SandboxConnectionError
    - NotReady (400) -> SandboxNotReadyError
    - FileNotFound / 404 -> ResourceNotFoundError (resource_type="file")
    - anything else -> SandboxClientError

    Args:
        error: The HTTP status error to translate.

    Raises:
        SandboxClientError: Always (one of the subclasses above or the base
            class).
    """
    parsed = parse_error_response_simple(error)
    message = parsed["message"]
    error_type = parsed.get("error_type")
    status = error.response.status_code

    # Operation-specific errors (from sandbox runtime), matched on the API's
    # error type regardless of the HTTP status.
    runtime_errors = (
        ("WriteError", "write"),
        ("ReadError", "read"),
        ("CommandError", "command"),
    )
    for api_type, operation in runtime_errors:
        if error_type == api_type:
            raise SandboxOperationError(
                message, operation=operation, error_type=error_type
            ) from error

    # Permission denied
    if status == 403:
        raise SandboxOperationError(
            message, operation=None, error_type="PermissionDenied"
        ) from error

    # Connection to sandbox failed
    if status == 502 and error_type == "ConnectionError":
        raise SandboxConnectionError(message) from error

    # Not ready / not found
    if status == 400 and error_type == "NotReady":
        raise SandboxNotReadyError(message) from error
    if status == 404 or error_type == "FileNotFound":
        raise ResourceNotFoundError(message, resource_type="file") from error

    raise SandboxClientError(message) from error
|
||||
680
venv/Lib/site-packages/langsmith/sandbox/_models.py
Normal file
680
venv/Lib/site-packages/langsmith/sandbox/_models.py
Normal file
@@ -0,0 +1,680 @@
|
||||
"""Data models for the sandbox client."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterator, Iterator
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from langsmith.sandbox._exceptions import (
|
||||
SandboxConnectionError,
|
||||
SandboxOperationError,
|
||||
SandboxServerReloadError,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langsmith.sandbox._async_sandbox import AsyncSandbox
|
||||
from langsmith.sandbox._sandbox import Sandbox
|
||||
from langsmith.sandbox._ws_execute import (
|
||||
_AsyncWSStreamControl,
|
||||
_WSStreamControl,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class ExecutionResult:
    """Outcome of a command executed in a sandbox."""

    stdout: str
    stderr: str
    exit_code: int

    @property
    def success(self) -> bool:
        """Whether the command exited with code 0."""
        succeeded = self.exit_code == 0
        return succeeded
|
||||
|
||||
|
||||
@dataclass
class ResourceSpec:
    """CPU, memory and optional storage requested for a sandbox."""

    # Values are kept as the strings given; defaults are "500m" / "512Mi".
    cpu: str = "500m"
    memory: str = "512Mi"
    # None when no storage value is specified.
    storage: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class Volume:
    """A persistent volume.

    Volumes are persistent storage that can be mounted in sandboxes.

    Attributes:
        id: Unique identifier (UUID). Remains constant even if name changes.
            May be None for resources created before ID support was added.
        name: Display name (can be updated).
    """

    name: str
    size: str
    storage_class: str
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Volume:
        """Build a Volume from an API response dict.

        Missing keys default to ``""`` for name, ``"unknown"`` for size,
        ``"default"`` for storage_class and None for the remaining fields.
        """
        optional = {key: data.get(key) for key in ("id", "created_at", "updated_at")}
        return cls(
            name=data.get("name", ""),
            size=data.get("size", "unknown"),
            storage_class=data.get("storage_class", "default"),
            **optional,
        )
|
||||
|
||||
|
||||
@dataclass
class VolumeMountSpec:
    """Where a named volume is mounted in a sandbox template."""

    # Name of the volume to mount.
    volume_name: str
    # Path at which the volume is mounted.
    mount_path: str
|
||||
|
||||
|
||||
@dataclass
class SandboxTemplate:
    """Represents a SandboxTemplate.

    Templates define the image, resource limits, and volume mounts for sandboxes.
    All other container details are handled by the server with secure defaults.

    Attributes:
        id: Unique identifier (UUID). Remains constant even if name changes.
            May be None for resources created before ID support was added.
        name: Display name (can be updated).
    """

    name: str
    image: str
    resources: ResourceSpec
    volume_mounts: list[VolumeMountSpec] = field(default_factory=list)
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SandboxTemplate:
        """Create a SandboxTemplate from API response dict.

        Missing keys fall back to defaults: ``""`` for name, ``"unknown"``
        for image, ``"500m"`` / ``"512Mi"`` for cpu / memory, and no volume
        mounts. ``resources`` and ``volume_mounts`` are treated the same way
        whether they are absent or explicitly null.

        Args:
            data: Template object from the API response.

        Returns:
            The populated SandboxTemplate.
        """
        # `or` instead of a .get() default: a default only covers a missing
        # key, while an explicit JSON null would otherwise crash below.
        resources_data = data.get("resources") or {}
        volume_mounts_data = data.get("volume_mounts") or []
        return cls(
            name=data.get("name", ""),
            image=data.get("image", "unknown"),
            resources=ResourceSpec(
                cpu=resources_data.get("cpu", "500m"),
                memory=resources_data.get("memory", "512Mi"),
                storage=resources_data.get("storage"),
            ),
            volume_mounts=[
                VolumeMountSpec(
                    volume_name=vm.get("volume_name", ""),
                    mount_path=vm.get("mount_path", ""),
                )
                for vm in volume_mounts_data
            ],
            id=data.get("id"),
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
        )
|
||||
|
||||
|
||||
@dataclass
class ResourceStatus:
    """Lightweight provisioning status for any async-created resource.

    Attributes:
        status: Resource lifecycle status. One of "provisioning", "ready", "failed".
        status_message: Human-readable details when status is "failed", None otherwise.
    """

    status: str
    status_message: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ResourceStatus:
        """Build a ResourceStatus from an API response dict.

        A missing ``status`` key is treated as ``"provisioning"``.
        """
        lifecycle = data.get("status", "provisioning")
        details = data.get("status_message")
        return cls(status=lifecycle, status_message=details)
|
||||
|
||||
|
||||
@dataclass
class Pool:
    """Represents a Sandbox Pool for pre-provisioned sandboxes.

    A pool keeps sandboxes created from a template warmed up ahead of time,
    so a sandbox can be handed out without waiting for a fresh one to be
    provisioned.

    Note: Templates with volume mounts cannot be used in pools.

    Attributes:
        name: Display name (can be updated).
        template_name: Name of the template the pool provisions from.
        replicas: Desired number of replicas.
        id: Unique identifier (UUID). Remains constant even if name changes.
            May be None for resources created before ID support was added.
        created_at: Creation timestamp as returned by the API, if any.
        updated_at: Last-update timestamp as returned by the API, if any.
    """

    name: str
    template_name: str
    replicas: int  # Desired replicas
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Pool:
        """Build a Pool from an API response dict.

        Missing required fields default to an empty string (names) or 0
        (replicas); missing optional fields become None.
        """
        # Optional metadata fields share the same "None when absent" rule.
        metadata = {key: data.get(key) for key in ("id", "created_at", "updated_at")}
        return cls(
            name=data.get("name", ""),
            template_name=data.get("template_name", ""),
            replicas=data.get("replicas", 0),
            **metadata,
        )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# WebSocket Command Execution Models
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
class OutputChunk:
    """One piece of streamed output produced while a command executes.

    Attributes:
        stream: Which stream the chunk came from: "stdout" or "stderr".
        data: Text carried by the chunk (valid UTF-8; the server takes care
            of splitting on character boundaries).
        offset: Byte offset of the chunk within its stream. Used internally
            when reconnecting; callers rarely need it.
    """

    stream: str
    data: str
    offset: int
|
||||
|
||||
|
||||
class CommandHandle:
    """Handle to a running command with streaming output and auto-reconnect.

    Iterable, yielding OutputChunk objects (stdout and stderr interleaved
    in arrival order). Access .result after iteration to get the full
    ExecutionResult.

    Auto-reconnect behavior:
    - Server hot-reload (1001 Going Away): reconnect immediately
    - Network error / unexpected close: reconnect with exponential backoff
    - User called kill(): do NOT reconnect (propagate error)

    The auto-reconnect is transparent -- the iterator reconnects and
    continues yielding chunks without any user intervention. If all
    reconnect attempts are exhausted, SandboxConnectionError is raised.

    Construction modes (controlled by ``command_id``):
    - **New execution** (``command_id=""``, the default): the constructor
      eagerly reads the server's ``"started"`` message to populate
      ``command_id`` and ``pid`` before returning.
    - **Reconnection** (``command_id`` set): skips the started-message
      read, since reconnect streams don't emit one.

    Example:
        handle = sandbox.run("make build", timeout=600, wait=False)

        for chunk in handle:  # auto-reconnects on transient errors
            print(chunk.data, end="")

        result = handle.result
        print(f"Exit code: {result.exit_code}")
    """

    MAX_AUTO_RECONNECTS = 5
    _BACKOFF_BASE = 0.5  # seconds
    _BACKOFF_MAX = 8.0  # seconds

    def __init__(
        self,
        message_stream: Iterator[dict],
        control: Optional[_WSStreamControl],
        sandbox: Sandbox,
        *,
        command_id: str = "",
        stdout_offset: int = 0,
        stderr_offset: int = 0,
    ) -> None:
        """Wrap a message stream for a new or reconnected command.

        Args:
            message_stream: Iterator of server message dicts.
            control: Control object used for kill/stdin, or None.
            sandbox: Sandbox used to re-open the stream on reconnect.
            command_id: Existing command ID when reconnecting; empty for a
                new execution.
            stdout_offset: Initial stdout byte offset.
            stderr_offset: Initial stderr byte offset.
        """
        self._stream = message_stream
        self._control = control
        self._sandbox = sandbox
        self._command_id: Optional[str] = None
        self._pid: Optional[int] = None
        self._result: Optional[ExecutionResult] = None
        self._stdout_parts: list[str] = []
        self._stderr_parts: list[str] = []
        self._exhausted = False
        self._last_stdout_offset = stdout_offset
        self._last_stderr_offset = stderr_offset

        # New executions (command_id=""): eagerly read the "started" message.
        # Reconnections (command_id set): skip it, since reconnect streams
        # don't send a "started" message.
        if command_id:
            self._command_id = command_id
        else:
            self._consume_started()

    def _consume_started(self) -> None:
        """Eagerly read the 'started' message to populate command_id and pid.

        Blocks briefly until the server sends the started message (arrives
        near-instantly after connection). After this call, command_id and
        pid are available, and the WebSocket is bound to the control object
        (so kill() works).

        Raises:
            SandboxOperationError: If the stream ends before, or starts with
                something other than, a 'started' message.
        """
        try:
            first_msg = next(self._stream)
        except StopIteration:
            # `from None`: the StopIteration is an implementation detail and
            # would only add noise to the caller's traceback.
            raise SandboxOperationError(
                "Command stream ended before 'started' message",
                operation="command",
            ) from None
        if first_msg.get("type") != "started":
            raise SandboxOperationError(
                f"Expected 'started' message, got '{first_msg.get('type')}'",
                operation="command",
            )
        self._command_id = first_msg.get("command_id")
        self._pid = first_msg.get("pid")

    def _require_command_id(self) -> str:
        """Return the command ID, raising if none is known.

        An explicit check is used instead of ``assert`` so that it still
        runs when Python is started with ``-O``.
        """
        if self._command_id is None:
            raise SandboxOperationError(
                "Cannot reconnect: command has no command_id",
                operation="command",
            )
        return self._command_id

    @property
    def command_id(self) -> Optional[str]:
        """The server-assigned command ID. Available after construction."""
        return self._command_id

    @property
    def pid(self) -> Optional[int]:
        """The process ID on the sandbox. Available after construction."""
        return self._pid

    @property
    def result(self) -> ExecutionResult:
        """The final execution result. Blocks until the command completes.

        Drains the remaining stream if not already exhausted, then returns
        the ExecutionResult with aggregated stdout, stderr, and exit_code.

        Raises:
            SandboxOperationError: If the stream ended without an exit message.
        """
        if self._result is None:
            for _ in self:
                pass
        if self._result is None:
            raise SandboxOperationError(
                "Command stream ended without exit message",
                operation="command",
            )
        return self._result

    def _iter_stream(self) -> Iterator[OutputChunk]:
        """Iterate over output chunks from the current stream (no reconnect)."""
        if self._exhausted:
            return
        for msg in self._stream:
            msg_type = msg.get("type")
            if msg_type in ("stdout", "stderr"):
                chunk = OutputChunk(
                    stream=msg_type,
                    data=msg["data"],
                    offset=msg.get("offset", 0),
                )
                # Accumulate text so `.result` can return the full output.
                if msg_type == "stdout":
                    self._stdout_parts.append(msg["data"])
                else:
                    self._stderr_parts.append(msg["data"])
                yield chunk
            elif msg_type == "exit":
                self._result = ExecutionResult(
                    stdout="".join(self._stdout_parts),
                    stderr="".join(self._stderr_parts),
                    exit_code=msg["exit_code"],
                )
                self._exhausted = True
                return
        # Stream ended without an exit message; `.result` reports that case.
        self._exhausted = True

    def __iter__(self) -> Iterator[OutputChunk]:
        """Iterate over output chunks, auto-reconnecting on transient errors.

        Reconnect strategy:
        - 1001 Going Away (hot-reload): immediate reconnect, no delay
        - Other SandboxConnectionError: exponential backoff (0.5s, 1s, 2s...)
        - After kill(): no reconnect, error propagates

        Raises:
            SandboxConnectionError: After kill(), or once more than
                MAX_AUTO_RECONNECTS consecutive reconnects were needed.
            SandboxOperationError: If a reconnect is needed but no
                command_id is known.
        """
        import time

        reconnect_attempts = 0
        while True:
            try:
                for chunk in self._iter_stream():
                    reconnect_attempts = 0  # Reset on successful data
                    # Next resume point = chunk start + its UTF-8 byte length.
                    if chunk.stream == "stdout":
                        self._last_stdout_offset = chunk.offset + len(
                            chunk.data.encode("utf-8")
                        )
                    else:
                        self._last_stderr_offset = chunk.offset + len(
                            chunk.data.encode("utf-8")
                        )
                    yield chunk
                return  # Stream ended normally (exit message received)

            except SandboxConnectionError as e:
                if self._control and self._control.killed:
                    raise

                reconnect_attempts += 1
                if reconnect_attempts > self.MAX_AUTO_RECONNECTS:
                    raise SandboxConnectionError(
                        f"Lost connection {reconnect_attempts} times in "
                        f"succession, giving up"
                    ) from e

                is_hot_reload = isinstance(e, SandboxServerReloadError)
                if not is_hot_reload:
                    delay = min(
                        self._BACKOFF_BASE * (2 ** (reconnect_attempts - 1)),
                        self._BACKOFF_MAX,
                    )
                    time.sleep(delay)

                new_handle = self._sandbox.reconnect(
                    self._require_command_id(),
                    stdout_offset=self._last_stdout_offset,
                    stderr_offset=self._last_stderr_offset,
                )
                # Adopt the fresh stream/control; accumulated output is kept.
                self._stream = new_handle._stream
                self._control = new_handle._control
                self._exhausted = False

    def kill(self) -> None:
        """Send a kill signal to the running command (SIGKILL).

        The server kills the entire process group. The stream will
        subsequently yield an exit message with a non-zero exit code.

        Has no effect if the command has already exited or the
        WebSocket connection is closed.
        """
        if self._control:
            self._control.send_kill()

    def send_input(self, data: str) -> None:
        """Write data to the command's stdin.

        Args:
            data: String data to write to stdin.

        Has no effect if the command has already exited or the
        WebSocket connection is closed.
        """
        if self._control:
            self._control.send_input(data)

    @property
    def last_stdout_offset(self) -> int:
        """Last known stdout byte offset (for manual reconnection)."""
        return self._last_stdout_offset

    @property
    def last_stderr_offset(self) -> int:
        """Last known stderr byte offset (for manual reconnection)."""
        return self._last_stderr_offset

    def reconnect(self) -> CommandHandle:
        """Reconnect to this command from the last known offsets.

        Returns a new handle that resumes output from where this one
        left off. Any output produced while disconnected is replayed
        from the server's ring buffer.

        Returns:
            A new CommandHandle.

        Raises:
            SandboxOperationError: If no command_id is known, the
                command_id is not found, or the session expired.
            SandboxConnectionError: If connection to sandbox fails.
        """
        return self._sandbox.reconnect(
            self._require_command_id(),
            stdout_offset=self._last_stdout_offset,
            stderr_offset=self._last_stderr_offset,
        )
|
||||
|
||||
|
||||
class AsyncCommandHandle:
    """Async handle to a running command with streaming output and auto-reconnect.

    Async iterable, yielding OutputChunk objects (stdout and stderr interleaved
    in arrival order). Access .result after iteration to get the full
    ExecutionResult.

    Auto-reconnect behavior:
    - Server hot-reload (1001 Going Away): reconnect immediately
    - Network error / unexpected close: reconnect with exponential backoff
    - User called kill(): do NOT reconnect (propagate error)

    Construction modes (controlled by ``command_id``):
    - **New execution** (``command_id=""``, the default): call
      ``await handle._ensure_started()`` after construction to read the
      server's ``"started"`` message and populate ``command_id`` / ``pid``.
    - **Reconnection** (``command_id`` set): skips the started-message
      read, since reconnect streams don't emit one.

    Example:
        handle = await sandbox.run("make build", timeout=600, wait=False)

        async for chunk in handle:  # auto-reconnects on transient errors
            print(chunk.data, end="")

        result = await handle.result
        print(f"Exit code: {result.exit_code}")
    """

    MAX_AUTO_RECONNECTS = 5
    _BACKOFF_BASE = 0.5  # seconds
    _BACKOFF_MAX = 8.0  # seconds

    def __init__(
        self,
        message_stream: AsyncIterator[dict],
        control: Optional[_AsyncWSStreamControl],
        sandbox: AsyncSandbox,
        *,
        command_id: str = "",
        stdout_offset: int = 0,
        stderr_offset: int = 0,
    ) -> None:
        """Wrap an async message stream for a new or reconnected command.

        Args:
            message_stream: Async iterator of server message dicts.
            control: Control object used for kill/stdin, or None.
            sandbox: Sandbox used to re-open the stream on reconnect.
            command_id: Existing command ID when reconnecting; empty for a
                new execution.
            stdout_offset: Initial stdout byte offset.
            stderr_offset: Initial stderr byte offset.
        """
        self._stream = message_stream
        self._control = control
        self._sandbox = sandbox
        self._command_id: Optional[str] = None
        self._pid: Optional[int] = None
        self._result: Optional[ExecutionResult] = None
        self._stdout_parts: list[str] = []
        self._stderr_parts: list[str] = []
        self._exhausted = False
        self._last_stdout_offset = stdout_offset
        self._last_stderr_offset = stderr_offset

        # New executions (command_id=""): _ensure_started reads "started".
        # Reconnections (command_id set): skip since reconnect streams
        # don't send a "started" message.
        if command_id:
            self._command_id = command_id
            self._started = True
        else:
            self._started = False

    async def _ensure_started(self) -> None:
        """Read the 'started' message to populate command_id and pid.

        Does nothing if the message was already consumed or the handle was
        built for a reconnection.

        Raises:
            SandboxOperationError: If the stream ends before, or starts with
                something other than, a 'started' message.
        """
        if self._started:
            return
        try:
            first_msg = await self._stream.__anext__()
        except StopAsyncIteration:
            # `from None`: the StopAsyncIteration is an implementation detail
            # and would only add noise to the caller's traceback.
            raise SandboxOperationError(
                "Command stream ended before 'started' message",
                operation="command",
            ) from None
        if first_msg.get("type") != "started":
            raise SandboxOperationError(
                f"Expected 'started' message, got '{first_msg.get('type')}'",
                operation="command",
            )
        self._command_id = first_msg.get("command_id")
        self._pid = first_msg.get("pid")
        self._started = True

    def _require_command_id(self) -> str:
        """Return the command ID, raising if none is known.

        An explicit check is used instead of ``assert`` so that it still
        runs when Python is started with ``-O``.
        """
        if self._command_id is None:
            raise SandboxOperationError(
                "Cannot reconnect: command has no command_id",
                operation="command",
            )
        return self._command_id

    @property
    def command_id(self) -> Optional[str]:
        """The server-assigned command ID. Available after _ensure_started."""
        return self._command_id

    @property
    def pid(self) -> Optional[int]:
        """The process ID on the sandbox. Available after _ensure_started."""
        return self._pid

    @property
    async def result(self) -> ExecutionResult:
        """The final execution result. Awaitable.

        Drains the remaining stream if no result has been recorded yet.

        Raises:
            SandboxOperationError: If the stream ended without an exit message.
        """
        if self._result is None:
            async for _ in self:
                pass
        if self._result is None:
            raise SandboxOperationError(
                "Command stream ended without exit message",
                operation="command",
            )
        return self._result

    async def _aiter_stream(self) -> AsyncIterator[OutputChunk]:
        """Iterate over output chunks from the current stream (no reconnect)."""
        await self._ensure_started()
        if self._exhausted:
            return
        async for msg in self._stream:
            msg_type = msg.get("type")
            if msg_type in ("stdout", "stderr"):
                chunk = OutputChunk(
                    stream=msg_type,
                    data=msg["data"],
                    offset=msg.get("offset", 0),
                )
                # Accumulate text so `.result` can return the full output.
                if msg_type == "stdout":
                    self._stdout_parts.append(msg["data"])
                else:
                    self._stderr_parts.append(msg["data"])
                yield chunk
            elif msg_type == "exit":
                self._result = ExecutionResult(
                    stdout="".join(self._stdout_parts),
                    stderr="".join(self._stderr_parts),
                    exit_code=msg["exit_code"],
                )
                self._exhausted = True
                return
        # Stream ended without an exit message; `.result` reports that case.
        self._exhausted = True

    async def __aiter__(self) -> AsyncIterator[OutputChunk]:
        """Async iterate with auto-reconnect on transient errors.

        Hot-reload errors reconnect immediately; other connection errors
        back off exponentially; after kill() the error propagates.

        Raises:
            SandboxConnectionError: After kill(), or once more than
                MAX_AUTO_RECONNECTS consecutive reconnects were needed.
            SandboxOperationError: If a reconnect is needed but no
                command_id is known.
        """
        import asyncio

        reconnect_attempts = 0
        while True:
            try:
                async for chunk in self._aiter_stream():
                    reconnect_attempts = 0
                    # Next resume point = chunk start + its UTF-8 byte length.
                    if chunk.stream == "stdout":
                        self._last_stdout_offset = chunk.offset + len(
                            chunk.data.encode("utf-8")
                        )
                    else:
                        self._last_stderr_offset = chunk.offset + len(
                            chunk.data.encode("utf-8")
                        )
                    yield chunk
                return  # Stream ended normally

            except SandboxConnectionError as e:
                if self._control and self._control.killed:
                    raise

                reconnect_attempts += 1
                if reconnect_attempts > self.MAX_AUTO_RECONNECTS:
                    raise SandboxConnectionError(
                        f"Lost connection {reconnect_attempts} times "
                        f"in succession, giving up"
                    ) from e

                is_hot_reload = isinstance(e, SandboxServerReloadError)
                if not is_hot_reload:
                    delay = min(
                        self._BACKOFF_BASE * (2 ** (reconnect_attempts - 1)),
                        self._BACKOFF_MAX,
                    )
                    await asyncio.sleep(delay)

                new_handle = await self._sandbox.reconnect(
                    self._require_command_id(),
                    stdout_offset=self._last_stdout_offset,
                    stderr_offset=self._last_stderr_offset,
                )
                # Adopt the fresh stream/control; accumulated output is kept.
                self._stream = new_handle._stream
                self._control = new_handle._control
                self._exhausted = False

    async def kill(self) -> None:
        """Send a kill signal to the running command.

        Does nothing when the handle has no control object.
        """
        if self._control:
            await self._control.send_kill()

    async def send_input(self, data: str) -> None:
        """Write data to the command's stdin.

        Does nothing when the handle has no control object.
        """
        if self._control:
            await self._control.send_input(data)

    @property
    def last_stdout_offset(self) -> int:
        """Last known stdout byte offset (for manual reconnection)."""
        return self._last_stdout_offset

    @property
    def last_stderr_offset(self) -> int:
        """Last known stderr byte offset (for manual reconnection)."""
        return self._last_stderr_offset

    async def reconnect(self) -> AsyncCommandHandle:
        """Reconnect to this command from the last known offsets.

        Raises:
            SandboxOperationError: If no command_id is known.
        """
        return await self._sandbox.reconnect(
            self._require_command_id(),
            stdout_offset=self._last_stdout_offset,
            stderr_offset=self._last_stderr_offset,
        )
|
||||
445
venv/Lib/site-packages/langsmith/sandbox/_sandbox.py
Normal file
445
venv/Lib/site-packages/langsmith/sandbox/_sandbox.py
Normal file
@@ -0,0 +1,445 @@
|
||||
"""Sandbox class for interacting with a specific sandbox instance."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, overload
|
||||
|
||||
import httpx
|
||||
|
||||
from langsmith.sandbox._exceptions import (
|
||||
DataplaneNotConfiguredError,
|
||||
ResourceNotFoundError,
|
||||
SandboxConnectionError,
|
||||
SandboxNotReadyError,
|
||||
)
|
||||
from langsmith.sandbox._helpers import handle_sandbox_http_error
|
||||
from langsmith.sandbox._models import (
|
||||
CommandHandle,
|
||||
ExecutionResult,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langsmith.sandbox._client import SandboxClient
|
||||
|
||||
|
||||
@dataclass
|
||||
class Sandbox:
|
||||
"""Represents an active sandbox for running commands and file operations.
|
||||
|
||||
This class is typically obtained from SandboxClient.sandbox() and supports
|
||||
the context manager protocol for automatic cleanup.
|
||||
|
||||
Attributes:
|
||||
name: Display name (can be updated).
|
||||
template_name: Name of the template used to create this sandbox.
|
||||
dataplane_url: URL for data plane operations (file I/O, command execution).
|
||||
Only functional when status is "ready".
|
||||
id: Unique identifier (UUID). Remains constant even if name changes.
|
||||
May be None for resources created before ID support was added.
|
||||
status: Sandbox lifecycle status. One of "provisioning", "ready", "failed".
|
||||
status_message: Human-readable details when status is "failed", None otherwise.
|
||||
created_at: Timestamp when the sandbox was created.
|
||||
updated_at: Timestamp when the sandbox was last updated.
|
||||
|
||||
Example:
|
||||
with client.sandbox(template_name="python-sandbox") as sandbox:
|
||||
result = sandbox.run("python --version")
|
||||
print(result.stdout)
|
||||
"""
|
||||
|
||||
# Data fields (from API response)
|
||||
name: str
|
||||
template_name: str
|
||||
dataplane_url: Optional[str] = None
|
||||
id: Optional[str] = None
|
||||
status: str = "ready"
|
||||
status_message: Optional[str] = None
|
||||
created_at: Optional[str] = None
|
||||
updated_at: Optional[str] = None
|
||||
|
||||
# Internal fields (not from API)
|
||||
_client: SandboxClient = field(repr=False, default=None) # type: ignore
|
||||
_auto_delete: bool = field(repr=False, default=True)
|
||||
|
||||
@classmethod
|
||||
def from_dict(
|
||||
cls,
|
||||
data: dict[str, Any],
|
||||
client: SandboxClient,
|
||||
auto_delete: bool = True,
|
||||
) -> Sandbox:
|
||||
"""Create a Sandbox from API response dict.
|
||||
|
||||
Args:
|
||||
data: API response dictionary containing sandbox data.
|
||||
client: Parent SandboxClient for operations.
|
||||
auto_delete: Whether to delete the sandbox on context exit.
|
||||
|
||||
Returns:
|
||||
Sandbox instance.
|
||||
"""
|
||||
return cls(
|
||||
name=data.get("name", ""),
|
||||
template_name=data.get("template_name", ""),
|
||||
dataplane_url=data.get("dataplane_url"),
|
||||
id=data.get("id"),
|
||||
status=data.get("status", "ready"),
|
||||
status_message=data.get("status_message"),
|
||||
created_at=data.get("created_at"),
|
||||
updated_at=data.get("updated_at"),
|
||||
_client=client,
|
||||
_auto_delete=auto_delete,
|
||||
)
|
||||
|
||||
def __enter__(self) -> Sandbox:
|
||||
"""Enter context manager."""
|
||||
return self
|
||||
|
||||
def __exit__(
|
||||
self,
|
||||
exc_type: Optional[type],
|
||||
exc_val: Optional[BaseException],
|
||||
exc_tb: Optional[Any],
|
||||
) -> None:
|
||||
"""Exit context manager, optionally deleting the sandbox."""
|
||||
if self._auto_delete:
|
||||
try:
|
||||
self._client.delete_sandbox(self.name)
|
||||
except Exception:
|
||||
# Don't raise on cleanup errors
|
||||
pass
|
||||
|
||||
def _require_dataplane_url(self) -> str:
|
||||
"""Validate and return the dataplane URL.
|
||||
|
||||
Returns:
|
||||
The dataplane URL.
|
||||
|
||||
Raises:
|
||||
SandboxNotReadyError: If sandbox status is not "ready".
|
||||
DataplaneNotConfiguredError: If dataplane_url is not configured.
|
||||
"""
|
||||
if self.status != "ready":
|
||||
raise SandboxNotReadyError(
|
||||
f"Sandbox '{self.name}' is not ready (status: {self.status}). "
|
||||
"Wait for status 'ready' before running operations."
|
||||
)
|
||||
if not self.dataplane_url:
|
||||
raise DataplaneNotConfiguredError(
|
||||
f"Sandbox '{self.name}' does not have a dataplane_url configured. "
|
||||
"Runtime operations require a dataplane URL."
|
||||
)
|
||||
return self.dataplane_url
|
||||
|
||||
@overload
|
||||
def run(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int = ...,
|
||||
env: Optional[dict[str, str]] = ...,
|
||||
cwd: Optional[str] = ...,
|
||||
shell: str = ...,
|
||||
on_stdout: Optional[Callable[[str], Any]] = ...,
|
||||
on_stderr: Optional[Callable[[str], Any]] = ...,
|
||||
wait: Literal[True] = ...,
|
||||
) -> ExecutionResult: ...
|
||||
|
||||
@overload
|
||||
def run(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int = ...,
|
||||
env: Optional[dict[str, str]] = ...,
|
||||
cwd: Optional[str] = ...,
|
||||
shell: str = ...,
|
||||
on_stdout: Optional[Callable[[str], Any]] = ...,
|
||||
on_stderr: Optional[Callable[[str], Any]] = ...,
|
||||
wait: Literal[False],
|
||||
) -> CommandHandle: ...
|
||||
|
||||
def run(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int = 60,
|
||||
env: Optional[dict[str, str]] = None,
|
||||
cwd: Optional[str] = None,
|
||||
shell: str = "/bin/bash",
|
||||
on_stdout: Optional[Callable[[str], Any]] = None,
|
||||
on_stderr: Optional[Callable[[str], Any]] = None,
|
||||
wait: bool = True,
|
||||
) -> Union[ExecutionResult, CommandHandle]:
|
||||
"""Execute a command in the sandbox.
|
||||
|
||||
Args:
|
||||
command: Shell command to execute.
|
||||
timeout: Command timeout in seconds.
|
||||
env: Environment variables to set for the command.
|
||||
cwd: Working directory for command execution. If None, uses sandbox default.
|
||||
shell: Shell to use for command execution. Defaults to "/bin/bash".
|
||||
on_stdout: Callback invoked with each stdout chunk as it arrives.
|
||||
Blocks until the command completes and returns ExecutionResult.
|
||||
Cannot be combined with wait=False.
|
||||
on_stderr: Callback invoked with each stderr chunk as it arrives.
|
||||
Blocks until the command completes and returns ExecutionResult.
|
||||
Cannot be combined with wait=False.
|
||||
wait: If True (default), block until the command completes and
|
||||
return ExecutionResult. If False, return a
|
||||
CommandHandle immediately for streaming output,
|
||||
kill, stdin input, and reconnection. Cannot be combined with
|
||||
on_stdout/on_stderr callbacks.
|
||||
|
||||
Returns:
|
||||
ExecutionResult when wait=True (default).
|
||||
CommandHandle when wait=False.
|
||||
|
||||
Raises:
|
||||
ValueError: If wait=False is combined with callbacks.
|
||||
DataplaneNotConfiguredError: If dataplane_url is not configured.
|
||||
SandboxOperationError: If command execution fails.
|
||||
CommandTimeoutError: If command exceeds its timeout.
|
||||
SandboxConnectionError: If connection to sandbox fails after retries.
|
||||
SandboxNotReadyError: If sandbox is not ready.
|
||||
SandboxClientError: For other errors.
|
||||
"""
|
||||
if not wait and (on_stdout or on_stderr):
|
||||
raise ValueError(
|
||||
"Cannot combine wait=False with on_stdout/on_stderr callbacks. "
|
||||
"Use wait=False and iterate the CommandHandle, or use callbacks."
|
||||
)
|
||||
|
||||
self._require_dataplane_url()
|
||||
|
||||
# When not waiting or callbacks are requested, WS is required
|
||||
use_ws = not wait or on_stdout or on_stderr
|
||||
if use_ws:
|
||||
return self._run_ws(
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
wait=wait,
|
||||
on_stdout=on_stdout,
|
||||
on_stderr=on_stderr,
|
||||
)
|
||||
|
||||
# Default (wait=True, no callbacks): try WS, fall back to HTTP.
|
||||
# Catch broad exceptions so that unexpected WS failures (e.g. version
|
||||
# incompatibilities) don't break users who don't need WS features.
|
||||
try:
|
||||
return self._run_ws(
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
wait=True,
|
||||
on_stdout=None,
|
||||
on_stderr=None,
|
||||
)
|
||||
except (SandboxConnectionError, ImportError, OSError, TypeError):
|
||||
return self._run_http(
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
)
|
||||
|
||||
def _run_ws(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int,
|
||||
env: Optional[dict[str, str]],
|
||||
cwd: Optional[str],
|
||||
shell: str,
|
||||
wait: bool,
|
||||
on_stdout: Optional[Callable[[str], Any]],
|
||||
on_stderr: Optional[Callable[[str], Any]],
|
||||
) -> Union[ExecutionResult, CommandHandle]:
|
||||
"""Execute via WebSocket /execute/ws."""
|
||||
from langsmith.sandbox._ws_execute import run_ws_stream
|
||||
|
||||
dataplane_url = self._require_dataplane_url()
|
||||
api_key = self._client._api_key
|
||||
|
||||
msg_stream, control = run_ws_stream(
|
||||
dataplane_url,
|
||||
api_key,
|
||||
command,
|
||||
timeout=timeout,
|
||||
env=env,
|
||||
cwd=cwd,
|
||||
shell=shell,
|
||||
on_stdout=on_stdout,
|
||||
on_stderr=on_stderr,
|
||||
)
|
||||
|
||||
handle = CommandHandle(msg_stream, control, self)
|
||||
|
||||
if not wait:
|
||||
return handle
|
||||
|
||||
return handle.result # blocks until command completes
|
||||
|
||||
def _run_http(
|
||||
self,
|
||||
command: str,
|
||||
*,
|
||||
timeout: int,
|
||||
env: Optional[dict[str, str]],
|
||||
cwd: Optional[str],
|
||||
shell: str,
|
||||
) -> ExecutionResult:
|
||||
"""Execute via HTTP POST /execute (existing implementation)."""
|
||||
dataplane_url = self._require_dataplane_url()
|
||||
url = f"{dataplane_url}/execute"
|
||||
payload: dict[str, Any] = {
|
||||
"command": command,
|
||||
"timeout": timeout,
|
||||
"shell": shell,
|
||||
}
|
||||
if env is not None:
|
||||
payload["env"] = env
|
||||
if cwd is not None:
|
||||
payload["cwd"] = cwd
|
||||
|
||||
try:
|
||||
response = self._client._http.post(url, json=payload, timeout=timeout + 10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return ExecutionResult(
|
||||
stdout=data.get("stdout", ""),
|
||||
stderr=data.get("stderr", ""),
|
||||
exit_code=data.get("exit_code", -1),
|
||||
)
|
||||
except httpx.HTTPStatusError as e:
|
||||
handle_sandbox_http_error(e)
|
||||
raise # pragma: no cover
|
||||
|
||||
def reconnect(
|
||||
self,
|
||||
command_id: str,
|
||||
*,
|
||||
stdout_offset: int = 0,
|
||||
stderr_offset: int = 0,
|
||||
) -> CommandHandle:
|
||||
"""Reconnect to a running or recently-finished command.
|
||||
|
||||
Resumes output from the given byte offsets. Any output produced while
|
||||
the client was disconnected is replayed from the server's ring buffer.
|
||||
|
||||
Args:
|
||||
command_id: The command ID from handle.command_id.
|
||||
stdout_offset: Byte offset to resume stdout from (default: 0).
|
||||
stderr_offset: Byte offset to resume stderr from (default: 0).
|
||||
|
||||
Returns:
|
||||
A CommandHandle for the command.
|
||||
|
||||
Raises:
|
||||
SandboxOperationError: If command_id is not found or session expired.
|
||||
SandboxConnectionError: If connection to sandbox fails after retries.
|
||||
"""
|
||||
from langsmith.sandbox._ws_execute import reconnect_ws_stream
|
||||
|
||||
dataplane_url = self._require_dataplane_url()
|
||||
api_key = self._client._api_key
|
||||
|
||||
msg_stream, control = reconnect_ws_stream(
|
||||
dataplane_url,
|
||||
api_key,
|
||||
command_id,
|
||||
stdout_offset=stdout_offset,
|
||||
stderr_offset=stderr_offset,
|
||||
)
|
||||
|
||||
return CommandHandle(
|
||||
msg_stream,
|
||||
control,
|
||||
self,
|
||||
command_id=command_id,
|
||||
stdout_offset=stdout_offset,
|
||||
stderr_offset=stderr_offset,
|
||||
)
|
||||
|
||||
def write(
|
||||
self,
|
||||
path: str,
|
||||
content: Union[str, bytes],
|
||||
*,
|
||||
timeout: int = 60,
|
||||
) -> None:
|
||||
"""Write content to a file in the sandbox.
|
||||
|
||||
Args:
|
||||
path: Target file path in the sandbox.
|
||||
content: File content (str or bytes).
|
||||
timeout: Request timeout in seconds.
|
||||
|
||||
Raises:
|
||||
DataplaneNotConfiguredError: If dataplane_url is not configured.
|
||||
SandboxOperationError: If file write fails.
|
||||
SandboxConnectionError: If connection to sandbox fails after retries.
|
||||
SandboxNotReadyError: If sandbox is not ready.
|
||||
SandboxClientError: For other errors.
|
||||
"""
|
||||
dataplane_url = self._require_dataplane_url()
|
||||
url = f"{dataplane_url}/upload"
|
||||
|
||||
# Ensure content is bytes for multipart upload
|
||||
if isinstance(content, str):
|
||||
content = content.encode("utf-8")
|
||||
|
||||
files = {"file": ("file", content)}
|
||||
|
||||
try:
|
||||
response = self._client._http.post(
|
||||
url, params={"path": path}, files=files, timeout=timeout
|
||||
)
|
||||
response.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
handle_sandbox_http_error(e)
|
||||
|
||||
def read(self, path: str, *, timeout: int = 60) -> bytes:
    """Download a file from the sandbox and return its raw bytes.

    The file is fetched from the dataplane ``/download`` endpoint, with the
    requested location passed as the ``path`` query parameter.

    Args:
        path: File path to read. Supports both absolute paths (e.g., /tmp/file.txt)
            and relative paths (resolved from /home/user/).
        timeout: Request timeout in seconds.

    Returns:
        File contents as bytes.

    Raises:
        DataplaneNotConfiguredError: If dataplane_url is not configured.
        ResourceNotFoundError: If the file doesn't exist.
        SandboxOperationError: If file read fails.
        SandboxConnectionError: If connection to sandbox fails after retries.
        SandboxNotReadyError: If sandbox is not ready.
        SandboxClientError: For other errors.
    """
    download_url = f"{self._require_dataplane_url()}/download"
    query = {"path": path}

    try:
        reply = self._client._http.get(download_url, params=query, timeout=timeout)
        reply.raise_for_status()
        return reply.content
    except httpx.HTTPStatusError as e:
        # A 404 gets a dedicated, file-specific error instead of the generic
        # HTTP error translation.
        if e.response.status_code == 404:
            not_found = ResourceNotFoundError(
                f"File '{path}' not found in sandbox '{self.name}'",
                resource_type="file",
            )
            raise not_found from e

        handle_sandbox_http_error(e)
        # The handler above is expected to raise; this re-raise only exists so
        # that every path visibly returns or raises for the type checker.
        raise  # pragma: no cover
|
||||
216
venv/Lib/site-packages/langsmith/sandbox/_transport.py
Normal file
216
venv/Lib/site-packages/langsmith/sandbox/_transport.py
Normal file
@@ -0,0 +1,216 @@
|
||||
"""Custom httpx transports with retry logic for the sandbox client.
|
||||
|
||||
Provides RetryTransport (sync) and AsyncRetryTransport (async) that wrap
|
||||
the default httpx transports with automatic retry on transient errors.
|
||||
This mirrors the main LangSmith client's _LangSmithHttpAdapter + urllib3.Retry
|
||||
architecture at the transport level, making retries transparent to all call sites.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
import httpx
|
||||
|
||||
from langsmith.sandbox._exceptions import SandboxConnectionError
|
||||
|
||||
logger = logging.getLogger(__name__)

# HTTP statuses that the retry transports treat as transient.
RETRYABLE_STATUS_CODES = frozenset((502, 503, 504))

# Upper bound, in seconds, for a single exponential-backoff delay.
_MAX_BACKOFF = 10.0


def _compute_backoff(attempt: int) -> float:
    """Return the delay in seconds to wait before the next retry.

    The delay is ``2**attempt`` plus a random fraction of a second, and is
    never larger than ``_MAX_BACKOFF``.

    Args:
        attempt: Zero-based index of the attempt that just failed.
    """
    jittered = 2**attempt + random.random()
    return _MAX_BACKOFF if _MAX_BACKOFF < jittered else jittered
|
||||
|
||||
|
||||
class RetryTransport(httpx.BaseTransport):
    """Synchronous httpx transport that transparently retries requests.

    A request is retried when:
    - the response status is 502, 503 or 504 (exponential backoff),
    - the response status is 429 (delay derived from the Retry-After header),
    - the wrapped transport raises ``httpx.ConnectError`` (exponential backoff).

    Once the retries are used up, the last response is returned as-is for
    status errors, and SandboxConnectionError is raised for connection errors.
    """

    def __init__(
        self,
        *,
        max_retries: int = 3,
        transport: httpx.BaseTransport | None = None,
    ) -> None:
        """Wrap ``transport`` (default: ``httpx.HTTPTransport()``).

        Args:
            max_retries: Number of retries allowed after the first attempt.
            transport: Underlying transport that actually sends requests.
        """
        self._max_retries = max_retries
        self._transport = transport or httpx.HTTPTransport()

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        """Send ``request``, retrying on transient failures.

        Returns:
            The first response that does not need a retry, or the response of
            the final attempt whatever its status.

        Raises:
            SandboxConnectionError: If the final attempt fails with
                ``httpx.ConnectError``.
        """
        last_response: httpx.Response | None = None

        for attempt in range(self._max_retries + 1):
            out_of_retries = attempt == self._max_retries

            try:
                response = self._transport.handle_request(request)
                last_response = response

                # The final attempt is returned untouched so the caller sees
                # the real status.
                if out_of_retries:
                    return response

                if response.status_code in RETRYABLE_STATUS_CODES:
                    response.close()
                    delay = _compute_backoff(attempt)
                    logger.debug(
                        "Retrying %s %s (status %d, attempt %d/%d, sleeping %.1fs)",
                        request.method,
                        request.url,
                        response.status_code,
                        attempt + 1,
                        self._max_retries,
                        delay,
                    )
                    time.sleep(delay)
                elif response.status_code == 429:
                    # Scale the server-provided delay by the attempt number
                    # and add jitter.
                    server_hint = _parse_retry_after(response)
                    delay = server_hint * 2**attempt + random.random()
                    response.close()
                    logger.debug(
                        "Rate limited on %s %s, retrying after %.1fs "
                        "(attempt %d/%d)",
                        request.method,
                        request.url,
                        delay,
                        attempt + 1,
                        self._max_retries,
                    )
                    time.sleep(delay)
                else:
                    return response

            except httpx.ConnectError as exc:
                if out_of_retries:
                    raise SandboxConnectionError(
                        f"Failed to connect to server after "
                        f"{self._max_retries + 1} attempts: {exc}"
                    ) from exc

                delay = _compute_backoff(attempt)
                logger.debug(
                    "Connection error on %s %s, retrying "
                    "(attempt %d/%d, sleeping %.1fs): %s",
                    request.method,
                    request.url,
                    attempt + 1,
                    self._max_retries,
                    delay,
                    exc,
                )
                time.sleep(delay)

        # Only reached when the loop body never ran (negative max_retries).
        assert last_response is not None
        return last_response

    def close(self) -> None:
        """Close the wrapped transport."""
        self._transport.close()
|
||||
|
||||
|
||||
class AsyncRetryTransport(httpx.AsyncBaseTransport):
    """Asynchronous httpx transport that transparently retries requests.

    Applies the same policy as RetryTransport, but awaits the wrapped
    transport and uses ``asyncio.sleep`` between attempts: 502/503/504 and
    ``httpx.ConnectError`` are retried with exponential backoff, and 429 is
    retried with a delay derived from the Retry-After header.
    """

    def __init__(
        self,
        *,
        max_retries: int = 3,
        transport: httpx.AsyncBaseTransport | None = None,
    ) -> None:
        """Wrap ``transport`` (default: ``httpx.AsyncHTTPTransport()``).

        Args:
            max_retries: Number of retries allowed after the first attempt.
            transport: Underlying async transport that actually sends requests.
        """
        self._max_retries = max_retries
        self._transport = transport or httpx.AsyncHTTPTransport()

    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
        """Send ``request``, retrying on transient failures.

        Returns:
            The first response that does not need a retry, or the response of
            the final attempt whatever its status.

        Raises:
            SandboxConnectionError: If the final attempt fails with
                ``httpx.ConnectError``.
        """
        last_response: httpx.Response | None = None

        for attempt in range(self._max_retries + 1):
            out_of_retries = attempt == self._max_retries

            try:
                response = await self._transport.handle_async_request(request)
                last_response = response

                # The final attempt is returned untouched so the caller sees
                # the real status.
                if out_of_retries:
                    return response

                if response.status_code in RETRYABLE_STATUS_CODES:
                    await response.aclose()
                    delay = _compute_backoff(attempt)
                    logger.debug(
                        "Retrying %s %s (status %d, attempt %d/%d, sleeping %.1fs)",
                        request.method,
                        request.url,
                        response.status_code,
                        attempt + 1,
                        self._max_retries,
                        delay,
                    )
                    await asyncio.sleep(delay)
                elif response.status_code == 429:
                    # Scale the server-provided delay by the attempt number
                    # and add jitter.
                    server_hint = _parse_retry_after(response)
                    delay = server_hint * 2**attempt + random.random()
                    await response.aclose()
                    logger.debug(
                        "Rate limited on %s %s, retrying after %.1fs "
                        "(attempt %d/%d)",
                        request.method,
                        request.url,
                        delay,
                        attempt + 1,
                        self._max_retries,
                    )
                    await asyncio.sleep(delay)
                else:
                    return response

            except httpx.ConnectError as exc:
                if out_of_retries:
                    raise SandboxConnectionError(
                        f"Failed to connect to server after "
                        f"{self._max_retries + 1} attempts: {exc}"
                    ) from exc

                delay = _compute_backoff(attempt)
                logger.debug(
                    "Connection error on %s %s, retrying "
                    "(attempt %d/%d, sleeping %.1fs): %s",
                    request.method,
                    request.url,
                    attempt + 1,
                    self._max_retries,
                    delay,
                    exc,
                )
                await asyncio.sleep(delay)

        # Only reached when the loop body never ran (negative max_retries).
        assert last_response is not None
        return last_response

    async def aclose(self) -> None:
        """Close the wrapped transport."""
        await self._transport.aclose()
|
||||
|
||||
|
||||
def _parse_retry_after(response: httpx.Response) -> float:
    """Return the delay, in seconds, requested by the Retry-After header.

    Both forms of the header are understood:

    - delay-seconds (e.g. ``"2"`` or ``"2.5"``), clamped to be non-negative;
    - an HTTP-date (e.g. ``"Wed, 21 Oct 2015 07:28:00 GMT"``), converted to
      the number of seconds from now, or ``0.0`` if the date is in the past.

    A missing header, an unparseable value, or a non-finite number
    (``nan``/``inf``) falls back to 1.0 second, so the result is always a
    finite, non-negative float that is safe to pass to a sleep call.

    Args:
        response: Response whose ``headers`` mapping is inspected.

    Returns:
        Number of seconds to wait before retrying.
    """
    # Imported locally so this helper does not depend on the module's
    # top-of-file import block.
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    default_delay = 1.0
    raw = response.headers.get("retry-after", "1")

    try:
        seconds = float(raw)
    except TypeError:
        # Not a string/number at all; nothing sensible to parse.
        return default_delay
    except ValueError:
        # Not numeric; fall through and try the HTTP-date form.
        pass
    else:
        # float() accepts "nan" and "inf", neither of which is a usable delay.
        return max(seconds, 0.0) if math.isfinite(seconds) else default_delay

    try:
        retry_at = parsedate_to_datetime(raw)
    except (ValueError, TypeError):
        # ValueError on current Pythons, TypeError on older ones.
        return default_delay
    if retry_at.tzinfo is None:
        # A date without zone information is interpreted as UTC.
        retry_at = retry_at.replace(tzinfo=timezone.utc)
    remaining = (retry_at - datetime.now(timezone.utc)).total_seconds()
    return max(remaining, 0.0)
|
||||
527
venv/Lib/site-packages/langsmith/sandbox/_ws_execute.py
Normal file
527
venv/Lib/site-packages/langsmith/sandbox/_ws_execute.py
Normal file
@@ -0,0 +1,527 @@
|
||||
"""WebSocket-based command execution for long-running commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections.abc import AsyncIterator, Iterator
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from langsmith.sandbox._exceptions import (
|
||||
CommandTimeoutError,
|
||||
SandboxConnectionError,
|
||||
SandboxOperationError,
|
||||
SandboxServerReloadError,
|
||||
)
|
||||
|
||||
|
||||
def _ensure_websockets():
    """Load the synchronous websockets client lazily.

    Returns:
        A ``(connect, ConnectionClosed, InvalidStatus)`` tuple taken from the
        ``websockets`` package.

    Raises:
        ImportError: If ``websockets`` is not installed; the message explains
            how to install it.
    """
    try:
        from websockets.exceptions import ConnectionClosed, InvalidStatus
        from websockets.sync.client import connect as ws_connect
    except ImportError:
        # ``from None`` hides the original traceback in favour of the hint.
        raise ImportError(
            "WebSocket-based execution requires the 'websockets' package. "
            "Install it with: pip install 'langsmith[sandbox]'"
        ) from None
    else:
        return ws_connect, ConnectionClosed, InvalidStatus
|
||||
|
||||
|
||||
def _ensure_websockets_async():
    """Load the asyncio websockets client lazily.

    Returns:
        A ``(connect, ConnectionClosed, InvalidStatus)`` tuple taken from the
        ``websockets`` package.

    Raises:
        ImportError: If ``websockets`` is not installed; the message explains
            how to install it.
    """
    try:
        from websockets.asyncio.client import connect as ws_connect_async
        from websockets.exceptions import ConnectionClosed, InvalidStatus
    except ImportError:
        # ``from None`` hides the original traceback in favour of the hint.
        raise ImportError(
            "WebSocket-based execution requires the 'websockets' package. "
            "Install it with: pip install 'langsmith[sandbox]'"
        ) from None
    else:
        return ws_connect_async, ConnectionClosed, InvalidStatus
|
||||
|
||||
|
||||
def _build_ws_url(dataplane_url: str) -> str:
    """Convert a dataplane HTTP URL into the WebSocket URL for /execute/ws.

    Only the URL's own scheme is rewritten (``https`` -> ``wss``,
    ``http`` -> ``ws``, matched case-insensitively); any ``http://`` text
    later in the URL is left alone. URLs with another scheme, or with no
    scheme, are passed through unchanged. Trailing slashes are dropped so
    the result never contains ``//execute/ws``.

    Args:
        dataplane_url: Base URL of the sandbox dataplane.

    Returns:
        The URL of the WebSocket execution endpoint.
    """
    base = dataplane_url.rstrip("/")

    scheme, separator, remainder = base.partition("://")
    if separator:
        lowered = scheme.lower()
        if lowered == "https":
            base = f"wss://{remainder}"
        elif lowered == "http":
            base = f"ws://{remainder}"

    return f"{base}/execute/ws"
|
||||
|
||||
|
||||
def _build_auth_headers(api_key: Optional[str]) -> dict[str, str]:
    """Return the headers to send with the WebSocket upgrade request.

    Args:
        api_key: API key to send as ``X-Api-Key``; ``None`` or an empty
            string means no auth header is added.

    Returns:
        A dict with the ``X-Api-Key`` header, or an empty dict.
    """
    return {"X-Api-Key": api_key} if api_key else {}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Stream Control
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class _WSStreamControl:
    """Handle used to talk back to a command running over a WebSocket.

    The object is created before the stream generator starts and is attached
    to the WebSocket once the connection opens. The CommandHandle keeps a
    reference to it in order to send kill/input messages.

    Thread safety: websockets' sync client supports send() from one
    thread while recv() runs on another. So kill() from user code
    and iteration on a different thread are safe.
    """

    def __init__(self) -> None:
        """Start unbound, open and not killed."""
        self._killed = False
        self._closed = False
        self._ws: Any = None

    def _bind(self, ws: Any) -> None:
        """Attach the live WebSocket. Called from inside the generator."""
        self._ws = ws

    def _unbind(self) -> None:
        """Detach the WebSocket and mark the stream closed.

        Called when the generator exits.
        """
        self._ws = None
        self._closed = True

    @property
    def killed(self) -> bool:
        """Whether send_kill() has been called on this stream."""
        return self._killed

    def send_kill(self) -> None:
        """Ask the server to abort the running command.

        The killed flag is set even when no message can be sent because the
        stream is unbound or already closed.
        """
        self._killed = True
        if not self._ws or self._closed:
            return
        self._ws.send(json.dumps({"type": "kill"}))

    def send_input(self, data: str) -> None:
        """Forward ``data`` to the running command's stdin.

        Nothing is sent when the stream is unbound or already closed.
        """
        if not self._ws or self._closed:
            return
        self._ws.send(json.dumps({"type": "input", "data": data}))
|
||||
|
||||
|
||||
class _AsyncWSStreamControl:
    """Async counterpart of _WSStreamControl.

    Holds the WebSocket of an active stream so that kill/input messages can
    be awaited on it; the send methods are coroutines.
    """

    def __init__(self) -> None:
        """Start unbound, open and not killed."""
        self._killed = False
        self._closed = False
        self._ws: Any = None

    def _bind(self, ws: Any) -> None:
        """Attach the live WebSocket."""
        self._ws = ws

    def _unbind(self) -> None:
        """Detach the WebSocket and mark the stream closed."""
        self._ws = None
        self._closed = True

    @property
    def killed(self) -> bool:
        """Whether send_kill() has been called on this stream."""
        return self._killed

    async def send_kill(self) -> None:
        """Ask the server to abort the running command.

        The killed flag is set even when no message can be sent because the
        stream is unbound or already closed.
        """
        self._killed = True
        if not self._ws or self._closed:
            return
        await self._ws.send(json.dumps({"type": "kill"}))

    async def send_input(self, data: str) -> None:
        """Forward ``data`` to the running command's stdin.

        Nothing is sent when the stream is unbound or already closed.
        """
        if not self._ws or self._closed:
            return
        await self._ws.send(json.dumps({"type": "input", "data": data}))
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Error Handling
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _raise_for_invalid_status(exc: Exception, ws_url: str) -> None:
    """Turn a rejected WebSocket upgrade into a SandboxConnectionError.

    The HTTP status is read from ``exc.response.status_code`` when those
    attributes exist. A 404 gets a dedicated message: the server has no
    /execute/ws endpoint, meaning it doesn't support WebSocket streaming.
    Any other status is reported generically. This function always raises.

    Args:
        exc: The exception raised while opening the WebSocket.
        ws_url: The WebSocket URL that was being opened.

    Raises:
        SandboxConnectionError: Always, chained from ``exc``.
    """
    rejected_response = getattr(exc, "response", None)
    status = getattr(rejected_response, "status_code", None)

    if status == 404:
        explanation = (
            f"The sandbox server does not support WebSocket command execution "
            f"(endpoint {ws_url} returned 404). Ensure the server is updated "
            f"to a version that supports the /execute/ws endpoint, or use "
            f"run() without wait=False or callbacks."
        )
    else:
        # Any other status: report the code together with the original error.
        explanation = f"WebSocket upgrade rejected by server (HTTP {status}): {exc}"

    raise SandboxConnectionError(explanation) from exc
|
||||
|
||||
|
||||
def _raise_from_error_msg(msg: dict, *, command_id: str = "") -> None:
    """Raise the exception that corresponds to a server ``error`` message.

    ``msg["error_type"]`` (default ``"CommandError"``) selects the exception
    and ``msg["error"]`` (default ``"Unknown error"``) supplies the text.
    This function always raises.

    Args:
        msg: Decoded error message received from the server.
        command_id: ID of the command being reconnected to. When non-empty,
            the operation is reported as ``"reconnect"`` instead of
            ``"command"``, and not-found / expired errors mention the ID.

    Raises:
        CommandTimeoutError: If the error type is ``"CommandTimeout"``.
        SandboxOperationError: For every other error type.
    """
    error_type = msg.get("error_type", "CommandError")
    error_msg = msg.get("error", "Unknown error")

    if error_type == "CommandTimeout":
        raise CommandTimeoutError(error_msg)

    operation = "reconnect" if command_id else "command"

    # For these two error types a reconnect names the command ID in place of
    # the server's text.
    text = error_msg
    if command_id:
        if error_type == "CommandNotFound":
            text = f"Command not found: {command_id}"
        elif error_type == "SessionExpired":
            text = f"Session expired: {command_id}"

    raise SandboxOperationError(
        text,
        operation=operation,
        error_type=error_type,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sync Stream Functions
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def run_ws_stream(
    dataplane_url: str,
    api_key: Optional[str],
    command: str,
    *,
    timeout: int = 60,
    env: Optional[dict[str, str]] = None,
    cwd: Optional[str] = None,
    shell: str = "/bin/bash",
    on_stdout: Optional[Callable[[str], Any]] = None,
    on_stderr: Optional[Callable[[str], Any]] = None,
) -> tuple[Iterator[dict], _WSStreamControl]:
    """Start a command over WebSocket and stream its raw messages.

    Returns a ``(message_iterator, control)`` pair. The connection is opened
    lazily, when the iterator is first advanced. The control object exposes
    send_kill() and send_input() for the CommandHandle.

    The iterator yields dicts with a "type" field:
    - {"type": "started", "command_id": "...", "pid": N}
    - {"type": "stdout", "data": "...", "offset": N}
    - {"type": "stderr", "data": "...", "offset": N}
    - {"type": "exit", "exit_code": N}

    Iteration ends after the "exit" message. A server "error" message is
    raised as an exception; messages of any other type are ignored.

    If on_stdout/on_stderr callbacks are provided, they are called with the
    message's "data" before the message itself is yielded.

    Raises (from the iterator):
        SandboxServerReloadError: If the connection closed with code 1001.
        SandboxConnectionError: If the upgrade is rejected, the connection
            closes unexpectedly, or an OSError occurs.
    """
    ws_connect, ConnectionClosed, InvalidStatus = _ensure_websockets()
    ws_url = _build_ws_url(dataplane_url)
    headers = _build_auth_headers(api_key)
    control = _WSStreamControl()

    def _stream() -> Iterator[dict]:
        try:
            with ws_connect(
                ws_url,
                additional_headers=headers,
                open_timeout=30,
                close_timeout=10,
                ping_interval=30,
                ping_timeout=60,
            ) as ws:
                control._bind(ws)

                # Build the execute request; env/cwd are sent only when set.
                request = {
                    "type": "execute",
                    "command": command,
                    "timeout": timeout,
                    "shell": shell,
                }
                if env:
                    request["env"] = env
                if cwd:
                    request["cwd"] = cwd
                ws.send(json.dumps(request))

                # NOTE(review): websockets documents that this iterator stops
                # silently on a normal closure; confirm that a 1001 close
                # actually reaches the ConnectionClosed handler below.
                for frame in ws:
                    msg = json.loads(frame)
                    kind = msg.get("type")

                    if kind == "exit":
                        yield msg
                        return

                    if kind == "error":
                        _raise_from_error_msg(msg)
                        continue

                    if kind == "stdout":
                        if on_stdout:
                            on_stdout(msg["data"])
                    elif kind == "stderr":
                        if on_stderr:
                            on_stderr(msg["data"])
                    elif kind == "started":
                        pass
                    else:
                        # Unknown message types are skipped.
                        continue

                    yield msg

        except InvalidStatus as e:
            _raise_for_invalid_status(e, ws_url)
        except ConnectionClosed as e:
            if not (e.rcvd and e.rcvd.code == 1001):
                raise SandboxConnectionError(
                    f"WebSocket connection closed unexpectedly: {e}"
                ) from e
            raise SandboxServerReloadError(
                "Server is reloading, reconnect to resume"
            ) from e
        except OSError as e:
            raise SandboxConnectionError(f"Failed to connect to sandbox: {e}") from e
        finally:
            # Whatever happened, the control must stop using the socket.
            control._unbind()

    return _stream(), control
|
||||
|
||||
|
||||
def reconnect_ws_stream(
    dataplane_url: str,
    api_key: Optional[str],
    command_id: str,
    *,
    stdout_offset: int = 0,
    stderr_offset: int = 0,
) -> tuple[Iterator[dict], _WSStreamControl]:
    """Resume streaming the output of an existing command over WebSocket.

    Returns a ``(message_iterator, control)`` pair, same as run_ws_stream.
    The connection is opened lazily, when the iterator is first advanced.
    The iterator yields stdout, stderr and exit messages and ends after
    "exit"; a server "error" message is raised as an exception.
    No 'started' message is sent on reconnection.

    With the ring buffer reader server model, there is no replay/live
    phase distinction and no deduplication needed. The server reads from
    its ring buffer starting at the requested offsets and streams output
    from there. If the requested offset is older than the buffer's
    earliest data, the server sends from the earliest available offset.

    Raises (from the iterator):
        SandboxOperationError: If the server reports an error for the command.
        SandboxServerReloadError: If the connection closed with code 1001.
        SandboxConnectionError: If the upgrade is rejected, the connection
            closes unexpectedly, or an OSError occurs.
    """
    ws_connect, ConnectionClosed, InvalidStatus = _ensure_websockets()
    ws_url = _build_ws_url(dataplane_url)
    headers = _build_auth_headers(api_key)
    control = _WSStreamControl()

    def _stream() -> Iterator[dict]:
        try:
            with ws_connect(
                ws_url,
                additional_headers=headers,
                open_timeout=30,
                close_timeout=10,
                ping_interval=30,
                ping_timeout=60,
            ) as ws:
                control._bind(ws)

                # Tell the server which command to resume and from where.
                request = {
                    "type": "reconnect",
                    "command_id": command_id,
                    "stdout_offset": stdout_offset,
                    "stderr_offset": stderr_offset,
                }
                ws.send(json.dumps(request))

                # NOTE(review): websockets documents that this iterator stops
                # silently on a normal closure; confirm that a 1001 close
                # actually reaches the ConnectionClosed handler below.
                for frame in ws:
                    msg = json.loads(frame)
                    kind = msg.get("type")

                    if kind == "exit":
                        yield msg
                        return

                    if kind == "error":
                        _raise_from_error_msg(msg, command_id=command_id)
                        continue

                    if kind in ("stdout", "stderr"):
                        yield msg

        except InvalidStatus as e:
            _raise_for_invalid_status(e, ws_url)
        except ConnectionClosed as e:
            if not (e.rcvd and e.rcvd.code == 1001):
                raise SandboxConnectionError(
                    f"WebSocket connection closed unexpectedly: {e}"
                ) from e
            raise SandboxServerReloadError(
                "Server is reloading, reconnect to resume"
            ) from e
        except OSError as e:
            raise SandboxConnectionError(f"Failed to connect to sandbox: {e}") from e
        finally:
            # Whatever happened, the control must stop using the socket.
            control._unbind()

    return _stream(), control
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Async Stream Functions
|
||||
# =============================================================================
|
||||
|
||||
|
||||
async def run_ws_stream_async(
    dataplane_url: str,
    api_key: Optional[str],
    command: str,
    *,
    timeout: int = 60,
    env: Optional[dict[str, str]] = None,
    cwd: Optional[str] = None,
    shell: str = "/bin/bash",
    on_stdout: Optional[Callable[[str], Any]] = None,
    on_stderr: Optional[Callable[[str], Any]] = None,
) -> tuple[AsyncIterator[dict], _AsyncWSStreamControl]:
    """Async equivalent of run_ws_stream.

    Returns ``(async_message_iterator, async_control)``. The connection is
    opened lazily, when the iterator is first advanced. The iterator yields
    "started", "stdout", "stderr" and "exit" messages and ends after "exit";
    a server "error" message is raised as an exception and other message
    types are ignored.

    The on_stdout/on_stderr callbacks are called synchronously (they are not
    awaited) with the message's "data" before the message is yielded.

    Raises (from the iterator):
        SandboxServerReloadError: If the connection closed with code 1001.
        SandboxConnectionError: If the upgrade is rejected, the connection
            closes unexpectedly, or an OSError occurs.
    """
    ws_connect_async, ConnectionClosed, InvalidStatus = _ensure_websockets_async()
    ws_url = _build_ws_url(dataplane_url)
    headers = _build_auth_headers(api_key)
    control = _AsyncWSStreamControl()

    async def _stream() -> AsyncIterator[dict]:
        try:
            async with ws_connect_async(
                ws_url,
                additional_headers=headers,
                open_timeout=30,
                close_timeout=10,
                ping_interval=30,
                ping_timeout=60,
            ) as ws:
                control._bind(ws)

                # Build the execute request; env/cwd are sent only when set.
                request = {
                    "type": "execute",
                    "command": command,
                    "timeout": timeout,
                    "shell": shell,
                }
                if env:
                    request["env"] = env
                if cwd:
                    request["cwd"] = cwd
                await ws.send(json.dumps(request))

                # NOTE(review): websockets documents that this iterator stops
                # silently on a normal closure; confirm that a 1001 close
                # actually reaches the ConnectionClosed handler below.
                async for frame in ws:
                    msg = json.loads(frame)
                    kind = msg.get("type")

                    if kind == "exit":
                        yield msg
                        return

                    if kind == "error":
                        _raise_from_error_msg(msg)
                        continue

                    if kind == "stdout":
                        if on_stdout:
                            on_stdout(msg["data"])
                    elif kind == "stderr":
                        if on_stderr:
                            on_stderr(msg["data"])
                    elif kind == "started":
                        pass
                    else:
                        # Unknown message types are skipped.
                        continue

                    yield msg

        except InvalidStatus as e:
            _raise_for_invalid_status(e, ws_url)
        except ConnectionClosed as e:
            if not (e.rcvd and e.rcvd.code == 1001):
                raise SandboxConnectionError(
                    f"WebSocket connection closed unexpectedly: {e}"
                ) from e
            raise SandboxServerReloadError(
                "Server is reloading, reconnect to resume"
            ) from e
        except OSError as e:
            raise SandboxConnectionError(f"Failed to connect to sandbox: {e}") from e
        finally:
            # Whatever happened, the control must stop using the socket.
            control._unbind()

    return _stream(), control
|
||||
|
||||
|
||||
async def reconnect_ws_stream_async(
    dataplane_url: str,
    api_key: Optional[str],
    command_id: str,
    *,
    stdout_offset: int = 0,
    stderr_offset: int = 0,
) -> tuple[AsyncIterator[dict], _AsyncWSStreamControl]:
    """Async equivalent of reconnect_ws_stream.

    Returns ``(async_message_iterator, async_control)``. The connection is
    opened lazily, when the iterator is first advanced. A reconnect request
    carrying ``command_id`` and the two offsets is sent, then "stdout",
    "stderr" and "exit" messages are yielded; iteration ends after "exit".
    A server "error" message is raised as an exception.

    Raises (from the iterator):
        SandboxOperationError: If the server reports an error for the command.
        SandboxServerReloadError: If the connection closed with code 1001.
        SandboxConnectionError: If the upgrade is rejected, the connection
            closes unexpectedly, or an OSError occurs.
    """
    ws_connect_async, ConnectionClosed, InvalidStatus = _ensure_websockets_async()
    ws_url = _build_ws_url(dataplane_url)
    headers = _build_auth_headers(api_key)
    control = _AsyncWSStreamControl()

    async def _stream() -> AsyncIterator[dict]:
        try:
            async with ws_connect_async(
                ws_url,
                additional_headers=headers,
                open_timeout=30,
                close_timeout=10,
                ping_interval=30,
                ping_timeout=60,
            ) as ws:
                control._bind(ws)

                # Tell the server which command to resume and from where.
                request = {
                    "type": "reconnect",
                    "command_id": command_id,
                    "stdout_offset": stdout_offset,
                    "stderr_offset": stderr_offset,
                }
                await ws.send(json.dumps(request))

                # NOTE(review): websockets documents that this iterator stops
                # silently on a normal closure; confirm that a 1001 close
                # actually reaches the ConnectionClosed handler below.
                async for frame in ws:
                    msg = json.loads(frame)
                    kind = msg.get("type")

                    if kind == "exit":
                        yield msg
                        return

                    if kind == "error":
                        _raise_from_error_msg(msg, command_id=command_id)
                        continue

                    if kind in ("stdout", "stderr"):
                        yield msg

        except InvalidStatus as e:
            _raise_for_invalid_status(e, ws_url)
        except ConnectionClosed as e:
            if not (e.rcvd and e.rcvd.code == 1001):
                raise SandboxConnectionError(
                    f"WebSocket connection closed unexpectedly: {e}"
                ) from e
            raise SandboxServerReloadError(
                "Server is reloading, reconnect to resume"
            ) from e
        except OSError as e:
            raise SandboxConnectionError(f"Failed to connect to sandbox: {e}") from e
        finally:
            # Whatever happened, the control must stop using the socket.
            control._unbind()

    return _stream(), control
|
||||
Reference in New Issue
Block a user