initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,680 @@
"""Data models for the sandbox client."""
from __future__ import annotations
from collections.abc import AsyncIterator, Iterator
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Optional
from langsmith.sandbox._exceptions import (
SandboxConnectionError,
SandboxOperationError,
SandboxServerReloadError,
)
if TYPE_CHECKING:
from langsmith.sandbox._async_sandbox import AsyncSandbox
from langsmith.sandbox._sandbox import Sandbox
from langsmith.sandbox._ws_execute import (
_AsyncWSStreamControl,
_WSStreamControl,
)
@dataclass
class ExecutionResult:
    """Captured outcome of a command that ran inside a sandbox.

    Attributes:
        stdout: Text collected from the command's standard output.
        stderr: Text collected from the command's standard error.
        exit_code: Exit status reported for the command.
    """

    stdout: str
    stderr: str
    exit_code: int

    @property
    def success(self) -> bool:
        """Whether the command finished with exit code 0."""
        finished_cleanly = self.exit_code == 0
        return finished_cleanly
@dataclass
class ResourceSpec:
    """Resource amounts requested for a sandbox.

    Attributes:
        cpu: CPU amount string. Defaults to ``"500m"``.
        memory: Memory amount string. Defaults to ``"512Mi"``.
        storage: Storage amount string, or None when not specified.
    """

    cpu: str = "500m"
    memory: str = "512Mi"
    storage: Optional[str] = None
@dataclass
class Volume:
    """A persistent volume.

    Volumes are persistent storage that can be mounted in sandboxes.

    Attributes:
        name: Display name (can be updated).
        size: Value of the API's ``size`` field.
        storage_class: Value of the API's ``storage_class`` field.
        id: Unique identifier (UUID). Remains constant even if the name
            changes. May be None for resources created before ID support
            was added.
        created_at: Value of the API's ``created_at`` field, if present.
        updated_at: Value of the API's ``updated_at`` field, if present.
    """

    name: str
    size: str
    storage_class: str
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Volume:
        """Build a Volume from an API response dict.

        Missing ``name``, ``size`` and ``storage_class`` keys fall back to
        ``""``, ``"unknown"`` and ``"default"`` respectively; the other
        fields fall back to None.
        """
        fetch = data.get
        init_kwargs = {
            "name": fetch("name", ""),
            "size": fetch("size", "unknown"),
            "storage_class": fetch("storage_class", "default"),
            "id": fetch("id"),
            "created_at": fetch("created_at"),
            "updated_at": fetch("updated_at"),
        }
        return cls(**init_kwargs)
@dataclass
class VolumeMountSpec:
    """Describes one volume mount declared on a sandbox template.

    Attributes:
        volume_name: Name of the volume to mount.
        mount_path: Path at which the volume is mounted.
    """

    volume_name: str
    mount_path: str
@dataclass
class SandboxTemplate:
    """Represents a SandboxTemplate.

    Templates define the image, resource limits, and volume mounts for sandboxes.
    All other container details are handled by the server with secure defaults.

    Attributes:
        name: Display name (can be updated).
        image: Value of the API's ``image`` field.
        resources: Resource amounts for sandboxes created from the template.
        volume_mounts: Volume mounts declared on the template.
        id: Unique identifier (UUID). Remains constant even if name changes.
            May be None for resources created before ID support was added.
        created_at: Value of the API's ``created_at`` field, if present.
        updated_at: Value of the API's ``updated_at`` field, if present.
    """

    name: str
    image: str
    resources: ResourceSpec
    volume_mounts: list[VolumeMountSpec] = field(default_factory=list)
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SandboxTemplate:
        """Create a SandboxTemplate from API response dict.

        Args:
            data: Decoded API response. ``resources`` and ``volume_mounts``
                may be absent or explicitly null; both cases are treated
                as "nothing specified".

        Returns:
            A SandboxTemplate. Missing resource values fall back to
            ``"500m"`` CPU and ``"512Mi"`` memory; missing ``name`` and
            ``image`` fall back to ``""`` and ``"unknown"``.
        """
        # ``or`` instead of a .get() default: a default only applies when the
        # key is absent, so an explicit null would otherwise come back as
        # None and break the .get() calls / iteration below.
        resources_data = data.get("resources") or {}
        volume_mounts_data = data.get("volume_mounts") or []
        return cls(
            name=data.get("name", ""),
            image=data.get("image", "unknown"),
            resources=ResourceSpec(
                cpu=resources_data.get("cpu", "500m"),
                memory=resources_data.get("memory", "512Mi"),
                storage=resources_data.get("storage"),
            ),
            volume_mounts=[
                VolumeMountSpec(
                    volume_name=vm.get("volume_name", ""),
                    mount_path=vm.get("mount_path", ""),
                )
                for vm in volume_mounts_data
            ],
            id=data.get("id"),
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
        )
@dataclass
class ResourceStatus:
    """Lightweight provisioning status for any async-created resource.

    Attributes:
        status: Resource lifecycle status. One of "provisioning", "ready",
            "failed".
        status_message: Human-readable details when status is "failed",
            None otherwise.
    """

    status: str
    status_message: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ResourceStatus:
        """Build a ResourceStatus from an API response dict.

        A missing ``status`` key is reported as ``"provisioning"``; a missing
        ``status_message`` key becomes None.
        """
        current = data.get("status", "provisioning")
        detail = data.get("status_message")
        return cls(status=current, status_message=detail)
@dataclass
class Pool:
    """A Sandbox Pool of pre-provisioned sandboxes.

    Pools pre-provision sandboxes from a template for faster startup.
    Instead of waiting for a new sandbox to be created, sandboxes can
    be served from a pre-warmed pool.

    Note: Templates with volume mounts cannot be used in pools.

    Attributes:
        name: Display name (can be updated).
        template_name: Name of the template the pool provisions from.
        replicas: Desired number of replicas.
        id: Unique identifier (UUID). Remains constant even if name changes.
            May be None for resources created before ID support was added.
        created_at: Value of the API's ``created_at`` field, if present.
        updated_at: Value of the API's ``updated_at`` field, if present.
    """

    name: str
    template_name: str
    replicas: int  # Desired replicas
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Pool:
        """Build a Pool from an API response dict.

        Missing ``name`` and ``template_name`` keys fall back to ``""``,
        a missing ``replicas`` key to ``0``, and the other fields to None.
        """
        fetch = data.get
        init_kwargs = {
            "name": fetch("name", ""),
            "template_name": fetch("template_name", ""),
            "replicas": fetch("replicas", 0),
            "id": fetch("id"),
            "created_at": fetch("created_at"),
            "updated_at": fetch("updated_at"),
        }
        return cls(**init_kwargs)
# =============================================================================
# WebSocket Command Execution Models
# =============================================================================
@dataclass
class OutputChunk:
    """One piece of streamed output produced while a command runs.

    Attributes:
        stream: Which stream the chunk came from: "stdout" or "stderr".
        data: Text content of the chunk (valid UTF-8; the server handles
            boundary splitting).
        offset: Byte offset of the chunk within its stream. Used internally
            for reconnection; users typically don't need this.
    """

    stream: str
    data: str
    offset: int
class CommandHandle:
    """Handle to a running command with streaming output and auto-reconnect.

    Iterable, yielding OutputChunk objects (stdout and stderr interleaved
    in arrival order). Access .result after iteration to get the full
    ExecutionResult.

    Auto-reconnect behavior:
        - Server hot-reload (1001 Going Away): reconnect immediately
        - Network error / unexpected close: reconnect with exponential backoff
        - User called kill(): do NOT reconnect (propagate error)
        - No command ID known: do NOT reconnect (propagate error)

    The auto-reconnect is transparent -- the iterator reconnects and
    continues yielding chunks without any user intervention. If all
    reconnect attempts are exhausted, SandboxConnectionError is raised.

    Construction modes (controlled by ``command_id``):
        - **New execution** (``command_id=""``, the default): the constructor
          eagerly reads the server's ``"started"`` message to populate
          ``command_id`` and ``pid`` before returning.
        - **Reconnection** (``command_id`` set): skips the started-message
          read, since reconnect streams don't emit one.

    Example:
        handle = sandbox.run("make build", timeout=600, wait=False)
        for chunk in handle:  # auto-reconnects on transient errors
            print(chunk.data, end="")
        result = handle.result
        print(f"Exit code: {result.exit_code}")
    """

    MAX_AUTO_RECONNECTS = 5
    _BACKOFF_BASE = 0.5  # seconds
    _BACKOFF_MAX = 8.0  # seconds

    def __init__(
        self,
        message_stream: Iterator[dict],
        control: Optional[_WSStreamControl],
        sandbox: Sandbox,
        *,
        command_id: str = "",
        stdout_offset: int = 0,
        stderr_offset: int = 0,
    ) -> None:
        """Wrap a message stream for one command.

        Args:
            message_stream: Iterator of message dicts for the command.
            control: Control object used for kill / stdin, or None.
            sandbox: Sandbox used to open a new stream when reconnecting.
            command_id: Existing command ID when this handle is a
                reconnection; empty for a new execution.
            stdout_offset: Initial value for the tracked stdout byte offset.
            stderr_offset: Initial value for the tracked stderr byte offset.

        Raises:
            SandboxOperationError: For a new execution, if the stream ends
                before, or starts with something other than, the
                ``"started"`` message.
        """
        self._stream = message_stream
        self._control = control
        self._sandbox = sandbox
        self._command_id: Optional[str] = None
        self._pid: Optional[int] = None
        self._result: Optional[ExecutionResult] = None
        self._stdout_parts: list[str] = []
        self._stderr_parts: list[str] = []
        self._exhausted = False
        self._last_stdout_offset = stdout_offset
        self._last_stderr_offset = stderr_offset
        # New executions (command_id=""): eagerly read the "started" message.
        # Reconnections (command_id set): skip it, since reconnect streams
        # don't send a "started" message.
        if command_id:
            self._command_id = command_id
        else:
            self._consume_started()

    def _consume_started(self) -> None:
        """Eagerly read the 'started' message to populate command_id and pid.

        Blocks briefly until the server sends the started message (arrives
        near-instantly after connection). After this call, command_id and
        pid are available, and the WebSocket is bound to the control object
        (so kill() works).

        Raises:
            SandboxOperationError: If the stream is already finished or its
                first message is not of type ``"started"``.
        """
        try:
            first_msg = next(self._stream)
        except StopIteration:
            # The StopIteration itself carries no information; hide it from
            # the traceback.
            raise SandboxOperationError(
                "Command stream ended before 'started' message",
                operation="command",
            ) from None
        if first_msg.get("type") != "started":
            raise SandboxOperationError(
                f"Expected 'started' message, got '{first_msg.get('type')}'",
                operation="command",
            )
        # Either value stays None if the server omitted the field.
        self._command_id = first_msg.get("command_id")
        self._pid = first_msg.get("pid")

    @property
    def command_id(self) -> Optional[str]:
        """The server-assigned command ID. Available after construction."""
        return self._command_id

    @property
    def pid(self) -> Optional[int]:
        """The process ID on the sandbox. Available after construction."""
        return self._pid

    @property
    def result(self) -> ExecutionResult:
        """The final execution result. Blocks until the command completes.

        Drains the remaining stream if not already exhausted, then returns
        the ExecutionResult with aggregated stdout, stderr, and exit_code.

        Raises:
            SandboxOperationError: If the stream finished without an
                ``"exit"`` message.
        """
        if self._result is None:
            for _ in self:
                pass
        if self._result is None:
            raise SandboxOperationError(
                "Command stream ended without exit message",
                operation="command",
            )
        return self._result

    def _iter_stream(self) -> Iterator[OutputChunk]:
        """Iterate over output chunks from the current stream (no reconnect).

        Output text is accumulated for the final result as a side effect.
        An ``"exit"`` message stores the result and ends iteration; message
        types other than stdout/stderr/exit are ignored.
        """
        if self._exhausted:
            return
        for msg in self._stream:
            msg_type = msg.get("type")
            if msg_type in ("stdout", "stderr"):
                data = msg["data"]
                chunk = OutputChunk(
                    stream=msg_type,
                    data=data,
                    offset=msg.get("offset", 0),
                )
                if msg_type == "stdout":
                    self._stdout_parts.append(data)
                else:
                    self._stderr_parts.append(data)
                yield chunk
            elif msg_type == "exit":
                self._result = ExecutionResult(
                    stdout="".join(self._stdout_parts),
                    stderr="".join(self._stderr_parts),
                    exit_code=msg["exit_code"],
                )
                self._exhausted = True
                return
        # Stream finished without an exit message.
        self._exhausted = True

    def __iter__(self) -> Iterator[OutputChunk]:
        """Iterate over output chunks, auto-reconnecting on transient errors.

        Reconnect strategy:
            - 1001 Going Away (hot-reload): immediate reconnect, no delay
            - Other SandboxConnectionError: exponential backoff (0.5s, 1s, 2s...)
            - After kill(): no reconnect, error propagates
            - No command ID known: no reconnect, error propagates

        Raises:
            SandboxConnectionError: If the connection is lost and cannot be
                re-established under the rules above.
        """
        import time

        reconnect_attempts = 0
        while True:
            try:
                for chunk in self._iter_stream():
                    reconnect_attempts = 0  # Reset on successful data
                    # Offsets are byte offsets, so advance by the encoded
                    # length rather than the character count.
                    next_offset = chunk.offset + len(chunk.data.encode("utf-8"))
                    if chunk.stream == "stdout":
                        self._last_stdout_offset = next_offset
                    else:
                        self._last_stderr_offset = next_offset
                    yield chunk
                return  # Stream ended normally (exit message received)
            except SandboxConnectionError as e:
                if self._control and self._control.killed:
                    raise
                if self._command_id is None:
                    # Without a command ID there is nothing to reconnect to.
                    # Checked explicitly (not via assert, which is removed
                    # under -O) and before any backoff sleep.
                    raise
                reconnect_attempts += 1
                if reconnect_attempts > self.MAX_AUTO_RECONNECTS:
                    raise SandboxConnectionError(
                        f"Lost connection {reconnect_attempts} times in "
                        f"succession, giving up"
                    ) from e
                is_hot_reload = isinstance(e, SandboxServerReloadError)
                if not is_hot_reload:
                    delay = min(
                        self._BACKOFF_BASE * (2 ** (reconnect_attempts - 1)),
                        self._BACKOFF_MAX,
                    )
                    time.sleep(delay)
                new_handle = self._sandbox.reconnect(
                    self._command_id,
                    stdout_offset=self._last_stdout_offset,
                    stderr_offset=self._last_stderr_offset,
                )
                # Adopt the new connection but keep this handle's
                # accumulated output and offsets.
                self._stream = new_handle._stream
                self._control = new_handle._control
                self._exhausted = False

    def kill(self) -> None:
        """Send a kill signal to the running command (SIGKILL).

        The server kills the entire process group. The stream will
        subsequently yield an exit message with a non-zero exit code.

        Has no effect if the command has already exited or the
        WebSocket connection is closed.
        """
        if self._control:
            self._control.send_kill()

    def send_input(self, data: str) -> None:
        """Write data to the command's stdin.

        Args:
            data: String data to write to stdin.

        Has no effect if the command has already exited or the
        WebSocket connection is closed.
        """
        if self._control:
            self._control.send_input(data)

    @property
    def last_stdout_offset(self) -> int:
        """Last known stdout byte offset (for manual reconnection)."""
        return self._last_stdout_offset

    @property
    def last_stderr_offset(self) -> int:
        """Last known stderr byte offset (for manual reconnection)."""
        return self._last_stderr_offset

    def reconnect(self) -> CommandHandle:
        """Reconnect to this command from the last known offsets.

        Returns a new handle that resumes output from where this one
        left off. Any output produced while disconnected is replayed
        from the server's ring buffer.

        Returns:
            A new CommandHandle.

        Raises:
            SandboxOperationError: If this handle has no command ID, or
                command_id is not found or session expired.
            SandboxConnectionError: If connection to sandbox fails.
        """
        if self._command_id is None:
            # Explicit check instead of assert so it survives python -O.
            raise SandboxOperationError(
                "Cannot reconnect: command_id is unknown",
                operation="command",
            )
        return self._sandbox.reconnect(
            self._command_id,
            stdout_offset=self._last_stdout_offset,
            stderr_offset=self._last_stderr_offset,
        )
class AsyncCommandHandle:
    """Async handle to a running command with streaming output and auto-reconnect.

    Async iterable, yielding OutputChunk objects (stdout and stderr interleaved
    in arrival order). Access .result after iteration to get the full
    ExecutionResult.

    Auto-reconnect behavior:
        - Server hot-reload (1001 Going Away): reconnect immediately
        - Network error / unexpected close: reconnect with exponential backoff
        - User called kill(): do NOT reconnect (propagate error)
        - No command ID known yet: do NOT reconnect (propagate error)

    Construction modes (controlled by ``command_id``):
        - **New execution** (``command_id=""``, the default): call
          ``await handle._ensure_started()`` after construction to read the
          server's ``"started"`` message and populate ``command_id`` / ``pid``.
          Iterating the handle also performs this read if it has not
          happened yet.
        - **Reconnection** (``command_id`` set): skips the started-message
          read, since reconnect streams don't emit one.

    Example:
        handle = await sandbox.run("make build", timeout=600, wait=False)
        async for chunk in handle:  # auto-reconnects on transient errors
            print(chunk.data, end="")
        result = await handle.result
        print(f"Exit code: {result.exit_code}")
    """

    MAX_AUTO_RECONNECTS = 5
    _BACKOFF_BASE = 0.5  # seconds
    _BACKOFF_MAX = 8.0  # seconds

    def __init__(
        self,
        message_stream: AsyncIterator[dict],
        control: Optional[_AsyncWSStreamControl],
        sandbox: AsyncSandbox,
        *,
        command_id: str = "",
        stdout_offset: int = 0,
        stderr_offset: int = 0,
    ) -> None:
        """Wrap an async message stream for one command.

        Args:
            message_stream: Async iterator of message dicts for the command.
            control: Control object used for kill / stdin, or None.
            sandbox: Sandbox used to open a new stream when reconnecting.
            command_id: Existing command ID when this handle is a
                reconnection; empty for a new execution.
            stdout_offset: Initial value for the tracked stdout byte offset.
            stderr_offset: Initial value for the tracked stderr byte offset.
        """
        self._stream = message_stream
        self._control = control
        self._sandbox = sandbox
        self._command_id: Optional[str] = None
        self._pid: Optional[int] = None
        self._result: Optional[ExecutionResult] = None
        self._stdout_parts: list[str] = []
        self._stderr_parts: list[str] = []
        self._exhausted = False
        self._last_stdout_offset = stdout_offset
        self._last_stderr_offset = stderr_offset
        # New executions (command_id=""): _ensure_started reads "started".
        # Reconnections (command_id set): skip since reconnect streams
        # don't send a "started" message.
        if command_id:
            self._command_id = command_id
            self._started = True
        else:
            self._started = False

    async def _ensure_started(self) -> None:
        """Read the 'started' message to populate command_id and pid.

        Does nothing if the message was already read or this handle was
        created for a reconnection.

        Raises:
            SandboxOperationError: If the stream is already finished or its
                first message is not of type ``"started"``.
        """
        if self._started:
            return
        try:
            first_msg = await self._stream.__anext__()
        except StopAsyncIteration:
            # The StopAsyncIteration itself carries no information; hide it
            # from the traceback.
            raise SandboxOperationError(
                "Command stream ended before 'started' message",
                operation="command",
            ) from None
        if first_msg.get("type") != "started":
            raise SandboxOperationError(
                f"Expected 'started' message, got '{first_msg.get('type')}'",
                operation="command",
            )
        # Either value stays None if the server omitted the field.
        self._command_id = first_msg.get("command_id")
        self._pid = first_msg.get("pid")
        self._started = True

    @property
    def command_id(self) -> Optional[str]:
        """The server-assigned command ID. Available after _ensure_started."""
        return self._command_id

    @property
    def pid(self) -> Optional[int]:
        """The process ID on the sandbox. Available after _ensure_started."""
        return self._pid

    @property
    async def result(self) -> ExecutionResult:
        """The final execution result. Awaitable.

        Drains the remaining stream if no result is stored yet.

        Raises:
            SandboxOperationError: If the stream finished without an
                ``"exit"`` message.
        """
        if self._result is None:
            async for _ in self:
                pass
        if self._result is None:
            raise SandboxOperationError(
                "Command stream ended without exit message",
                operation="command",
            )
        return self._result

    async def _aiter_stream(self) -> AsyncIterator[OutputChunk]:
        """Iterate over output chunks from the current stream (no reconnect).

        Reads the ``"started"`` message first if that has not happened yet.
        Output text is accumulated for the final result as a side effect.
        An ``"exit"`` message stores the result and ends iteration; message
        types other than stdout/stderr/exit are ignored.
        """
        await self._ensure_started()
        if self._exhausted:
            return
        async for msg in self._stream:
            msg_type = msg.get("type")
            if msg_type in ("stdout", "stderr"):
                data = msg["data"]
                chunk = OutputChunk(
                    stream=msg_type,
                    data=data,
                    offset=msg.get("offset", 0),
                )
                if msg_type == "stdout":
                    self._stdout_parts.append(data)
                else:
                    self._stderr_parts.append(data)
                yield chunk
            elif msg_type == "exit":
                self._result = ExecutionResult(
                    stdout="".join(self._stdout_parts),
                    stderr="".join(self._stderr_parts),
                    exit_code=msg["exit_code"],
                )
                self._exhausted = True
                return
        # Stream finished without an exit message.
        self._exhausted = True

    async def __aiter__(self) -> AsyncIterator[OutputChunk]:
        """Async iterate with auto-reconnect on transient errors.

        Reconnect strategy:
            - 1001 Going Away (hot-reload): immediate reconnect, no delay
            - Other SandboxConnectionError: exponential backoff (0.5s, 1s, 2s...)
            - After kill(): no reconnect, error propagates
            - No command ID known (e.g. the connection dropped before the
              "started" message was read): no reconnect, error propagates

        Raises:
            SandboxConnectionError: If the connection is lost and cannot be
                re-established under the rules above.
        """
        import asyncio

        reconnect_attempts = 0
        while True:
            try:
                async for chunk in self._aiter_stream():
                    reconnect_attempts = 0  # Reset on successful data
                    # Offsets are byte offsets, so advance by the encoded
                    # length rather than the character count.
                    next_offset = chunk.offset + len(chunk.data.encode("utf-8"))
                    if chunk.stream == "stdout":
                        self._last_stdout_offset = next_offset
                    else:
                        self._last_stderr_offset = next_offset
                    yield chunk
                return  # Stream ended normally
            except SandboxConnectionError as e:
                if self._control and self._control.killed:
                    raise
                if self._command_id is None:
                    # Without a command ID there is nothing to reconnect to.
                    # Checked explicitly (not via assert, which is removed
                    # under -O) and before any backoff sleep.
                    raise
                reconnect_attempts += 1
                if reconnect_attempts > self.MAX_AUTO_RECONNECTS:
                    raise SandboxConnectionError(
                        f"Lost connection {reconnect_attempts} times "
                        f"in succession, giving up"
                    ) from e
                is_hot_reload = isinstance(e, SandboxServerReloadError)
                if not is_hot_reload:
                    delay = min(
                        self._BACKOFF_BASE * (2 ** (reconnect_attempts - 1)),
                        self._BACKOFF_MAX,
                    )
                    await asyncio.sleep(delay)
                new_handle = await self._sandbox.reconnect(
                    self._command_id,
                    stdout_offset=self._last_stdout_offset,
                    stderr_offset=self._last_stderr_offset,
                )
                # Adopt the new connection but keep this handle's
                # accumulated output and offsets.
                self._stream = new_handle._stream
                self._control = new_handle._control
                self._exhausted = False

    async def kill(self) -> None:
        """Send a kill signal to the running command.

        Does nothing when the handle has no control object.
        """
        if self._control:
            await self._control.send_kill()

    async def send_input(self, data: str) -> None:
        """Write data to the command's stdin.

        Args:
            data: String data to write to stdin.

        Does nothing when the handle has no control object.
        """
        if self._control:
            await self._control.send_input(data)

    @property
    def last_stdout_offset(self) -> int:
        """Last known stdout byte offset (for manual reconnection)."""
        return self._last_stdout_offset

    @property
    def last_stderr_offset(self) -> int:
        """Last known stderr byte offset (for manual reconnection)."""
        return self._last_stderr_offset

    async def reconnect(self) -> AsyncCommandHandle:
        """Reconnect to this command from the last known offsets.

        Returns:
            A new AsyncCommandHandle obtained from the sandbox.

        Raises:
            SandboxOperationError: If this handle has no command ID yet.
        """
        if self._command_id is None:
            # Explicit check instead of assert so it survives python -O.
            raise SandboxOperationError(
                "Cannot reconnect: command_id is unknown",
                operation="command",
            )
        return await self._sandbox.reconnect(
            self._command_id,
            stdout_offset=self._last_stdout_offset,
            stderr_offset=self._last_stderr_offset,
        )