Analytical_engine_backend/venv/Lib/site-packages/langsmith/sandbox/_models.py

"""Data models for the sandbox client."""

from __future__ import annotations

from collections.abc import AsyncIterator, Iterator
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Optional

from langsmith.sandbox._exceptions import (
    SandboxConnectionError,
    SandboxOperationError,
    SandboxServerReloadError,
)

if TYPE_CHECKING:
    from langsmith.sandbox._async_sandbox import AsyncSandbox
    from langsmith.sandbox._sandbox import Sandbox
    from langsmith.sandbox._ws_execute import (
        _AsyncWSStreamControl,
        _WSStreamControl,
    )


@dataclass
class ExecutionResult:
    """Outcome of a command that was executed in a sandbox.

    Attributes:
        stdout: Text captured from the command's standard output.
        stderr: Text captured from the command's standard error.
        exit_code: Exit status reported for the command.
    """

    stdout: str
    stderr: str
    exit_code: int

    @property
    def success(self) -> bool:
        """Whether the command finished with exit status 0."""
        exited_cleanly = self.exit_code == 0
        return exited_cleanly


@dataclass
class ResourceSpec:
    """Compute resources requested for a sandbox.

    All values are plain strings that are handed through as-is; this class
    performs no parsing or validation of them.

    Attributes:
        cpu: CPU amount. Defaults to ``"500m"``.
        memory: Memory amount. Defaults to ``"512Mi"``.
        storage: Storage amount, or None when no value is specified.
    """

    cpu: str = "500m"
    memory: str = "512Mi"
    storage: Optional[str] = None


@dataclass
class Volume:
    """A persistent volume.

    Volumes provide persistent storage that sandboxes can mount.

    Attributes:
        name: Display name (can be updated).
        size: Volume size as reported by the API.
        storage_class: Storage class as reported by the API.
        id: Unique identifier (UUID). Stays the same even when the name
            changes. May be None for resources created before ID support
            was added.
        created_at: Creation timestamp from the API, if provided.
        updated_at: Last-update timestamp from the API, if provided.
    """

    name: str
    size: str
    storage_class: str
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Volume:
        """Build a Volume from an API response dict.

        Keys missing from ``data`` fall back to placeholders: an empty name,
        ``"unknown"`` size, ``"default"`` storage class, and None for the
        identifier and both timestamps.
        """
        volume_name = data.get("name", "")
        volume_size = data.get("size", "unknown")
        storage_class = data.get("storage_class", "default")
        volume_id = data.get("id")
        created = data.get("created_at")
        updated = data.get("updated_at")
        return cls(
            name=volume_name,
            size=volume_size,
            storage_class=storage_class,
            id=volume_id,
            created_at=created,
            updated_at=updated,
        )


@dataclass
class VolumeMountSpec:
    """Describes how a volume is mounted by a sandbox template.

    Attributes:
        volume_name: Name of the volume to mount.
        mount_path: Path at which the volume is mounted.
    """

    volume_name: str
    mount_path: str


@dataclass
class SandboxTemplate:
    """Represents a SandboxTemplate.

    Templates define the image, resource limits, and volume mounts for sandboxes.
    All other container details are handled by the server with secure defaults.

    Attributes:
        name: Display name (can be updated).
        image: Image the template uses.
        resources: Resource specification for sandboxes built from the template.
        volume_mounts: Volumes to mount; empty when the template has none.
        id: Unique identifier (UUID). Remains constant even if name changes.
            May be None for resources created before ID support was added.
        created_at: Creation timestamp from the API, if provided.
        updated_at: Last-update timestamp from the API, if provided.
    """

    name: str
    image: str
    resources: ResourceSpec
    volume_mounts: list[VolumeMountSpec] = field(default_factory=list)
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> SandboxTemplate:
        """Create a SandboxTemplate from API response dict.

        Missing keys fall back to defaults (empty name, ``"unknown"`` image,
        default resources, no volume mounts). ``"resources"`` and
        ``"volume_mounts"`` are also accepted as explicit ``None`` (JSON
        ``null``) and treated the same as if they were absent.

        Args:
            data: Decoded API response describing one template.

        Returns:
            The populated SandboxTemplate.
        """
        # `.get(key, default)` only applies the default when the key is
        # absent; an explicit null would come back as None and break the
        # `.get()` calls / iteration below, hence the `or` fallbacks.
        resources_data = data.get("resources") or {}
        volume_mounts_data = data.get("volume_mounts") or []
        return cls(
            name=data.get("name", ""),
            image=data.get("image", "unknown"),
            resources=ResourceSpec(
                cpu=resources_data.get("cpu", "500m"),
                memory=resources_data.get("memory", "512Mi"),
                storage=resources_data.get("storage"),
            ),
            volume_mounts=[
                VolumeMountSpec(
                    volume_name=vm.get("volume_name", ""),
                    mount_path=vm.get("mount_path", ""),
                )
                for vm in volume_mounts_data
            ],
            id=data.get("id"),
            created_at=data.get("created_at"),
            updated_at=data.get("updated_at"),
        )


@dataclass
class ResourceStatus:
    """Compact provisioning state of a resource that is created asynchronously.

    Attributes:
        status: Lifecycle status of the resource. One of "provisioning",
            "ready", "failed".
        status_message: Human-readable details when status is "failed",
            None otherwise.
    """

    status: str
    status_message: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ResourceStatus:
        """Build a ResourceStatus from an API response dict.

        A missing ``"status"`` key is reported as ``"provisioning"``; a
        missing ``"status_message"`` becomes None.
        """
        current = data.get("status", "provisioning")
        detail = data.get("status_message")
        return cls(status=current, status_message=detail)


@dataclass
class Pool:
    """A Sandbox Pool holding pre-provisioned sandboxes.

    A pool pre-provisions sandboxes from a template so that a request can
    be served from the pre-warmed pool instead of waiting for a brand new
    sandbox to be created.

    Note: Templates with volume mounts cannot be used in pools.

    Attributes:
        name: Display name (can be updated).
        template_name: Name of the template the pool provisions from.
        replicas: Desired number of replicas.
        id: Unique identifier (UUID). Stays the same even when the name
            changes. May be None for resources created before ID support
            was added.
        created_at: Creation timestamp from the API, if provided.
        updated_at: Last-update timestamp from the API, if provided.
    """

    name: str
    template_name: str
    replicas: int  # Desired replicas
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Pool:
        """Build a Pool from an API response dict.

        Keys missing from ``data`` take the fallback listed below.
        """
        # Field name -> value used when the key is absent from the payload.
        fallbacks = {
            "name": "",
            "template_name": "",
            "replicas": 0,
            "id": None,
            "created_at": None,
            "updated_at": None,
        }
        values = {key: data.get(key, default) for key, default in fallbacks.items()}
        return cls(**values)


# =============================================================================
# WebSocket Command Execution Models
# =============================================================================


@dataclass
class OutputChunk:
    """One piece of streamed output produced while a command executes.

    Attributes:
        stream: Name of the originating stream, either "stdout" or "stderr".
        data: Text carried by this chunk (valid UTF-8; the server takes
            care of splitting on character boundaries).
        offset: Byte offset of this chunk within its stream. Used
            internally when reconnecting; callers normally have no need
            for it.
    """

    stream: str
    data: str
    offset: int


class CommandHandle:
    """Handle to a running command with streaming output and auto-reconnect.

    Iterable, yielding OutputChunk objects (stdout and stderr interleaved
    in arrival order). Access .result after iteration to get the full
    ExecutionResult.

    Auto-reconnect behavior:
    - Server hot-reload (1001 Going Away): reconnect immediately
    - Network error / unexpected close:    reconnect with exponential backoff
    - User called kill():                  do NOT reconnect (propagate error)
    - No command ID known:                 do NOT reconnect (propagate error)

    The auto-reconnect is transparent -- the iterator reconnects and
    continues yielding chunks without any user intervention. If all
    reconnect attempts are exhausted, SandboxConnectionError is raised.

    Construction modes (controlled by ``command_id``):
    - **New execution** (``command_id=""``, the default): the constructor
      eagerly reads the server's ``"started"`` message to populate
      ``command_id`` and ``pid`` before returning.
    - **Reconnection** (``command_id`` set): skips the started-message
      read, since reconnect streams don't emit one.

    Example:
        handle = sandbox.run("make build", timeout=600, wait=False)

        for chunk in handle:          # auto-reconnects on transient errors
            print(chunk.data, end="")

        result = handle.result
        print(f"Exit code: {result.exit_code}")
    """

    MAX_AUTO_RECONNECTS = 5
    _BACKOFF_BASE = 0.5  # seconds
    _BACKOFF_MAX = 8.0  # seconds

    def __init__(
        self,
        message_stream: Iterator[dict],
        control: Optional[_WSStreamControl],
        sandbox: Sandbox,
        *,
        command_id: str = "",
        stdout_offset: int = 0,
        stderr_offset: int = 0,
    ) -> None:
        """Wrap a message stream for one command.

        Args:
            message_stream: Iterator of message dicts; each has a ``"type"``
                of "started", "stdout", "stderr" or "exit".
            control: Object used for kill()/send_input(); may be None, in
                which case both are no-ops.
            sandbox: Sandbox whose ``reconnect`` is used to resume output.
            command_id: Existing command ID when reconnecting; empty for a
                new execution.
            stdout_offset: Initial stdout byte offset.
            stderr_offset: Initial stderr byte offset.

        Raises:
            SandboxOperationError: For a new execution, if the stream does
                not begin with a "started" message.
        """
        self._stream = message_stream
        self._control = control
        self._sandbox = sandbox
        self._command_id: Optional[str] = None
        self._pid: Optional[int] = None
        self._result: Optional[ExecutionResult] = None
        self._stdout_parts: list[str] = []
        self._stderr_parts: list[str] = []
        self._exhausted = False
        self._last_stdout_offset = stdout_offset
        self._last_stderr_offset = stderr_offset

        # New executions (command_id=""): eager_start reads "started" message.
        # Reconnections (command_id set): skip eager_start since reconnect
        # streams don't send a "started" message.
        if command_id:
            self._command_id = command_id
        else:
            self._consume_started()

    def _consume_started(self) -> None:
        """Eagerly read the 'started' message to populate command_id and pid.

        Blocks briefly until the server sends the started message (arrives
        near-instantly after connection). After this call, command_id and
        pid are available, and the WebSocket is bound to the control object
        (so kill() works).

        Raises:
            SandboxOperationError: If the stream is already empty or its
                first message is not of type "started".
        """
        try:
            first_msg = next(self._stream)
        except StopIteration:
            # The StopIteration itself carries no information for the caller.
            raise SandboxOperationError(
                "Command stream ended before 'started' message",
                operation="command",
            ) from None
        if first_msg.get("type") != "started":
            raise SandboxOperationError(
                f"Expected 'started' message, got '{first_msg.get('type')}'",
                operation="command",
            )
        self._command_id = first_msg.get("command_id")
        self._pid = first_msg.get("pid")

    @property
    def command_id(self) -> Optional[str]:
        """The server-assigned command ID. Available after construction."""
        return self._command_id

    @property
    def pid(self) -> Optional[int]:
        """The process ID on the sandbox. Available after construction."""
        return self._pid

    @property
    def result(self) -> ExecutionResult:
        """The final execution result. Blocks until the command completes.

        Drains the remaining stream if not already exhausted, then returns
        the ExecutionResult with aggregated stdout, stderr, and exit_code.

        Raises:
            SandboxOperationError: If the stream finished without an
                "exit" message.
        """
        if self._result is None:
            for _ in self:
                pass
        if self._result is None:
            raise SandboxOperationError(
                "Command stream ended without exit message",
                operation="command",
            )
        return self._result

    def _iter_stream(self) -> Iterator[OutputChunk]:
        """Iterate over output chunks from the current stream (no reconnect).

        Output text is accumulated as it passes through so that the "exit"
        message can be turned into a complete ExecutionResult. Messages of
        any other type are ignored.
        """
        if self._exhausted:
            return
        for msg in self._stream:
            msg_type = msg.get("type")
            if msg_type in ("stdout", "stderr"):
                chunk = OutputChunk(
                    stream=msg_type,
                    data=msg["data"],
                    offset=msg.get("offset", 0),
                )
                if msg_type == "stdout":
                    self._stdout_parts.append(msg["data"])
                else:
                    self._stderr_parts.append(msg["data"])
                yield chunk
            elif msg_type == "exit":
                self._result = ExecutionResult(
                    stdout="".join(self._stdout_parts),
                    stderr="".join(self._stderr_parts),
                    exit_code=msg["exit_code"],
                )
                self._exhausted = True
                return
        # Stream ran dry without an "exit" message; _result stays None.
        self._exhausted = True

    def __iter__(self) -> Iterator[OutputChunk]:
        """Iterate over output chunks, auto-reconnecting on transient errors.

        Reconnect strategy:
        - 1001 Going Away (hot-reload): immediate reconnect, no delay
        - Other SandboxConnectionError:  exponential backoff (0.5s, 1s, 2s...)
        - After kill():                  no reconnect, error propagates
        - No command ID known:           no reconnect, error propagates

        Raises:
            SandboxConnectionError: If the connection is lost and cannot be
                resumed, or after more than MAX_AUTO_RECONNECTS consecutive
                failures.
        """
        import time

        reconnect_attempts = 0
        while True:
            try:
                for chunk in self._iter_stream():
                    reconnect_attempts = 0  # Reset on successful data
                    # Offsets are in bytes, so measure the encoded length.
                    next_offset = chunk.offset + len(chunk.data.encode("utf-8"))
                    if chunk.stream == "stdout":
                        self._last_stdout_offset = next_offset
                    else:
                        self._last_stderr_offset = next_offset
                    yield chunk
                return  # Stream ended (exit message received or stream ran dry)

            except SandboxConnectionError as e:
                if self._control and self._control.killed:
                    raise
                if self._command_id is None:
                    # Without a command ID there is nothing to resume;
                    # surface the real connection error instead of failing
                    # later on an unrelated assertion.
                    raise

                reconnect_attempts += 1
                if reconnect_attempts > self.MAX_AUTO_RECONNECTS:
                    raise SandboxConnectionError(
                        f"Lost connection {reconnect_attempts} times in "
                        f"succession, giving up"
                    ) from e

                is_hot_reload = isinstance(e, SandboxServerReloadError)
                if not is_hot_reload:
                    delay = min(
                        self._BACKOFF_BASE * (2 ** (reconnect_attempts - 1)),
                        self._BACKOFF_MAX,
                    )
                    time.sleep(delay)

                new_handle = self._sandbox.reconnect(
                    self._command_id,
                    stdout_offset=self._last_stdout_offset,
                    stderr_offset=self._last_stderr_offset,
                )
                # Adopt the fresh connection but keep this handle's
                # accumulated output and offsets.
                self._stream = new_handle._stream
                self._control = new_handle._control
                self._exhausted = False

    def kill(self) -> None:
        """Send a kill signal to the running command (SIGKILL).

        The server kills the entire process group. The stream will
        subsequently yield an exit message with a non-zero exit code.

        Has no effect if the command has already exited or the
        WebSocket connection is closed.
        """
        if self._control:
            self._control.send_kill()

    def send_input(self, data: str) -> None:
        """Write data to the command's stdin.

        Args:
            data: String data to write to stdin.

        Has no effect if the command has already exited or the
        WebSocket connection is closed.
        """
        if self._control:
            self._control.send_input(data)

    @property
    def last_stdout_offset(self) -> int:
        """Last known stdout byte offset (for manual reconnection)."""
        return self._last_stdout_offset

    @property
    def last_stderr_offset(self) -> int:
        """Last known stderr byte offset (for manual reconnection)."""
        return self._last_stderr_offset

    def reconnect(self) -> CommandHandle:
        """Reconnect to this command from the last known offsets.

        Returns a new handle that resumes output from where this one
        left off. Any output produced while disconnected is replayed
        from the server's ring buffer.

        Returns:
            A new CommandHandle.

        Raises:
            SandboxOperationError: If this handle has no command ID, or
                command_id is not found or session expired.
            SandboxConnectionError: If connection to sandbox fails.
        """
        if self._command_id is None:
            # Explicit check rather than `assert`, which is stripped under -O.
            raise SandboxOperationError(
                "Cannot reconnect: command ID is not known",
                operation="command",
            )
        return self._sandbox.reconnect(
            self._command_id,
            stdout_offset=self._last_stdout_offset,
            stderr_offset=self._last_stderr_offset,
        )


class AsyncCommandHandle:
    """Async handle to a running command with streaming output and auto-reconnect.

    Async iterable, yielding OutputChunk objects (stdout and stderr interleaved
    in arrival order). Access .result after iteration to get the full
    ExecutionResult.

    Auto-reconnect behavior:
    - Server hot-reload (1001 Going Away): reconnect immediately
    - Network error / unexpected close:    reconnect with exponential backoff
    - User called kill():                  do NOT reconnect (propagate error)
    - No command ID known yet:             do NOT reconnect (propagate error)

    Construction modes (controlled by ``command_id``):
    - **New execution** (``command_id=""``, the default): call
      ``await handle._ensure_started()`` after construction to read the
      server's ``"started"`` message and populate ``command_id`` / ``pid``.
    - **Reconnection** (``command_id`` set): skips the started-message
      read, since reconnect streams don't emit one.

    Example:
        handle = await sandbox.run("make build", timeout=600, wait=False)

        async for chunk in handle:    # auto-reconnects on transient errors
            print(chunk.data, end="")

        result = await handle.result
        print(f"Exit code: {result.exit_code}")
    """

    MAX_AUTO_RECONNECTS = 5
    _BACKOFF_BASE = 0.5  # seconds
    _BACKOFF_MAX = 8.0  # seconds

    def __init__(
        self,
        message_stream: AsyncIterator[dict],
        control: Optional[_AsyncWSStreamControl],
        sandbox: AsyncSandbox,
        *,
        command_id: str = "",
        stdout_offset: int = 0,
        stderr_offset: int = 0,
    ) -> None:
        """Wrap an async message stream for one command.

        Nothing is read from the stream here; the "started" message is
        consumed later by ``_ensure_started``.

        Args:
            message_stream: Async iterator of message dicts; each has a
                ``"type"`` of "started", "stdout", "stderr" or "exit".
            control: Object used for kill()/send_input(); may be None, in
                which case both are no-ops.
            sandbox: Sandbox whose ``reconnect`` is used to resume output.
            command_id: Existing command ID when reconnecting; empty for a
                new execution.
            stdout_offset: Initial stdout byte offset.
            stderr_offset: Initial stderr byte offset.
        """
        self._stream = message_stream
        self._control = control
        self._sandbox = sandbox
        self._command_id: Optional[str] = None
        self._pid: Optional[int] = None
        self._result: Optional[ExecutionResult] = None
        self._stdout_parts: list[str] = []
        self._stderr_parts: list[str] = []
        self._exhausted = False
        self._last_stdout_offset = stdout_offset
        self._last_stderr_offset = stderr_offset

        # New executions (command_id=""): _ensure_started reads "started".
        # Reconnections (command_id set): skip since reconnect streams
        # don't send a "started" message.
        if command_id:
            self._command_id = command_id
            self._started = True
        else:
            self._started = False

    async def _ensure_started(self) -> None:
        """Read the 'started' message to populate command_id and pid.

        Does nothing if the message was already consumed or the handle was
        created for a reconnection.

        Raises:
            SandboxOperationError: If the stream is already empty or its
                first message is not of type "started".
        """
        if self._started:
            return
        try:
            first_msg = await self._stream.__anext__()
        except StopAsyncIteration:
            # The StopAsyncIteration itself carries no information for the caller.
            raise SandboxOperationError(
                "Command stream ended before 'started' message",
                operation="command",
            ) from None
        if first_msg.get("type") != "started":
            raise SandboxOperationError(
                f"Expected 'started' message, got '{first_msg.get('type')}'",
                operation="command",
            )
        self._command_id = first_msg.get("command_id")
        self._pid = first_msg.get("pid")
        self._started = True

    @property
    def command_id(self) -> Optional[str]:
        """The server-assigned command ID. Available after _ensure_started."""
        return self._command_id

    @property
    def pid(self) -> Optional[int]:
        """The process ID on the sandbox. Available after _ensure_started."""
        return self._pid

    @property
    async def result(self) -> ExecutionResult:
        """The final execution result. Awaitable.

        Accessing the property yields a coroutine (``await handle.result``)
        that drains the remaining stream if needed and returns the
        ExecutionResult with aggregated stdout, stderr, and exit_code.

        Raises:
            SandboxOperationError: If the stream finished without an
                "exit" message.
        """
        if self._result is None:
            async for _ in self:
                pass
        if self._result is None:
            raise SandboxOperationError(
                "Command stream ended without exit message",
                operation="command",
            )
        return self._result

    async def _aiter_stream(self) -> AsyncIterator[OutputChunk]:
        """Iterate over output chunks from the current stream (no reconnect).

        Consumes the "started" message first if that has not happened yet.
        Output text is accumulated as it passes through so that the "exit"
        message can be turned into a complete ExecutionResult. Messages of
        any other type are ignored.
        """
        await self._ensure_started()
        if self._exhausted:
            return
        async for msg in self._stream:
            msg_type = msg.get("type")
            if msg_type in ("stdout", "stderr"):
                chunk = OutputChunk(
                    stream=msg_type,
                    data=msg["data"],
                    offset=msg.get("offset", 0),
                )
                if msg_type == "stdout":
                    self._stdout_parts.append(msg["data"])
                else:
                    self._stderr_parts.append(msg["data"])
                yield chunk
            elif msg_type == "exit":
                self._result = ExecutionResult(
                    stdout="".join(self._stdout_parts),
                    stderr="".join(self._stderr_parts),
                    exit_code=msg["exit_code"],
                )
                self._exhausted = True
                return
        # Stream ran dry without an "exit" message; _result stays None.
        self._exhausted = True

    async def __aiter__(self) -> AsyncIterator[OutputChunk]:
        """Async iterate with auto-reconnect on transient errors.

        Reconnect strategy:
        - 1001 Going Away (hot-reload): immediate reconnect, no delay
        - Other SandboxConnectionError:  exponential backoff (0.5s, 1s, 2s...)
        - After kill():                  no reconnect, error propagates
        - No command ID known yet:       no reconnect, error propagates

        Raises:
            SandboxConnectionError: If the connection is lost and cannot be
                resumed, or after more than MAX_AUTO_RECONNECTS consecutive
                failures.
        """
        import asyncio

        reconnect_attempts = 0
        while True:
            try:
                async for chunk in self._aiter_stream():
                    reconnect_attempts = 0  # Reset on successful data
                    # Offsets are in bytes, so measure the encoded length.
                    next_offset = chunk.offset + len(chunk.data.encode("utf-8"))
                    if chunk.stream == "stdout":
                        self._last_stdout_offset = next_offset
                    else:
                        self._last_stderr_offset = next_offset
                    yield chunk
                return  # Stream ended (exit message received or stream ran dry)

            except SandboxConnectionError as e:
                if self._control and self._control.killed:
                    raise
                if self._command_id is None:
                    # The connection dropped before "started" was read (or
                    # it carried no ID), so there is nothing to resume.
                    # Surface the real connection error instead of failing
                    # later on an unrelated assertion.
                    raise

                reconnect_attempts += 1
                if reconnect_attempts > self.MAX_AUTO_RECONNECTS:
                    raise SandboxConnectionError(
                        f"Lost connection {reconnect_attempts} times "
                        f"in succession, giving up"
                    ) from e

                is_hot_reload = isinstance(e, SandboxServerReloadError)
                if not is_hot_reload:
                    delay = min(
                        self._BACKOFF_BASE * (2 ** (reconnect_attempts - 1)),
                        self._BACKOFF_MAX,
                    )
                    await asyncio.sleep(delay)

                new_handle = await self._sandbox.reconnect(
                    self._command_id,
                    stdout_offset=self._last_stdout_offset,
                    stderr_offset=self._last_stderr_offset,
                )
                # Adopt the fresh connection but keep this handle's
                # accumulated output and offsets.
                self._stream = new_handle._stream
                self._control = new_handle._control
                self._exhausted = False

    async def kill(self) -> None:
        """Send a kill signal to the running command.

        Does nothing when the handle has no control object.
        """
        if self._control:
            await self._control.send_kill()

    async def send_input(self, data: str) -> None:
        """Write data to the command's stdin.

        Args:
            data: String data to write to stdin.

        Does nothing when the handle has no control object.
        """
        if self._control:
            await self._control.send_input(data)

    @property
    def last_stdout_offset(self) -> int:
        """Last known stdout byte offset (for manual reconnection)."""
        return self._last_stdout_offset

    @property
    def last_stderr_offset(self) -> int:
        """Last known stderr byte offset (for manual reconnection)."""
        return self._last_stderr_offset

    async def reconnect(self) -> AsyncCommandHandle:
        """Reconnect to this command from the last known offsets.

        Returns:
            A new AsyncCommandHandle obtained from the sandbox.

        Raises:
            SandboxOperationError: If this handle has no command ID yet.
        """
        if self._command_id is None:
            # Explicit check rather than `assert`, which is stripped under -O.
            raise SandboxOperationError(
                "Cannot reconnect: command ID is not known",
                operation="command",
            )
        return await self._sandbox.reconnect(
            self._command_id,
            stdout_offset=self._last_stdout_offset,
            stderr_offset=self._last_stderr_offset,
        )