initial commit

2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions
--- a/venv/Lib/site-packages/langsmith/_expect.py
+++ b/venv/Lib/site-packages/langsmith/_expect.py
@@ -0,0 +1,465 @@
+"""Make approximate assertions as "expectations" on test results.
+
+This module is designed to be used within test cases decorated with the
+`@pytest.mark.decorator` decorator
+
+It allows you to log scores about a test case and optionally make assertions that log as
+"expectation" feedback to LangSmith.
+
+Example:
+    ```python
+    import pytest
+    from langsmith import expect
+
+
+    @pytest.mark.langsmith
+    def test_output_semantically_close():
+        response = oai_client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": "Say hello!"},
+            ],
+        )
+        response_txt = response.choices[0].message.content
+        # Intended usage
+        expect.embedding_distance(
+            prediction=response_txt,
+            reference="Hello!",
+        ).to_be_less_than(0.9)
+
+        # Score the test case
+        matcher = expect.edit_distance(
+            prediction=response_txt,
+            reference="Hello!",
+        )
+        # Apply an assertion and log 'expectation' feedback to LangSmith
+        matcher.to_be_less_than(1)
+
+        # You can also directly make assertions on values directly
+        expect.value(response_txt).to_contain("Hello!")
+        # Or using a custom check
+        expect.value(response_txt).against(lambda x: "Hello" in x)
+
+        # You can even use this for basic metric logging within tests
+
+        expect.score(0.8)
+        expect.score(0.7, key="similarity").to_be_greater_than(0.7)
+    ```
+"""  # noqa: E501
+
+from __future__ import annotations
+
+import atexit
+import inspect
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Literal,
+    Optional,
+    Union,
+    overload,
+)
+
+from langsmith import client as ls_client
+from langsmith import run_helpers as rh
+from langsmith import run_trees as rt
+from langsmith import utils as ls_utils
+
+if TYPE_CHECKING:
+    from langsmith._internal._edit_distance import EditDistanceConfig
+    from langsmith._internal._embedding_distance import EmbeddingConfig
+
+
+# Sentinel class used until PEP 0661 is accepted
+class _NULL_SENTRY:
+    """A sentinel singleton class used to distinguish omitted keyword arguments
+    from those passed in with the value None (which may have different behavior).
+    """  # noqa: D205
+
+    def __bool__(self) -> Literal[False]:
+        return False
+
+    def __repr__(self) -> str:
+        return "NOT_GIVEN"
+
+
+NOT_GIVEN = _NULL_SENTRY()
+
+
+class _Matcher:
+    """A class for making assertions on expectation values."""
+
+    def __init__(
+        self,
+        client: Optional[ls_client.Client],
+        key: str,
+        value: Any,
+        _executor: Optional[ls_utils.ContextThreadPoolExecutor] = None,
+        run_id: Optional[str] = None,
+    ):
+        self._client = client
+        self.key = key
+        self.value = value
+        self._executor = _executor or ls_utils.ContextThreadPoolExecutor(max_workers=3)
+        self._rt = rh.get_current_run_tree()
+        self._run_id = self._rt.trace_id if self._rt else run_id
+
+    def _submit_feedback(self, score: int, message: Optional[str] = None) -> None:
+        if not ls_utils.test_tracking_is_disabled():
+            if not self._client:
+                self._client = rt.get_cached_client()
+            self._executor.submit(
+                self._client.create_feedback,
+                run_id=self._run_id,
+                key="expectation",
+                score=score,
+                comment=message,
+                session_id=self._rt.session_id if self._rt else None,
+                start_time=self._rt.start_time if self._rt else None,
+            )
+
+    def _assert(self, condition: bool, message: str, method_name: str) -> None:
+        try:
+            assert condition, message
+            self._submit_feedback(1, message=f"Success: {self.key}.{method_name}")
+        except AssertionError as e:
+            self._submit_feedback(0, repr(e))
+            raise e from None
+
+    def to_be_less_than(self, value: float) -> None:
+        """Assert that the expectation value is less than the given value.
+
+        Args:
+            value: The value to compare against.
+
+        Raises:
+            AssertionError: If the expectation value is not less than the given value.
+        """
+        self._assert(
+            self.value < value,
+            f"Expected {self.key} to be less than {value}, but got {self.value}",
+            "to_be_less_than",
+        )
+
+    def to_be_greater_than(self, value: float) -> None:
+        """Assert that the expectation value is greater than the given value.
+
+        Args:
+            value: The value to compare against.
+
+        Raises:
+            AssertionError: If the expectation value is not
+            greater than the given value.
+        """
+        self._assert(
+            self.value > value,
+            f"Expected {self.key} to be greater than {value}, but got {self.value}",
+            "to_be_greater_than",
+        )
+
+    def to_be_between(self, min_value: float, max_value: float) -> None:
+        """Assert that the expectation value is between the given min and max values.
+
+        Args:
+            min_value: The minimum value (exclusive).
+            max_value: The maximum value (exclusive).
+
+        Raises:
+            AssertionError: If the expectation value is not between the min and max.
+        """
+        self._assert(
+            min_value < self.value < max_value,
+            f"Expected {self.key} to be between {min_value} and {max_value},"
+            f" but got {self.value}",
+            "to_be_between",
+        )
+
+    def to_be_approximately(self, value: float, precision: int = 2) -> None:
+        """Assert that the expectation value is approximately equal to the given value.
+
+        Args:
+            value: The value to compare against.
+            precision: The number of decimal places to round to for comparison.
+
+        Raises:
+            AssertionError: If the rounded expectation value
+                does not equal the rounded given value.
+        """
+        self._assert(
+            round(self.value, precision) == round(value, precision),
+            f"Expected {self.key} to be approximately {value}, but got {self.value}",
+            "to_be_approximately",
+        )
+
+    def to_equal(self, value: float) -> None:
+        """Assert that the expectation value equals the given value.
+
+        Args:
+            value: The value to compare against.
+
+        Raises:
+            AssertionError: If the expectation value does
+                not exactly equal the given value.
+        """
+        self._assert(
+            self.value == value,
+            f"Expected {self.key} to be equal to {value}, but got {self.value}",
+            "to_equal",
+        )
+
+    def to_be_none(self) -> None:
+        """Assert that the expectation value is `None`.
+
+        Raises:
+            AssertionError: If the expectation value is not `None`.
+        """
+        self._assert(
+            self.value is None,
+            f"Expected {self.key} to be None, but got {self.value}",
+            "to_be_none",
+        )
+
+    def to_contain(self, value: Any) -> None:
+        """Assert that the expectation value contains the given value.
+
+        Args:
+            value: The value to check for containment.
+
+        Raises:
+            AssertionError: If the expectation value does not contain the given value.
+        """
+        self._assert(
+            value in self.value,
+            f"Expected {self.key} to contain {value}, but it does not",
+            "to_contain",
+        )
+
+    # Custom assertions
+    def against(self, func: Callable, /) -> None:
+        """Assert the expectation value against a custom function.
+
+        Args:
+            func: A custom function that takes the expectation value as input.
+
+        Raises:
+            AssertionError: If the custom function returns False.
+        """
+        func_signature = inspect.signature(func)
+        self._assert(
+            func(self.value),
+            f"Assertion {func_signature} failed for {self.key}",
+            "against",
+        )
+
+
+class _Expect:
+    """A class for setting expectations on test results."""
+
+    def __init__(self, *, client: Optional[ls_client.Client] = None):
+        self._client = client
+        self.executor = ls_utils.ContextThreadPoolExecutor(max_workers=3)
+        atexit.register(self.executor.shutdown, wait=True)
+
+    def embedding_distance(
+        self,
+        prediction: str,
+        reference: str,
+        *,
+        config: Optional[EmbeddingConfig] = None,
+    ) -> _Matcher:
+        """Compute the embedding distance between the prediction and reference.
+
+        This logs the embedding distance to LangSmith and returns a `_Matcher` instance
+        for making assertions on the distance value.
+
+        By default, this uses the OpenAI API for computing embeddings.
+
+        Args:
+            prediction: The predicted string to compare.
+            reference: The reference string to compare against.
+            config: Optional configuration for the embedding distance evaluator.
+
+                Supported options:
+
+                - `encoder`: A custom encoder function to encode the list of input
+                    strings to embeddings.
+
+                    Defaults to the OpenAI API.
+                - `metric`: The distance metric to use for comparison.
+
+                    Supported values: `'cosine'`, `'euclidean'`, `'manhattan'`,
+                    `'chebyshev'`, `'hamming'`.
+
+        Returns:
+            A `_Matcher` instance for the embedding distance value.
+
+
+        Example:
+            ```python
+            expect.embedding_distance(
+                prediction="hello",
+                reference="hi",
+            ).to_be_less_than(1.0)
+            ```
+        """  # noqa: E501
+        from langsmith._internal._embedding_distance import EmbeddingDistance
+
+        config = config or {}
+        encoder_func = "custom" if config.get("encoder") else "openai"
+        evaluator = EmbeddingDistance(config=config)
+        score = evaluator.evaluate(prediction=prediction, reference=reference)
+        src_info = {"encoder": encoder_func, "metric": evaluator.distance}
+        self._submit_feedback(
+            "embedding_distance",
+            {
+                "score": score,
+                "source_info": src_info,
+                "comment": f"Using {encoder_func}, Metric: {evaluator.distance}",
+            },
+        )
+        return _Matcher(
+            self._client, "embedding_distance", score, _executor=self.executor
+        )
+
+    def edit_distance(
+        self,
+        prediction: str,
+        reference: str,
+        *,
+        config: Optional[EditDistanceConfig] = None,
+    ) -> _Matcher:
+        """Compute the string distance between the prediction and reference.
+
+        This logs the string distance (Damerau-Levenshtein) to LangSmith and returns
+        a `_Matcher` instance for making assertions on the distance value.
+
+        This depends on the `rapidfuzz` package for string distance computation.
+
+        Args:
+            prediction: The predicted string to compare.
+            reference: The reference string to compare against.
+            config: Optional configuration for the string distance evaluator.
+
+                Supported options:
+
+                - `metric`: The distance metric to use for comparison.
+
+                    Supported values: `'damerau_levenshtein'`, `'levenshtein'`,
+                    `'jaro'`, `'jaro_winkler'`, `'hamming'`, `'indel'`.
+                - `normalize_score`: Whether to normalize the score between `0` and `1`.
+
+        Returns:
+            A `_Matcher` instance for the string distance value.
+
+        Examples:
+            ```python
+            expect.edit_distance("hello", "helo").to_be_less_than(1)
+            ```
+        """
+        from langsmith._internal._edit_distance import EditDistance
+
+        config = config or {}
+        metric = config.get("metric") or "damerau_levenshtein"
+        normalize = config.get("normalize_score", True)
+        evaluator = EditDistance(config=config)
+        score = evaluator.evaluate(prediction=prediction, reference=reference)
+        src_info = {"metric": metric, "normalize": normalize}
+        self._submit_feedback(
+            "edit_distance",
+            {
+                "score": score,
+                "source_info": src_info,
+                "comment": f"Using {metric}, Normalize: {normalize}",
+            },
+        )
+        return _Matcher(
+            self._client,
+            "edit_distance",
+            score,
+            _executor=self.executor,
+        )
+
+    def value(self, value: Any) -> _Matcher:
+        """Create a `_Matcher` instance for making assertions on the given value.
+
+        Args:
+            value: The value to make assertions on.
+
+        Returns:
+            A `_Matcher` instance for the given value.
+
+        Example:
+            ```python
+            expect.value(10).to_be_less_than(20)
+            ```
+        """
+        return _Matcher(self._client, "value", value, _executor=self.executor)
+
+    def score(
+        self,
+        score: Union[float, int, bool],
+        *,
+        key: str = "score",
+        source_run_id: Optional[ls_client.ID_TYPE] = None,
+        comment: Optional[str] = None,
+    ) -> _Matcher:
+        """Log a numeric score to LangSmith.
+
+        Args:
+            score: The score value to log.
+            key: The key to use for logging the score. Defaults to `'score'`.
+
+        Example:
+            ```python
+            expect.score(0.8)  # doctest: +ELLIPSIS
+            <langsmith._expect._Matcher object at ...>
+
+            expect.score(0.8, key="similarity").to_be_greater_than(0.7)
+            ```
+        """
+        self._submit_feedback(
+            key,
+            {
+                "score": score,
+                "source_info": {"method": "expect.score"},
+                "source_run_id": source_run_id,
+                "comment": comment,
+            },
+        )
+        return _Matcher(self._client, key, score, _executor=self.executor)
+
+    ## Private Methods
+
+    @overload
+    def __call__(self, value: Any, /) -> _Matcher: ...
+
+    @overload
+    def __call__(self, /, *, client: ls_client.Client) -> _Expect: ...
+
+    def __call__(
+        self,
+        value: Optional[Any] = NOT_GIVEN,
+        /,
+        client: Optional[ls_client.Client] = None,
+    ) -> Union[_Expect, _Matcher]:
+        expected = _Expect(client=client)
+        if value is not NOT_GIVEN:
+            return expected.value(value)
+        return expected
+
+    def _submit_feedback(self, key: str, results: dict):
+        current_run = rh.get_current_run_tree()
+        run_id = current_run.trace_id if current_run else None
+        if not ls_utils.test_tracking_is_disabled():
+            if not self._client:
+                self._client = rt.get_cached_client()
+            self.executor.submit(
+                self._client.create_feedback, run_id=run_id, key=key, **results
+            )
+
+
+expect = _Expect()
+
+__all__ = ["expect"]