initial commit

2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions
--- a/venv/Lib/site-packages/langchain_community/callbacks/tracers/init.py
+++ b/venv/Lib/site-packages/langchain_community/callbacks/tracers/init.py
@@ -0,0 +1,16 @@
+"""Tracers that record execution of LangChain runs."""
+
+from langchain_core.tracers.langchain import LangChainTracer
+from langchain_core.tracers.stdout import (
+    ConsoleCallbackHandler,
+    FunctionCallbackHandler,
+)
+
+from langchain_community.callbacks.tracers.wandb import WandbTracer
+
+__all__ = [
+    "ConsoleCallbackHandler",
+    "FunctionCallbackHandler",
+    "LangChainTracer",
+    "WandbTracer",
+]
--- a/venv/Lib/site-packages/langchain_community/callbacks/tracers/pycache/init.cpython-311.pyc
+++ b/venv/Lib/site-packages/langchain_community/callbacks/tracers/pycache/init.cpython-311.pyc
--- a/venv/Lib/site-packages/langchain_community/callbacks/tracers/pycache/comet.cpython-311.pyc
+++ b/venv/Lib/site-packages/langchain_community/callbacks/tracers/pycache/comet.cpython-311.pyc
--- a/venv/Lib/site-packages/langchain_community/callbacks/tracers/pycache/wandb.cpython-311.pyc
+++ b/venv/Lib/site-packages/langchain_community/callbacks/tracers/pycache/wandb.cpython-311.pyc
--- a/venv/Lib/site-packages/langchain_community/callbacks/tracers/comet.py
+++ b/venv/Lib/site-packages/langchain_community/callbacks/tracers/comet.py
@@ -0,0 +1,135 @@
+from types import ModuleType, SimpleNamespace
+from typing import TYPE_CHECKING, Any, Callable, Dict
+
+from langchain_core.tracers import BaseTracer
+from langchain_core.utils import guard_import
+
+if TYPE_CHECKING:
+    from uuid import UUID
+
+    from comet_llm import Span
+    from comet_llm.chains.chain import Chain
+
+    from langchain_community.callbacks.tracers.schemas import Run
+
+
+def _get_run_type(run: "Run") -> str:
+    if isinstance(run.run_type, str):
+        return run.run_type
+    elif hasattr(run.run_type, "value"):
+        return run.run_type.value
+    else:
+        return str(run.run_type)
+
+
+def import_comet_llm_api() -> SimpleNamespace:
+    """Import comet_llm api and raise an error if it is not installed."""
+    comet_llm = guard_import("comet_llm")
+    comet_llm_chains = guard_import("comet_llm.chains")
+
+    return SimpleNamespace(
+        chain=comet_llm_chains.chain,
+        span=comet_llm_chains.span,
+        chain_api=comet_llm_chains.api,
+        experiment_info=comet_llm.experiment_info,
+        flush=comet_llm.flush,
+    )
+
+
+class CometTracer(BaseTracer):
+    """Comet Tracer."""
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Initialize the Comet Tracer."""
+        super().__init__(**kwargs)
+        self._span_map: Dict["UUID", "Span"] = {}
+        """Map from run id to span."""
+        self._chains_map: Dict["UUID", "Chain"] = {}
+        """Map from run id to chain."""
+        self._initialize_comet_modules()
+
+    def _initialize_comet_modules(self) -> None:
+        comet_llm_api = import_comet_llm_api()
+        self._chain: ModuleType = comet_llm_api.chain
+        self._span: ModuleType = comet_llm_api.span
+        self._chain_api: ModuleType = comet_llm_api.chain_api
+        self._experiment_info: ModuleType = comet_llm_api.experiment_info
+        self._flush: Callable[[], None] = comet_llm_api.flush
+
+    def _persist_run(self, run: "Run") -> None:
+        run_dict: Dict[str, Any] = run.dict()
+        chain_ = self._chains_map[run.id]
+        chain_.set_outputs(outputs=run_dict["outputs"])
+        self._chain_api.log_chain(chain_)
+
+    def _process_start_trace(self, run: "Run") -> None:
+        run_dict: Dict[str, Any] = run.dict()
+        if not run.parent_run_id:
+            # This is the first run, which maps to a chain
+            metadata = run_dict["extra"].get("metadata", None)
+
+            chain_: "Chain" = self._chain.Chain(
+                inputs=run_dict["inputs"],
+                metadata=metadata,
+                experiment_info=self._experiment_info.get(),
+            )
+            self._chains_map[run.id] = chain_
+        else:
+            span: "Span" = self._span.Span(
+                inputs=run_dict["inputs"],
+                category=_get_run_type(run),
+                metadata=run_dict["extra"],
+                name=run.name,
+            )
+            span.__api__start__(self._chains_map[run.parent_run_id])
+            self._chains_map[run.id] = self._chains_map[run.parent_run_id]
+            self._span_map[run.id] = span
+
+    def _process_end_trace(self, run: "Run") -> None:
+        run_dict: Dict[str, Any] = run.dict()
+        if not run.parent_run_id:
+            pass
+            # Langchain will call _persist_run for us
+        else:
+            span = self._span_map[run.id]
+            span.set_outputs(outputs=run_dict["outputs"])
+            span.__api__end__()
+
+    def flush(self) -> None:
+        self._flush()
+
+    def _on_llm_start(self, run: "Run") -> None:
+        """Process the LLM Run upon start."""
+        self._process_start_trace(run)
+
+    def _on_llm_end(self, run: "Run") -> None:
+        """Process the LLM Run."""
+        self._process_end_trace(run)
+
+    def _on_llm_error(self, run: "Run") -> None:
+        """Process the LLM Run upon error."""
+        self._process_end_trace(run)
+
+    def _on_chain_start(self, run: "Run") -> None:
+        """Process the Chain Run upon start."""
+        self._process_start_trace(run)
+
+    def _on_chain_end(self, run: "Run") -> None:
+        """Process the Chain Run."""
+        self._process_end_trace(run)
+
+    def _on_chain_error(self, run: "Run") -> None:
+        """Process the Chain Run upon error."""
+        self._process_end_trace(run)
+
+    def _on_tool_start(self, run: "Run") -> None:
+        """Process the Tool Run upon start."""
+        self._process_start_trace(run)
+
+    def _on_tool_end(self, run: "Run") -> None:
+        """Process the Tool Run."""
+        self._process_end_trace(run)
+
+    def _on_tool_error(self, run: "Run") -> None:
+        """Process the Tool Run upon error."""
+        self._process_end_trace(run)
--- a/venv/Lib/site-packages/langchain_community/callbacks/tracers/wandb.py
+++ b/venv/Lib/site-packages/langchain_community/callbacks/tracers/wandb.py
@@ -0,0 +1,507 @@
+"""A Tracer Implementation that records activity to Weights & Biases."""
+
+from __future__ import annotations
+
+import json
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    TypedDict,
+    Union,
+)
+
+from langchain_core._api import warn_deprecated
+from langchain_core.output_parsers.pydantic import PydanticBaseModel
+from langchain_core.tracers.base import BaseTracer
+from langchain_core.tracers.schemas import Run
+
+if TYPE_CHECKING:
+    from wandb import Settings as WBSettings
+    from wandb.sdk.data_types.trace_tree import Trace
+    from wandb.sdk.lib.paths import StrPath
+    from wandb.wandb_run import Run as WBRun
+
+PRINT_WARNINGS = True
+
+
+def _serialize_io(run_io: Optional[dict]) -> dict:
+    """Utility to serialize the input and output of a run to store in wandb.
+    Currently, supports serializing pydantic models and protobuf messages.
+
+    :param run_io: The inputs and outputs of the run.
+    :return: The serialized inputs and outputs.
+
+
+    """
+    if not run_io:
+        return {}
+    from google.protobuf.json_format import MessageToJson
+    from google.protobuf.message import Message
+
+    serialized_inputs = {}
+    for key, value in run_io.items():
+        if isinstance(value, Message):
+            serialized_inputs[key] = MessageToJson(value)
+
+        elif isinstance(value, PydanticBaseModel):
+            serialized_inputs[key] = (
+                value.model_dump_json()
+                if hasattr(value, "model_dump_json")
+                else value.json()
+            )
+
+        elif key == "input_documents":
+            serialized_inputs.update(
+                {f"input_document_{i}": doc.json() for i, doc in enumerate(value)}
+            )
+        else:
+            serialized_inputs[key] = value
+    return serialized_inputs
+
+
+def flatten_run(run: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Utility to flatten a nest run object into a list of runs.
+    :param run: The base run to flatten.
+    :return: The flattened list of runs.
+    """
+
+    def flatten(child_runs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Utility to recursively flatten a list of child runs in a run.
+        :param child_runs: The list of child runs to flatten.
+        :return: The flattened list of runs.
+        """
+        if child_runs is None:
+            return []
+
+        result = []
+        for item in child_runs:
+            child_runs = item.pop("child_runs", [])
+            result.append(item)
+            result.extend(flatten(child_runs))
+
+        return result
+
+    return flatten([run])
+
+
+def truncate_run_iterative(
+    runs: List[Dict[str, Any]], keep_keys: Tuple[str, ...] = ()
+) -> List[Dict[str, Any]]:
+    """Utility to truncate a list of runs dictionaries to only keep the specified
+        keys in each run.
+    :param runs: The list of runs to truncate.
+    :param keep_keys: The keys to keep in each run.
+    :return: The truncated list of runs.
+    """
+
+    def truncate_single(run: Dict[str, Any]) -> Dict[str, Any]:
+        """Utility to truncate a single run dictionary to only keep the specified
+            keys.
+        :param run: The run dictionary to truncate.
+        :return: The truncated run dictionary
+        """
+        new_dict = {}
+        for key in run:
+            if key in keep_keys:
+                new_dict[key] = run.get(key)
+        return new_dict
+
+    return list(map(truncate_single, runs))
+
+
+def modify_serialized_iterative(
+    runs: List[Dict[str, Any]],
+    exact_keys: Tuple[str, ...] = (),
+    partial_keys: Tuple[str, ...] = (),
+) -> List[Dict[str, Any]]:
+    """Utility to modify the serialized field of a list of runs dictionaries.
+    removes any keys that match the exact_keys and any keys that contain any of the
+    partial_keys.
+    recursively moves the dictionaries under the kwargs key to the top level.
+    changes the "id" field to a string "_kind" field that tells WBTraceTree how to
+    visualize the run. promotes the "serialized" field to the top level.
+    :param runs: The list of runs to modify.
+    :param exact_keys: A tuple of keys to remove from the serialized field.
+    :param partial_keys: A tuple of partial keys to remove from the serialized
+        field.
+    :return: The modified list of runs.
+    """
+
+    def remove_exact_and_partial_keys(obj: Dict[str, Any]) -> Dict[str, Any]:
+        """Recursively removes exact and partial keys from a dictionary.
+        :param obj: The dictionary to remove keys from.
+        :return: The modified dictionary.
+        """
+        if isinstance(obj, dict):
+            obj = {
+                k: v
+                for k, v in obj.items()
+                if k not in exact_keys
+                and not any(partial in k for partial in partial_keys)
+            }
+            for k, v in obj.items():
+                obj[k] = remove_exact_and_partial_keys(v)
+        elif isinstance(obj, list):
+            obj = [remove_exact_and_partial_keys(x) for x in obj]
+        return obj
+
+    def handle_id_and_kwargs(obj: Dict[str, Any], root: bool = False) -> Dict[str, Any]:
+        """Recursively handles the id and kwargs fields of a dictionary.
+        changes the id field to a string "_kind" field that tells WBTraceTree how
+        to visualize the run. recursively moves the dictionaries under the kwargs
+        key to the top level.
+        :param obj: a run dictionary with id and kwargs fields.
+        :param root: whether this is the root dictionary or the serialized
+            dictionary.
+        :return: The modified dictionary.
+        """
+        if isinstance(obj, dict):
+            if "data" in obj and isinstance(obj["data"], dict):
+                obj = obj["data"]
+            if ("id" in obj or "name" in obj) and not root:
+                _kind = obj.get("id")
+                if not _kind:
+                    _kind = [obj.get("name")]
+                if isinstance(_kind, list):
+                    obj["_kind"] = _kind[-1]
+                    obj.pop("id", None)
+                    obj.pop("name", None)
+                if "kwargs" in obj:
+                    kwargs = obj.pop("kwargs")
+                    for k, v in kwargs.items():
+                        obj[k] = v
+            for k, v in obj.items():
+                obj[k] = handle_id_and_kwargs(v)
+        elif isinstance(obj, list):
+            obj = [handle_id_and_kwargs(x) for x in obj]
+        return obj
+
+    def transform_serialized(serialized: Dict[str, Any]) -> Dict[str, Any]:
+        """Transforms the serialized field of a run dictionary to be compatible
+            with WBTraceTree.
+        :param serialized: The serialized field of a run dictionary.
+        :return: The transformed serialized field.
+        """
+        serialized = handle_id_and_kwargs(serialized, root=True)
+        serialized = remove_exact_and_partial_keys(serialized)
+        return serialized
+
+    def transform_run(run: Dict[str, Any]) -> Dict[str, Any]:
+        """Transforms a run dictionary to be compatible with WBTraceTree.
+        :param run: The run dictionary to transform.
+        :return: The transformed run dictionary.
+        """
+        transformed_dict = transform_serialized(run)
+
+        serialized = transformed_dict.pop("serialized")
+        for k, v in serialized.items():
+            transformed_dict[k] = v
+
+        _kind = transformed_dict.get("_kind", None)
+        name = transformed_dict.pop("name", None)
+
+        if not name:
+            name = _kind
+
+        output_dict = {
+            f"{name}": transformed_dict,
+        }
+        return output_dict
+
+    return list(map(transform_run, runs))
+
+
+def build_tree(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """Builds a nested dictionary from a list of runs.
+    :param runs: The list of runs to build the tree from.
+    :return: The nested dictionary representing the langchain Run in a tree
+        structure compatible with WBTraceTree.
+    """
+    id_to_data = {}
+    child_to_parent = {}
+
+    for entity in runs:
+        for key, data in entity.items():
+            id_val = data.pop("id", None)
+            parent_run_id = data.pop("parent_run_id", None)
+            id_to_data[id_val] = {key: data}
+            if parent_run_id:
+                child_to_parent[id_val] = parent_run_id
+
+    for child_id, parent_id in child_to_parent.items():
+        parent_dict = id_to_data[parent_id]
+        parent_dict[next(iter(parent_dict))][next(iter(id_to_data[child_id]))] = (
+            id_to_data[child_id][next(iter(id_to_data[child_id]))]
+        )
+
+    root_dict = next(
+        data for id_val, data in id_to_data.items() if id_val not in child_to_parent
+    )
+
+    return root_dict
+
+
+class WandbRunArgs(TypedDict):
+    """Arguments for the WandbTracer."""
+
+    job_type: Optional[str]
+    dir: Optional[StrPath]
+    config: Union[Dict, str, None]
+    project: Optional[str]
+    entity: Optional[str]
+    reinit: Optional[bool]
+    tags: Optional[Sequence]
+    group: Optional[str]
+    name: Optional[str]
+    notes: Optional[str]
+    magic: Optional[Union[dict, str, bool]]
+    config_exclude_keys: Optional[List[str]]
+    config_include_keys: Optional[List[str]]
+    anonymous: Optional[str]
+    mode: Optional[str]
+    allow_val_change: Optional[bool]
+    resume: Optional[Union[bool, str]]
+    force: Optional[bool]
+    tensorboard: Optional[bool]
+    sync_tensorboard: Optional[bool]
+    monitor_gym: Optional[bool]
+    save_code: Optional[bool]
+    id: Optional[str]
+    settings: Union[WBSettings, Dict[str, Any], None]
+
+
+class WandbTracer(BaseTracer):
+    """Callback Handler that logs to Weights and Biases.
+
+    This handler will log the model architecture and run traces to Weights and Biases.
+    This will ensure that all LangChain activity is logged to W&B.
+    """
+
+    _run: Optional[WBRun] = None
+    _run_args: Optional[WandbRunArgs] = None
+
+    def __init__(
+        self,
+        run_args: Optional[WandbRunArgs] = None,
+        io_serializer: Callable = _serialize_io,
+        **kwargs: Any,
+    ) -> None:
+        """Initializes the WandbTracer.
+
+        Parameters:
+            run_args: (dict, optional) Arguments to pass to `wandb.init()`. If not
+                provided, `wandb.init()` will be called with no arguments. Please
+                refer to the `wandb.init` for more details.
+            io_serializer: callable A function that serializes the input and outputs
+             of a run to store in wandb. Defaults to "_serialize_io"
+
+        To use W&B to monitor all LangChain activity, add this tracer like any other
+        LangChain callback:
+        ```
+        from wandb.integration.langchain import WandbTracer
+
+        tracer = WandbTracer()
+        chain = LLMChain(llm, callbacks=[tracer])
+        # ...end of notebook / script:
+        tracer.finish()
+        ```
+        """
+        super().__init__(**kwargs)
+        try:
+            import wandb
+            from wandb.sdk.data_types import trace_tree
+        except ImportError as e:
+            raise ImportError(
+                "Could not import wandb python package."
+                "Please install it with `pip install -U wandb`."
+            ) from e
+        self._wandb = wandb
+        self._trace_tree = trace_tree
+        self._run_args = run_args
+        self._ensure_run(should_print_url=(wandb.run is None))
+        self._io_serializer = io_serializer
+        warn_deprecated(
+            "0.3.8",
+            pending=False,
+            message=(
+                "Please use the `WeaveTracer` from the `weave` package instead of this."
+                "The `WeaveTracer` is a more flexible and powerful tool for logging "
+                "and tracing your LangChain callables."
+                "Find more information at https://weave-docs.wandb.ai/guides/integrations/langchain"
+            ),
+            alternative=(
+                "Please instantiate the WeaveTracer from "
+                "`weave.integrations.langchain import WeaveTracer` ."
+                "For autologging simply use `weave.init()` and log all traces "
+                "from your LangChain callables."
+            ),
+        )
+
+    def finish(self) -> None:
+        """Waits for all asynchronous processes to finish and data to upload.
+
+        Proxy for `wandb.finish()`.
+        """
+        self._wandb.finish()
+
+    def _ensure_run(self, should_print_url: bool = False) -> None:
+        """Ensures an active W&B run exists.
+
+        If not, will start a new run with the provided run_args.
+        """
+        if self._wandb.run is None:
+            run_args: Dict = {**(self._run_args or {})}
+
+            if "settings" not in run_args:
+                run_args["settings"] = {"silent": True}
+
+            self._wandb.init(**run_args)
+        if self._wandb.run is not None:
+            if should_print_url:
+                run_url = self._wandb.run.settings.run_url
+                self._wandb.termlog(
+                    f"Streaming LangChain activity to W&B at {run_url}\n"
+                    "`WandbTracer` is currently in beta.\n"
+                    "Please report any issues to "
+                    "https://github.com/wandb/wandb/issues with the tag "
+                    "`langchain`."
+                )
+
+            self._wandb.run._label(repo="langchain")
+
+    def process_model_dict(self, run: Run) -> Optional[Dict[str, Any]]:
+        """Utility to process a run for wandb model_dict serialization.
+        :param run: The run to process.
+        :return: The convert model_dict to pass to WBTraceTree.
+        """
+        try:
+            data = json.loads(run.json())
+            processed = flatten_run(data)
+            keep_keys = (
+                "id",
+                "name",
+                "serialized",
+                "parent_run_id",
+            )
+            processed = truncate_run_iterative(processed, keep_keys=keep_keys)
+            exact_keys, partial_keys = (
+                ("lc", "type", "graph"),
+                (
+                    "api_key",
+                    "input",
+                    "output",
+                ),
+            )
+            processed = modify_serialized_iterative(
+                processed, exact_keys=exact_keys, partial_keys=partial_keys
+            )
+            output = build_tree(processed)
+            return output
+        except Exception as e:
+            if PRINT_WARNINGS:
+                self._wandb.termerror(f"WARNING: Failed to serialize model: {e}")
+            return None
+
+    def _log_trace_from_run(self, run: Run) -> None:
+        """Logs a LangChain Run to W*B as a W&B Trace."""
+        self._ensure_run()
+
+        def create_trace(
+            run: "Run", parent: Optional["Trace"] = None
+        ) -> Optional["Trace"]:
+            """
+            Create a trace for a given run and its child runs.
+
+            Args:
+                run (Run): The run for which to create a trace.
+                parent (Optional[Trace]): The parent trace.
+                If provided, the created trace is added as a child to the parent trace.
+
+            Returns:
+                The created trace. If an error occurs during the creation of the trace,
+                    None is returned.
+
+            Raises:
+                Exception: If an error occurs during the creation of the trace,
+                no exception is raised and a warning is printed.
+            """
+
+            def get_metadata_dict(r: "Run") -> Dict[str, Any]:
+                """
+                Extract metadata from a given run.
+
+                This function extracts metadata from a given run
+                and returns it as a dictionary.
+
+                Args:
+                    r (Run): The run from which to extract metadata.
+
+                Returns:
+                    `dict` containing the extracted metadata.
+                """
+                run_dict = json.loads(r.json())
+                metadata_dict = run_dict.get("metadata", {})
+                metadata_dict["run_id"] = run_dict.get("id")
+                metadata_dict["parent_run_id"] = run_dict.get("parent_run_id")
+                metadata_dict["tags"] = run_dict.get("tags")
+                metadata_dict["execution_order"] = run_dict.get(
+                    "dotted_order", ""
+                ).count(".")
+                return metadata_dict
+
+            try:
+                if run.run_type in ["llm", "tool"]:
+                    run_type = run.run_type
+                elif run.run_type == "chain":
+                    run_type = "agent" if "agent" in run.name.lower() else "chain"
+                else:
+                    run_type = None
+
+                metadata = get_metadata_dict(run)
+                trace_tree = self._trace_tree.Trace(
+                    name=run.name,
+                    kind=run_type,
+                    status_code="error" if run.error else "success",
+                    start_time_ms=int(run.start_time.timestamp() * 1000)
+                    if run.start_time is not None
+                    else None,
+                    end_time_ms=int(run.end_time.timestamp() * 1000)
+                    if run.end_time is not None
+                    else None,
+                    metadata=metadata,
+                    inputs=self._io_serializer(run.inputs),
+                    outputs=self._io_serializer(run.outputs),
+                )
+
+                # If the run has child runs, recursively create traces for them
+                for child_run in run.child_runs:
+                    create_trace(child_run, trace_tree)
+
+                if parent is None:
+                    return trace_tree
+                else:
+                    parent.add_child(trace_tree)
+                    return parent
+            except Exception as e:
+                if PRINT_WARNINGS:
+                    self._wandb.termwarn(
+                        f"WARNING: Failed to serialize trace for run due to: {e}"
+                    )
+                return None
+
+        run_trace = create_trace(run)
+        model_dict = self.process_model_dict(run)
+        if model_dict is not None and run_trace is not None:
+            run_trace._model_dict = model_dict
+        if self._wandb.run is not None and run_trace is not None:
+            run_trace.log("langchain_trace")
+
+    def _persist_run(self, run: "Run") -> None:
+        """Persist a run."""
+        self._log_trace_from_run(run)