initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
"""Tracers that record execution of LangChain runs."""
from langchain_core.tracers.langchain import LangChainTracer
from langchain_core.tracers.stdout import (
ConsoleCallbackHandler,
FunctionCallbackHandler,
)
from langchain_community.callbacks.tracers.wandb import WandbTracer
__all__ = [
"ConsoleCallbackHandler",
"FunctionCallbackHandler",
"LangChainTracer",
"WandbTracer",
]

View File

@@ -0,0 +1,135 @@
from types import ModuleType, SimpleNamespace
from typing import TYPE_CHECKING, Any, Callable, Dict
from langchain_core.tracers import BaseTracer
from langchain_core.utils import guard_import
if TYPE_CHECKING:
from uuid import UUID
from comet_llm import Span
from comet_llm.chains.chain import Chain
from langchain_community.callbacks.tracers.schemas import Run
def _get_run_type(run: "Run") -> str:
if isinstance(run.run_type, str):
return run.run_type
elif hasattr(run.run_type, "value"):
return run.run_type.value
else:
return str(run.run_type)
def import_comet_llm_api() -> SimpleNamespace:
"""Import comet_llm api and raise an error if it is not installed."""
comet_llm = guard_import("comet_llm")
comet_llm_chains = guard_import("comet_llm.chains")
return SimpleNamespace(
chain=comet_llm_chains.chain,
span=comet_llm_chains.span,
chain_api=comet_llm_chains.api,
experiment_info=comet_llm.experiment_info,
flush=comet_llm.flush,
)
class CometTracer(BaseTracer):
"""Comet Tracer."""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the Comet Tracer."""
super().__init__(**kwargs)
self._span_map: Dict["UUID", "Span"] = {}
"""Map from run id to span."""
self._chains_map: Dict["UUID", "Chain"] = {}
"""Map from run id to chain."""
self._initialize_comet_modules()
def _initialize_comet_modules(self) -> None:
comet_llm_api = import_comet_llm_api()
self._chain: ModuleType = comet_llm_api.chain
self._span: ModuleType = comet_llm_api.span
self._chain_api: ModuleType = comet_llm_api.chain_api
self._experiment_info: ModuleType = comet_llm_api.experiment_info
self._flush: Callable[[], None] = comet_llm_api.flush
def _persist_run(self, run: "Run") -> None:
run_dict: Dict[str, Any] = run.dict()
chain_ = self._chains_map[run.id]
chain_.set_outputs(outputs=run_dict["outputs"])
self._chain_api.log_chain(chain_)
def _process_start_trace(self, run: "Run") -> None:
run_dict: Dict[str, Any] = run.dict()
if not run.parent_run_id:
# This is the first run, which maps to a chain
metadata = run_dict["extra"].get("metadata", None)
chain_: "Chain" = self._chain.Chain(
inputs=run_dict["inputs"],
metadata=metadata,
experiment_info=self._experiment_info.get(),
)
self._chains_map[run.id] = chain_
else:
span: "Span" = self._span.Span(
inputs=run_dict["inputs"],
category=_get_run_type(run),
metadata=run_dict["extra"],
name=run.name,
)
span.__api__start__(self._chains_map[run.parent_run_id])
self._chains_map[run.id] = self._chains_map[run.parent_run_id]
self._span_map[run.id] = span
def _process_end_trace(self, run: "Run") -> None:
run_dict: Dict[str, Any] = run.dict()
if not run.parent_run_id:
pass
# Langchain will call _persist_run for us
else:
span = self._span_map[run.id]
span.set_outputs(outputs=run_dict["outputs"])
span.__api__end__()
def flush(self) -> None:
self._flush()
def _on_llm_start(self, run: "Run") -> None:
"""Process the LLM Run upon start."""
self._process_start_trace(run)
def _on_llm_end(self, run: "Run") -> None:
"""Process the LLM Run."""
self._process_end_trace(run)
def _on_llm_error(self, run: "Run") -> None:
"""Process the LLM Run upon error."""
self._process_end_trace(run)
def _on_chain_start(self, run: "Run") -> None:
"""Process the Chain Run upon start."""
self._process_start_trace(run)
def _on_chain_end(self, run: "Run") -> None:
"""Process the Chain Run."""
self._process_end_trace(run)
def _on_chain_error(self, run: "Run") -> None:
"""Process the Chain Run upon error."""
self._process_end_trace(run)
def _on_tool_start(self, run: "Run") -> None:
"""Process the Tool Run upon start."""
self._process_start_trace(run)
def _on_tool_end(self, run: "Run") -> None:
"""Process the Tool Run."""
self._process_end_trace(run)
def _on_tool_error(self, run: "Run") -> None:
"""Process the Tool Run upon error."""
self._process_end_trace(run)

View File

@@ -0,0 +1,507 @@
"""A Tracer Implementation that records activity to Weights & Biases."""
from __future__ import annotations
import json
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Optional,
Sequence,
Tuple,
TypedDict,
Union,
)
from langchain_core._api import warn_deprecated
from langchain_core.output_parsers.pydantic import PydanticBaseModel
from langchain_core.tracers.base import BaseTracer
from langchain_core.tracers.schemas import Run
if TYPE_CHECKING:
from wandb import Settings as WBSettings
from wandb.sdk.data_types.trace_tree import Trace
from wandb.sdk.lib.paths import StrPath
from wandb.wandb_run import Run as WBRun
PRINT_WARNINGS = True
def _serialize_io(run_io: Optional[dict]) -> dict:
"""Utility to serialize the input and output of a run to store in wandb.
Currently, supports serializing pydantic models and protobuf messages.
:param run_io: The inputs and outputs of the run.
:return: The serialized inputs and outputs.
"""
if not run_io:
return {}
from google.protobuf.json_format import MessageToJson
from google.protobuf.message import Message
serialized_inputs = {}
for key, value in run_io.items():
if isinstance(value, Message):
serialized_inputs[key] = MessageToJson(value)
elif isinstance(value, PydanticBaseModel):
serialized_inputs[key] = (
value.model_dump_json()
if hasattr(value, "model_dump_json")
else value.json()
)
elif key == "input_documents":
serialized_inputs.update(
{f"input_document_{i}": doc.json() for i, doc in enumerate(value)}
)
else:
serialized_inputs[key] = value
return serialized_inputs
def flatten_run(run: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Utility to flatten a nest run object into a list of runs.
:param run: The base run to flatten.
:return: The flattened list of runs.
"""
def flatten(child_runs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Utility to recursively flatten a list of child runs in a run.
:param child_runs: The list of child runs to flatten.
:return: The flattened list of runs.
"""
if child_runs is None:
return []
result = []
for item in child_runs:
child_runs = item.pop("child_runs", [])
result.append(item)
result.extend(flatten(child_runs))
return result
return flatten([run])
def truncate_run_iterative(
runs: List[Dict[str, Any]], keep_keys: Tuple[str, ...] = ()
) -> List[Dict[str, Any]]:
"""Utility to truncate a list of runs dictionaries to only keep the specified
keys in each run.
:param runs: The list of runs to truncate.
:param keep_keys: The keys to keep in each run.
:return: The truncated list of runs.
"""
def truncate_single(run: Dict[str, Any]) -> Dict[str, Any]:
"""Utility to truncate a single run dictionary to only keep the specified
keys.
:param run: The run dictionary to truncate.
:return: The truncated run dictionary
"""
new_dict = {}
for key in run:
if key in keep_keys:
new_dict[key] = run.get(key)
return new_dict
return list(map(truncate_single, runs))
def modify_serialized_iterative(
runs: List[Dict[str, Any]],
exact_keys: Tuple[str, ...] = (),
partial_keys: Tuple[str, ...] = (),
) -> List[Dict[str, Any]]:
"""Utility to modify the serialized field of a list of runs dictionaries.
removes any keys that match the exact_keys and any keys that contain any of the
partial_keys.
recursively moves the dictionaries under the kwargs key to the top level.
changes the "id" field to a string "_kind" field that tells WBTraceTree how to
visualize the run. promotes the "serialized" field to the top level.
:param runs: The list of runs to modify.
:param exact_keys: A tuple of keys to remove from the serialized field.
:param partial_keys: A tuple of partial keys to remove from the serialized
field.
:return: The modified list of runs.
"""
def remove_exact_and_partial_keys(obj: Dict[str, Any]) -> Dict[str, Any]:
"""Recursively removes exact and partial keys from a dictionary.
:param obj: The dictionary to remove keys from.
:return: The modified dictionary.
"""
if isinstance(obj, dict):
obj = {
k: v
for k, v in obj.items()
if k not in exact_keys
and not any(partial in k for partial in partial_keys)
}
for k, v in obj.items():
obj[k] = remove_exact_and_partial_keys(v)
elif isinstance(obj, list):
obj = [remove_exact_and_partial_keys(x) for x in obj]
return obj
def handle_id_and_kwargs(obj: Dict[str, Any], root: bool = False) -> Dict[str, Any]:
"""Recursively handles the id and kwargs fields of a dictionary.
changes the id field to a string "_kind" field that tells WBTraceTree how
to visualize the run. recursively moves the dictionaries under the kwargs
key to the top level.
:param obj: a run dictionary with id and kwargs fields.
:param root: whether this is the root dictionary or the serialized
dictionary.
:return: The modified dictionary.
"""
if isinstance(obj, dict):
if "data" in obj and isinstance(obj["data"], dict):
obj = obj["data"]
if ("id" in obj or "name" in obj) and not root:
_kind = obj.get("id")
if not _kind:
_kind = [obj.get("name")]
if isinstance(_kind, list):
obj["_kind"] = _kind[-1]
obj.pop("id", None)
obj.pop("name", None)
if "kwargs" in obj:
kwargs = obj.pop("kwargs")
for k, v in kwargs.items():
obj[k] = v
for k, v in obj.items():
obj[k] = handle_id_and_kwargs(v)
elif isinstance(obj, list):
obj = [handle_id_and_kwargs(x) for x in obj]
return obj
def transform_serialized(serialized: Dict[str, Any]) -> Dict[str, Any]:
"""Transforms the serialized field of a run dictionary to be compatible
with WBTraceTree.
:param serialized: The serialized field of a run dictionary.
:return: The transformed serialized field.
"""
serialized = handle_id_and_kwargs(serialized, root=True)
serialized = remove_exact_and_partial_keys(serialized)
return serialized
def transform_run(run: Dict[str, Any]) -> Dict[str, Any]:
"""Transforms a run dictionary to be compatible with WBTraceTree.
:param run: The run dictionary to transform.
:return: The transformed run dictionary.
"""
transformed_dict = transform_serialized(run)
serialized = transformed_dict.pop("serialized")
for k, v in serialized.items():
transformed_dict[k] = v
_kind = transformed_dict.get("_kind", None)
name = transformed_dict.pop("name", None)
if not name:
name = _kind
output_dict = {
f"{name}": transformed_dict,
}
return output_dict
return list(map(transform_run, runs))
def build_tree(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Builds a nested dictionary from a list of runs.
:param runs: The list of runs to build the tree from.
:return: The nested dictionary representing the langchain Run in a tree
structure compatible with WBTraceTree.
"""
id_to_data = {}
child_to_parent = {}
for entity in runs:
for key, data in entity.items():
id_val = data.pop("id", None)
parent_run_id = data.pop("parent_run_id", None)
id_to_data[id_val] = {key: data}
if parent_run_id:
child_to_parent[id_val] = parent_run_id
for child_id, parent_id in child_to_parent.items():
parent_dict = id_to_data[parent_id]
parent_dict[next(iter(parent_dict))][next(iter(id_to_data[child_id]))] = (
id_to_data[child_id][next(iter(id_to_data[child_id]))]
)
root_dict = next(
data for id_val, data in id_to_data.items() if id_val not in child_to_parent
)
return root_dict
class WandbRunArgs(TypedDict):
"""Arguments for the WandbTracer."""
job_type: Optional[str]
dir: Optional[StrPath]
config: Union[Dict, str, None]
project: Optional[str]
entity: Optional[str]
reinit: Optional[bool]
tags: Optional[Sequence]
group: Optional[str]
name: Optional[str]
notes: Optional[str]
magic: Optional[Union[dict, str, bool]]
config_exclude_keys: Optional[List[str]]
config_include_keys: Optional[List[str]]
anonymous: Optional[str]
mode: Optional[str]
allow_val_change: Optional[bool]
resume: Optional[Union[bool, str]]
force: Optional[bool]
tensorboard: Optional[bool]
sync_tensorboard: Optional[bool]
monitor_gym: Optional[bool]
save_code: Optional[bool]
id: Optional[str]
settings: Union[WBSettings, Dict[str, Any], None]
class WandbTracer(BaseTracer):
"""Callback Handler that logs to Weights and Biases.
This handler will log the model architecture and run traces to Weights and Biases.
This will ensure that all LangChain activity is logged to W&B.
"""
_run: Optional[WBRun] = None
_run_args: Optional[WandbRunArgs] = None
def __init__(
self,
run_args: Optional[WandbRunArgs] = None,
io_serializer: Callable = _serialize_io,
**kwargs: Any,
) -> None:
"""Initializes the WandbTracer.
Parameters:
run_args: (dict, optional) Arguments to pass to `wandb.init()`. If not
provided, `wandb.init()` will be called with no arguments. Please
refer to the `wandb.init` for more details.
io_serializer: callable A function that serializes the input and outputs
of a run to store in wandb. Defaults to "_serialize_io"
To use W&B to monitor all LangChain activity, add this tracer like any other
LangChain callback:
```
from wandb.integration.langchain import WandbTracer
tracer = WandbTracer()
chain = LLMChain(llm, callbacks=[tracer])
# ...end of notebook / script:
tracer.finish()
```
"""
super().__init__(**kwargs)
try:
import wandb
from wandb.sdk.data_types import trace_tree
except ImportError as e:
raise ImportError(
"Could not import wandb python package."
"Please install it with `pip install -U wandb`."
) from e
self._wandb = wandb
self._trace_tree = trace_tree
self._run_args = run_args
self._ensure_run(should_print_url=(wandb.run is None))
self._io_serializer = io_serializer
warn_deprecated(
"0.3.8",
pending=False,
message=(
"Please use the `WeaveTracer` from the `weave` package instead of this."
"The `WeaveTracer` is a more flexible and powerful tool for logging "
"and tracing your LangChain callables."
"Find more information at https://weave-docs.wandb.ai/guides/integrations/langchain"
),
alternative=(
"Please instantiate the WeaveTracer from "
"`weave.integrations.langchain import WeaveTracer` ."
"For autologging simply use `weave.init()` and log all traces "
"from your LangChain callables."
),
)
def finish(self) -> None:
"""Waits for all asynchronous processes to finish and data to upload.
Proxy for `wandb.finish()`.
"""
self._wandb.finish()
def _ensure_run(self, should_print_url: bool = False) -> None:
"""Ensures an active W&B run exists.
If not, will start a new run with the provided run_args.
"""
if self._wandb.run is None:
run_args: Dict = {**(self._run_args or {})}
if "settings" not in run_args:
run_args["settings"] = {"silent": True}
self._wandb.init(**run_args)
if self._wandb.run is not None:
if should_print_url:
run_url = self._wandb.run.settings.run_url
self._wandb.termlog(
f"Streaming LangChain activity to W&B at {run_url}\n"
"`WandbTracer` is currently in beta.\n"
"Please report any issues to "
"https://github.com/wandb/wandb/issues with the tag "
"`langchain`."
)
self._wandb.run._label(repo="langchain")
def process_model_dict(self, run: Run) -> Optional[Dict[str, Any]]:
"""Utility to process a run for wandb model_dict serialization.
:param run: The run to process.
:return: The convert model_dict to pass to WBTraceTree.
"""
try:
data = json.loads(run.json())
processed = flatten_run(data)
keep_keys = (
"id",
"name",
"serialized",
"parent_run_id",
)
processed = truncate_run_iterative(processed, keep_keys=keep_keys)
exact_keys, partial_keys = (
("lc", "type", "graph"),
(
"api_key",
"input",
"output",
),
)
processed = modify_serialized_iterative(
processed, exact_keys=exact_keys, partial_keys=partial_keys
)
output = build_tree(processed)
return output
except Exception as e:
if PRINT_WARNINGS:
self._wandb.termerror(f"WARNING: Failed to serialize model: {e}")
return None
def _log_trace_from_run(self, run: Run) -> None:
"""Logs a LangChain Run to W*B as a W&B Trace."""
self._ensure_run()
def create_trace(
run: "Run", parent: Optional["Trace"] = None
) -> Optional["Trace"]:
"""
Create a trace for a given run and its child runs.
Args:
run (Run): The run for which to create a trace.
parent (Optional[Trace]): The parent trace.
If provided, the created trace is added as a child to the parent trace.
Returns:
The created trace. If an error occurs during the creation of the trace,
None is returned.
Raises:
Exception: If an error occurs during the creation of the trace,
no exception is raised and a warning is printed.
"""
def get_metadata_dict(r: "Run") -> Dict[str, Any]:
"""
Extract metadata from a given run.
This function extracts metadata from a given run
and returns it as a dictionary.
Args:
r (Run): The run from which to extract metadata.
Returns:
`dict` containing the extracted metadata.
"""
run_dict = json.loads(r.json())
metadata_dict = run_dict.get("metadata", {})
metadata_dict["run_id"] = run_dict.get("id")
metadata_dict["parent_run_id"] = run_dict.get("parent_run_id")
metadata_dict["tags"] = run_dict.get("tags")
metadata_dict["execution_order"] = run_dict.get(
"dotted_order", ""
).count(".")
return metadata_dict
try:
if run.run_type in ["llm", "tool"]:
run_type = run.run_type
elif run.run_type == "chain":
run_type = "agent" if "agent" in run.name.lower() else "chain"
else:
run_type = None
metadata = get_metadata_dict(run)
trace_tree = self._trace_tree.Trace(
name=run.name,
kind=run_type,
status_code="error" if run.error else "success",
start_time_ms=int(run.start_time.timestamp() * 1000)
if run.start_time is not None
else None,
end_time_ms=int(run.end_time.timestamp() * 1000)
if run.end_time is not None
else None,
metadata=metadata,
inputs=self._io_serializer(run.inputs),
outputs=self._io_serializer(run.outputs),
)
# If the run has child runs, recursively create traces for them
for child_run in run.child_runs:
create_trace(child_run, trace_tree)
if parent is None:
return trace_tree
else:
parent.add_child(trace_tree)
return parent
except Exception as e:
if PRINT_WARNINGS:
self._wandb.termwarn(
f"WARNING: Failed to serialize trace for run due to: {e}"
)
return None
run_trace = create_trace(run)
model_dict = self.process_model_dict(run)
if model_dict is not None and run_trace is not None:
run_trace._model_dict = model_dict
if self._wandb.run is not None and run_trace is not None:
run_trace.log("langchain_trace")
def _persist_run(self, run: "Run") -> None:
"""Persist a run."""
self._log_trace_from_run(run)