initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,181 @@
"""Evaluators for parsing strings."""
import json
import logging
from collections.abc import Callable
from operator import eq
from typing import Any, cast
from langchain_core.utils.json import parse_json_markdown
from typing_extensions import override
from langchain_classic.evaluation.schema import StringEvaluator
# Module-level logger; used below to record unexpected (non-JSONDecodeError)
# failures raised while parsing a prediction.
_logger = logging.getLogger(__name__)
class JsonValidityEvaluator(StringEvaluator):
    """Evaluate whether the prediction is valid JSON.

    This evaluator checks if the prediction is a valid JSON string. It does not
    require any input or reference.

    Attributes:
        requires_input: Whether this evaluator requires an input
            string. Always False.
        requires_reference: Whether this evaluator requires a
            reference string. Always False.
        evaluation_name: The name of the evaluation metric.
            Always "json_validity".

    Examples:
        >>> evaluator = JsonValidityEvaluator()
        >>> prediction = '{"name": "John", "age": 30, "city": "New York"}'
        >>> evaluator.evaluate_strings(prediction=prediction)
        {'score': 1}
        >>> prediction = '{"name": "John", "age": 30, "city": "New York",}'
        >>> result = evaluator.evaluate_strings(prediction=prediction)
        >>> result["score"]
        0
        >>> "reasoning" in result  # holds the parser's error message
        True
    """

    def __init__(self, **_: Any) -> None:
        """Initialize the JsonValidityEvaluator.

        Args:
            **_: Extra keyword arguments are accepted and ignored.
        """
        super().__init__()

    @property
    @override
    def requires_input(self) -> bool:
        """Whether an input string is required. Always False."""
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        """Whether a reference string is required. Always False."""
        return False

    @property
    @override
    def evaluation_name(self) -> str:
        """The name of the evaluation metric: ``"json_validity"``."""
        return "json_validity"

    @override
    def _evaluate_strings(
        self,
        prediction: str,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the prediction string.

        Args:
            prediction: The prediction string to evaluate.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` containing the evaluation score. The score is `1` if
            the prediction is valid JSON, and `0` otherwise.
            If the prediction is not valid JSON, the dictionary also contains
            a `reasoning` field with the error message.
        """
        try:
            parse_json_markdown(prediction, parser=json.loads)
        except json.JSONDecodeError as e:
            # Malformed JSON is the expected failure mode: report it in the
            # result without logging.
            return {"score": 0, "reasoning": str(e)}
        except Exception as e:
            # Anything else is unexpected; still score 0, but leave a
            # traceback in the logs so the cause can be investigated.
            _logger.exception("Parsing JSON failed with unexpected error.")
            return {"score": 0, "reasoning": str(e)}
        return {"score": 1}
class JsonEqualityEvaluator(StringEvaluator):
    """Json Equality Evaluator.

    Evaluate whether the prediction is equal to the reference after
    parsing both as JSON.

    This evaluator checks if the prediction, after parsing as JSON, is equal
    to the reference, which is also parsed as JSON. It does not require an
    input string.

    When the reference parses to a list, the prediction must also be a list;
    both lists are then sorted by a canonical (key-order-independent) JSON
    rendering of their elements before being compared, so the order of the
    top-level list elements does not affect the result.

    Attributes:
        requires_input: Whether this evaluator requires an
            input string. Always False.
        requires_reference: Whether this evaluator requires
            a reference string. Always True.
        evaluation_name: The name of the evaluation metric.
            Always "json_equality".

    Examples:
        >>> evaluator = JsonEqualityEvaluator()
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}

        >>> evaluator = JsonEqualityEvaluator(operator=lambda x, y: x["a"] == y["a"])
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}
    """

    def __init__(self, operator: Callable | None = None, **_: Any) -> None:
        """Initialize the JsonEqualityEvaluator.

        Args:
            operator: A custom operator to compare the parsed JSON objects.
                Defaults to equality (`eq`).
            **_: Extra keyword arguments are accepted and ignored.
        """
        super().__init__()
        self.operator = operator or eq

    @property
    @override
    def requires_input(self) -> bool:
        """Whether an input string is required. Always False."""
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        """Whether a reference string is required. Always True."""
        return True

    @property
    @override
    def evaluation_name(self) -> str:
        """The name of the evaluation metric: ``"json_equality"``."""
        return "json_equality"

    def _parse_json(
        self,
        string: Any,
    ) -> dict | list | None | float | bool | int | str:
        """Parse `string` as JSON when it is a `str`; return it as-is otherwise."""
        if isinstance(string, str):
            return parse_json_markdown(string)
        return string

    @staticmethod
    def _canonical_sort_key(item: Any) -> str:
        """Return a sort key for a list element that ignores dict key order.

        `str(item)` reflects dict insertion order, so two equal dicts whose
        keys were written in a different order would sort differently.
        Serializing with `sort_keys=True` removes that dependency.
        """
        try:
            return json.dumps(item, sort_keys=True, default=str)
        except (TypeError, ValueError):
            # Values json cannot serialize (e.g. dicts with unsortable or
            # unsupported keys, circular references) fall back to `str`.
            return str(item)

    @override
    def _evaluate_strings(
        self,
        prediction: str,
        reference: str | None = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the prediction string.

        Args:
            prediction: The prediction string to evaluate.
            reference: The reference string to compare against.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` containing the evaluation score: the value returned by the
            configured operator for the parsed prediction and reference, or
            `0` when the reference is a list and the prediction is not.
        """
        parsed = self._parse_json(prediction)
        label = self._parse_json(cast("str", reference))
        if isinstance(label, list):
            if not isinstance(parsed, list):
                return {"score": 0}
            # Sort both sides with the same canonical key so that permuted
            # lists compare equal regardless of dict key order in elements.
            parsed = sorted(parsed, key=self._canonical_sort_key)
            label = sorted(label, key=self._canonical_sort_key)
        return {"score": self.operator(parsed, label)}

View File

@@ -0,0 +1,109 @@
import json
from collections.abc import Callable
from typing import Any
from langchain_core.utils.json import parse_json_markdown
from typing_extensions import override
from langchain_classic.evaluation.schema import StringEvaluator
class JsonEditDistanceEvaluator(StringEvaluator):
    """Score the edit distance between two JSON documents.

    Both the prediction and the reference are parsed as JSON and rendered to
    a canonical string (by default: sorted keys, no extra whitespace); the
    score is the distance between those two strings. The default distance is
    the normalized Damerau-Levenshtein distance from `rapidfuzz`. Both the
    distance function and the canonicalization function can be replaced.

    Attributes:
        _string_distance (Callable[[str, str], float]): The internal distance
            computation function.
        _canonicalize (Callable[[Any], Any]): The internal canonicalization
            function.

    Examples:
        >>> evaluator = JsonEditDistanceEvaluator()
        >>> result = evaluator.evaluate_strings(
        ...     prediction='{"a": 1, "b": 2}', reference='{"a": 1, "b": 3}'
        ... )
        >>> assert result["score"] is not None

    Raises:
        ImportError: If `rapidfuzz` is not installed and no alternative
            `string_distance` function is provided.
    """

    def __init__(
        self,
        string_distance: Callable[[str, str], float] | None = None,
        canonicalize: Callable[[Any], Any] | None = None,
        **_: Any,
    ) -> None:
        """Initialize the JsonEditDistanceEvaluator.

        Args:
            string_distance: A callable that computes the distance between two
                strings. If not provided, a Damerau-Levenshtein distance from
                the `rapidfuzz` package will be used.
            canonicalize: A callable that converts a parsed JSON object into
                its canonical string form. If not provided, the JSON is
                serialized with sorted keys and no extra whitespace.
            **_: Extra keyword arguments are accepted and ignored.

        Raises:
            ImportError: If the `rapidfuzz` package is not installed and no
                `string_distance` function is provided.
        """
        super().__init__()

        if string_distance is None:
            # Only touch rapidfuzz when the caller did not bring a metric.
            try:
                from rapidfuzz import distance as rfd
            except ImportError as e:
                msg = (
                    "The default string_distance operator for the "
                    " JsonEditDistanceEvaluator requires installation of "
                    "the rapidfuzz package. "
                    "Please install it with `pip install rapidfuzz`."
                )
                raise ImportError(msg) from e
            self._string_distance = rfd.DamerauLevenshtein.normalized_distance
        else:
            self._string_distance = string_distance

        if canonicalize is None:

            def _compact_sorted_dump(obj: Any) -> str:
                # Compact separators eliminate whitespace; sorted keys make
                # the output independent of key order.
                return json.dumps(obj, separators=(",", ":"), sort_keys=True)

            self._canonicalize = _compact_sorted_dump
        else:
            self._canonicalize = canonicalize

    @property
    @override
    def requires_input(self) -> bool:
        """Whether an input string is required. Always False."""
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        """Whether a reference string is required. Always True."""
        return True

    @property
    @override
    def evaluation_name(self) -> str:
        """The name of the evaluation metric: ``"json_edit_distance"``."""
        return "json_edit_distance"

    def _parse_json(self, node: Any) -> dict | list | None | float | bool | int | str:
        """Parse `node` as JSON when it is a `str`; return it as-is otherwise."""
        return parse_json_markdown(node) if isinstance(node, str) else node

    @override
    def _evaluate_strings(
        self,
        prediction: str,
        reference: str | None = None,
        **kwargs: Any,
    ) -> dict:
        """Compute the distance between the canonicalized prediction and reference.

        Args:
            prediction: The prediction string to evaluate.
            reference: The reference string to compare against.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` with a single `score` key holding the value returned by
            the configured string-distance function.
        """
        canonical_prediction = self._canonicalize(self._parse_json(prediction))
        canonical_reference = self._canonicalize(self._parse_json(reference))
        return {
            "score": self._string_distance(canonical_prediction, canonical_reference),
        }

View File

@@ -0,0 +1,97 @@
from typing import Any
from langchain_core.utils.json import parse_json_markdown
from typing_extensions import override
from langchain_classic.evaluation.schema import StringEvaluator
class JsonSchemaEvaluator(StringEvaluator):
    """Validate a JSON prediction against a JSON schema reference.

    The prediction is parsed as JSON and validated with `jsonschema` against
    the schema given as the reference. The score is True when validation
    succeeds; when a validation error is raised the score is False and the
    result also carries a `reasoning` entry describing the error.

    Attributes:
        requires_input: Whether the evaluator requires input.
        requires_reference: Whether the evaluator requires reference.
        evaluation_name: The name of the evaluation.

    Examples:
        evaluator = JsonSchemaEvaluator()
        result = evaluator.evaluate_strings(
            prediction='{"name": "John", "age": 30}',
            reference={
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"}
                }
            }
        )
        assert result["score"] is not None
    """

    def __init__(self, **_: Any) -> None:
        """Initializes the JsonSchemaEvaluator.

        Args:
            **_: Extra keyword arguments are accepted and ignored.

        Raises:
            ImportError: If the jsonschema package is not installed.
        """
        super().__init__()
        # Import eagerly so a missing dependency surfaces at construction
        # time rather than on the first evaluation.
        try:
            import jsonschema  # noqa: F401
        except ImportError as e:
            msg = (
                "The JsonSchemaEvaluator requires the jsonschema package."
                " Please install it with `pip install jsonschema`."
            )
            raise ImportError(msg) from e

    @property
    def requires_input(self) -> bool:
        """Returns whether the evaluator requires input."""
        return False

    @property
    def requires_reference(self) -> bool:
        """Returns whether the evaluator requires reference."""
        return True

    @property
    def evaluation_name(self) -> str:
        """Returns the name of the evaluation."""
        return "json_schema_validation"

    def _parse_json(self, node: Any) -> dict | list | None | float | bool | int | str:
        """Turn `node` into a plain JSON-like object.

        Strings are parsed as JSON. Objects exposing a callable
        `model_json_schema` (Pydantic v2 model) or `schema` (Pydantic v1
        model) are replaced by the result of that call. Anything else is
        returned unchanged.
        """
        if isinstance(node, str):
            return parse_json_markdown(node)
        # Check the Pydantic v2 accessor first, then the v1 accessor.
        for accessor_name in ("model_json_schema", "schema"):
            accessor = getattr(node, accessor_name, None)
            if callable(accessor):
                return accessor()
        return node

    def _validate(self, prediction: Any, schema: Any) -> dict:
        """Validate `prediction` against `schema` and build the result dict."""
        from jsonschema import ValidationError, validate

        try:
            validate(instance=prediction, schema=schema)
        except ValidationError as e:
            return {"score": False, "reasoning": repr(e)}
        else:
            return {"score": True}

    @override
    def _evaluate_strings(
        self,
        prediction: str | Any,
        input: str | Any = None,
        reference: str | Any = None,
        **kwargs: Any,
    ) -> dict:
        """Validate the prediction against the schema supplied as the reference.

        Args:
            prediction: The JSON prediction (string or already-parsed object).
            input: Not used.
            reference: The JSON schema (string, parsed object, or an object
                exposing `model_json_schema` / `schema`).
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` with a boolean `score`, plus `reasoning` on failure.
        """
        instance = self._parse_json(prediction)
        return self._validate(instance, self._parse_json(reference))