initial commit
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,181 @@
|
||||
"""Evaluators for parsing strings."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from operator import eq
|
||||
from typing import Any, cast
|
||||
|
||||
from langchain_core.utils.json import parse_json_markdown
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_classic.evaluation.schema import StringEvaluator
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JsonValidityEvaluator(StringEvaluator):
    """Evaluate whether the prediction is valid JSON.

    This evaluator checks if the prediction is a valid JSON string. It does not
    require any input or reference.

    Attributes:
        requires_input: Whether this evaluator requires an input
            string. Always False.
        requires_reference: Whether this evaluator requires a
            reference string. Always False.
        evaluation_name: The name of the evaluation metric.
            Always "json_validity".

    Examples:
        >>> evaluator = JsonValidityEvaluator()
        >>> prediction = '{"name": "John", "age": 30, "city": "New York"}'
        >>> evaluator.evaluate_strings(prediction=prediction)
        {'score': 1}

        A trailing comma makes the payload invalid. The `reasoning` entry
        carries the text of the raised error, so only its presence is shown:

        >>> prediction = '{"name": "John", "age": 30, "city": "New York",}'
        >>> result = evaluator.evaluate_strings(prediction=prediction)
        >>> result["score"]
        0
        >>> "reasoning" in result
        True
    """

    def __init__(self, **_: Any) -> None:
        """Initialize the JsonValidityEvaluator.

        Args:
            **_: Extra keyword arguments; accepted and ignored.
        """
        super().__init__()

    @property
    @override
    def requires_input(self) -> bool:
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        return False

    @property
    @override
    def evaluation_name(self) -> str:
        return "json_validity"

    @override
    def _evaluate_strings(
        self,
        prediction: str,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the prediction string.

        Args:
            prediction: The prediction string to evaluate.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` containing the evaluation score. The score is `1` if
            the prediction is valid JSON, and `0` otherwise.

            If the prediction is not valid JSON, the dictionary also contains
            a `reasoning` field with the error message.
        """
        try:
            # `json.loads` is passed as the parser so that the strict standard
            # library decoder decides validity.
            parse_json_markdown(prediction, parser=json.loads)
        except json.JSONDecodeError as e:
            # Expected failure mode: report it without logging.
            return {"score": 0, "reasoning": str(e)}
        except Exception as e:
            # Anything else is unexpected; log the traceback but still return
            # a failing score instead of propagating.
            _logger.exception("Parsing JSON failed with unexpected error.")
            return {"score": 0, "reasoning": str(e)}
        return {"score": 1}
|
||||
|
||||
|
||||
class JsonEqualityEvaluator(StringEvaluator):
    """Json Equality Evaluator.

    Evaluate whether the prediction is equal to the reference after
    parsing both as JSON.

    This evaluator checks if the prediction, after parsing as JSON, is equal
    to the reference, which is also parsed as JSON. It does not require an
    input string.

    When the reference parses to a list, the prediction must parse to a list
    as well (otherwise the score is `0`). Both lists are then sorted by the
    string form of their elements before comparison, so the order of the
    top-level list items does not affect the result.

    Attributes:
        requires_input: Whether this evaluator requires an
            input string. Always False.
        requires_reference: Whether this evaluator requires
            a reference string. Always True.
        evaluation_name: The name of the evaluation metric.
            Always "json_equality".

    Examples:
        >>> evaluator = JsonEqualityEvaluator()
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}

        >>> evaluator = JsonEqualityEvaluator(operator=lambda x, y: x["a"] == y["a"])
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}
    """

    def __init__(self, operator: Callable | None = None, **_: Any) -> None:
        """Initialize the JsonEqualityEvaluator.

        Args:
            operator: A custom operator to compare the parsed JSON objects.
                It is called as `operator(parsed_prediction, parsed_reference)`.
                Defaults to equality (`eq`).
            **_: Extra keyword arguments; accepted and ignored.
        """
        super().__init__()
        self.operator = operator or eq

    @property
    @override
    def requires_input(self) -> bool:
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        return True

    @property
    @override
    def evaluation_name(self) -> str:
        return "json_equality"

    def _parse_json(
        self,
        string: Any,
    ) -> dict | list | None | float | bool | int | str:
        """Parse `string` with `parse_json_markdown` if it is a `str`.

        Any non-string value is returned unchanged.
        """
        if isinstance(string, str):
            return parse_json_markdown(string)
        return string

    @override
    def _evaluate_strings(
        self,
        prediction: str,
        reference: str | None = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the prediction string.

        Args:
            prediction: The prediction string to evaluate.
            reference: The reference string to compare against.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` containing the evaluation score under the `score` key. The
            value is whatever the configured operator returns, except when the
            reference is a list and the prediction is not, in which case it is
            the integer `0`.
        """
        parsed = self._parse_json(prediction)
        label = self._parse_json(cast("str", reference))
        if isinstance(label, list):
            if not isinstance(parsed, list):
                # Type mismatch: the operator is never consulted here.
                return {"score": 0}
            # Sort by string form so that top-level element order is ignored
            # and mixed-type lists remain sortable.
            parsed = sorted(parsed, key=str)
            label = sorted(label, key=str)
        return {"score": self.operator(parsed, label)}
|
||||
@@ -0,0 +1,109 @@
|
||||
import json
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.utils.json import parse_json_markdown
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_classic.evaluation.schema import StringEvaluator
|
||||
|
||||
|
||||
class JsonEditDistanceEvaluator(StringEvaluator):
    """An evaluator that calculates the edit distance between JSON strings.

    Both the prediction and the reference are parsed and then rendered in a
    canonical form (by default: sorted keys and no extra whitespace). The
    score is the distance between the two canonical strings; by default this
    is the normalized Damerau-Levenshtein distance from `rapidfuzz`.

    Both the distance function and the canonicalization function can be
    replaced through the constructor.

    Attributes:
        _string_distance (Callable[[str, str], float]): The internal distance
            computation function.
        _canonicalize (Callable[[Any], Any]): The internal canonicalization
            function.

    Examples:
        >>> evaluator = JsonEditDistanceEvaluator()
        >>> result = evaluator.evaluate_strings(
        ...     prediction='{"a": 1, "b": 2}', reference='{"a": 1, "b": 3}'
        ... )
        >>> assert result["score"] is not None

    Raises:
        ImportError: If `rapidfuzz` is not installed and no alternative
            `string_distance` function is provided.
    """

    def __init__(
        self,
        string_distance: Callable[[str, str], float] | None = None,
        canonicalize: Callable[[Any], Any] | None = None,
        **_: Any,
    ) -> None:
        """Initialize the JsonEditDistanceEvaluator.

        Args:
            string_distance: A callable that computes the distance between two
                strings. If not provided, a Damerau-Levenshtein distance from
                the `rapidfuzz` package will be used.
            canonicalize: A callable that converts a parsed JSON object into
                its canonical string form. If not provided, the default
                behavior is to serialize the JSON with sorted keys and no
                extra whitespace.
            **_: Extra keyword arguments; accepted and ignored.

        Raises:
            ImportError: If the `rapidfuzz` package is not installed and no
                `string_distance` function is provided.
        """
        super().__init__()

        if string_distance is None:
            # Only pull in the optional dependency when the default is needed.
            try:
                from rapidfuzz import distance as rfd
            except ImportError as e:
                msg = (
                    "The default string_distance operator for the "
                    " JsonEditDistanceEvaluator requires installation of "
                    "the rapidfuzz package. "
                    "Please install it with `pip install rapidfuzz`."
                )
                raise ImportError(msg) from e
            string_distance = rfd.DamerauLevenshtein.normalized_distance
        self._string_distance = string_distance

        if canonicalize is None:

            def _compact_sorted_dump(obj: Any) -> str:
                return json.dumps(
                    obj,
                    separators=(",", ":"),  # eliminate whitespace
                    sort_keys=True,
                )

            canonicalize = _compact_sorted_dump
        self._canonicalize = canonicalize

    @property
    @override
    def requires_input(self) -> bool:
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        return True

    @property
    @override
    def evaluation_name(self) -> str:
        return "json_edit_distance"

    def _parse_json(self, node: Any) -> dict | list | None | float | bool | int | str:
        """Parse `node` with `parse_json_markdown` when it is a `str`.

        Non-string values are handed back untouched.
        """
        return parse_json_markdown(node) if isinstance(node, str) else node

    @override
    def _evaluate_strings(
        self,
        prediction: str,
        reference: str | None = None,
        **kwargs: Any,
    ) -> dict:
        """Score the distance between the canonical prediction and reference.

        Args:
            prediction: The prediction to evaluate.
            reference: The reference to compare against.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` with a single `score` key holding the value returned by the
            configured string distance function.
        """
        canonical_prediction = self._canonicalize(self._parse_json(prediction))
        canonical_reference = self._canonicalize(self._parse_json(reference))
        return {
            "score": self._string_distance(canonical_prediction, canonical_reference),
        }
|
||||
@@ -0,0 +1,97 @@
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.utils.json import parse_json_markdown
|
||||
from typing_extensions import override
|
||||
|
||||
from langchain_classic.evaluation.schema import StringEvaluator
|
||||
|
||||
|
||||
class JsonSchemaEvaluator(StringEvaluator):
    """An evaluator that validates a JSON prediction against a JSON schema reference.

    This evaluator checks if a given JSON prediction conforms to the provided
    JSON schema. If the prediction is valid, the score is True (no errors).
    Otherwise, the score is False and a `reasoning` entry holds the `repr` of
    the validation error.

    Attributes:
        requires_input: Whether the evaluator requires input. Always False.
        requires_reference: Whether the evaluator requires reference.
            Always True.
        evaluation_name: The name of the evaluation.
            Always "json_schema_validation".

    Examples:
        evaluator = JsonSchemaEvaluator()
        result = evaluator.evaluate_strings(
            prediction='{"name": "John", "age": 30}',
            reference={
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"}
                }
            }
        )
        assert result["score"] is not None
    """

    def __init__(self, **_: Any) -> None:
        """Initialize the JsonSchemaEvaluator.

        Args:
            **_: Extra keyword arguments; accepted and ignored.

        Raises:
            ImportError: If the jsonschema package is not installed.
        """
        super().__init__()
        # Fail at construction time if the optional dependency is missing,
        # instead of on the first evaluation.
        try:
            import jsonschema  # noqa: F401
        except ImportError as e:
            msg = (
                "The JsonSchemaEvaluator requires the jsonschema package."
                " Please install it with `pip install jsonschema`."
            )
            raise ImportError(msg) from e

    @property
    @override
    def requires_input(self) -> bool:
        """Returns whether the evaluator requires input."""
        return False

    @property
    @override
    def requires_reference(self) -> bool:
        """Returns whether the evaluator requires reference."""
        return True

    @property
    @override
    def evaluation_name(self) -> str:
        """Returns the name of the evaluation."""
        return "json_schema_validation"

    def _parse_json(self, node: Any) -> dict | list | None | float | bool | int | str:
        """Convert `node` into a plain JSON-compatible object.

        Strings are parsed with `parse_json_markdown`. Objects exposing a
        callable `model_json_schema` or `schema` attribute are converted by
        calling it. Anything else is returned unchanged.
        """
        if isinstance(node, str):
            return parse_json_markdown(node)
        if hasattr(node, "model_json_schema") and callable(node.model_json_schema):
            # Pydantic v2 model
            return node.model_json_schema()
        if hasattr(node, "schema") and callable(node.schema):
            # Pydantic v1 model
            return node.schema()
        return node

    def _validate(self, prediction: Any, schema: Any) -> dict:
        """Validate `prediction` against `schema` using `jsonschema.validate`.

        Args:
            prediction: The parsed prediction to validate.
            schema: The parsed schema to validate against.

        Returns:
            `{"score": True}` when validation succeeds, otherwise
            `{"score": False, "reasoning": ...}` with the `repr` of the
            `ValidationError`. Only `ValidationError` is caught here; any
            other exception raised by `validate` propagates to the caller.
        """
        from jsonschema import ValidationError, validate

        try:
            validate(instance=prediction, schema=schema)
        except ValidationError as e:
            return {"score": False, "reasoning": repr(e)}
        return {"score": True}

    @override
    def _evaluate_strings(
        self,
        prediction: str | Any,
        input: str | Any = None,
        reference: str | Any = None,
        **kwargs: Any,
    ) -> dict:
        """Validate the prediction against the schema given as the reference.

        Args:
            prediction: The prediction to validate.
            input: Not used.
            reference: The JSON schema to validate against.
            **kwargs: Additional keyword arguments (not used).

        Returns:
            `dict` with a boolean `score` and, on failure, a `reasoning` entry.
        """
        parsed_prediction = self._parse_json(prediction)
        schema = self._parse_json(reference)
        return self._validate(parsed_prediction, schema)
|
||||
Reference in New Issue
Block a user