initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,57 @@
"""Implementations of key-value stores and storage helpers.
Module provides implementations of various key-value stores that conform
to a simple key-value interface.
The primary goal of these storages is to support implementation of caching.
"""
from typing import TYPE_CHECKING, Any
from langchain_core.stores import (
InMemoryByteStore,
InMemoryStore,
InvalidKeyException,
)
from langchain_classic._api import create_importer
from langchain_classic.storage._lc_store import create_kv_docstore, create_lc_store
from langchain_classic.storage.encoder_backed import EncoderBackedStore
from langchain_classic.storage.file_system import LocalFileStore
if TYPE_CHECKING:
from langchain_community.storage import (
RedisStore,
UpstashRedisByteStore,
UpstashRedisStore,
)
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
# Each key is an attribute name served lazily by this package; each value is
# the module path passed to `create_importer` as that name's source.
DEPRECATED_LOOKUP = {
    "RedisStore": "langchain_community.storage",
    "UpstashRedisByteStore": "langchain_community.storage",
    "UpstashRedisStore": "langchain_community.storage",
}

# Resolver built once at import time; `__getattr__` below delegates to it.
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Look up attributes dynamically.

    Python only calls a module-level `__getattr__` after normal attribute
    lookup on the module has failed (PEP 562), so names imported eagerly at
    the top of this module never reach this function.

    Args:
        name: The attribute name requested on this package.

    Returns:
        Whatever `_import_attribute` returns for `name`.
    """
    return _import_attribute(name)


# Public names of the package. The three Redis-backed stores are imported only
# under `TYPE_CHECKING`, so at runtime they are resolved through `__getattr__`.
__all__ = [
    "EncoderBackedStore",
    "InMemoryByteStore",
    "InMemoryStore",
    "InvalidKeyException",
    "LocalFileStore",
    "RedisStore",
    "UpstashRedisByteStore",
    "UpstashRedisStore",
    "create_kv_docstore",
    "create_lc_store",
]

View File

@@ -0,0 +1,92 @@
"""Create a key-value store for any langchain serializable object."""
from collections.abc import Callable
from typing import Any
from langchain_core.documents import Document
from langchain_core.load import Serializable, dumps, loads
from langchain_core.stores import BaseStore, ByteStore
from langchain_classic.storage.encoder_backed import EncoderBackedStore
def _dump_as_bytes(obj: Serializable) -> bytes:
    """Serialize a `Serializable` object to UTF-8 encoded bytes.

    Args:
        obj: The object to serialize with `dumps`.

    Returns:
        The string produced by `dumps`, encoded as UTF-8.
    """
    text = dumps(obj)
    return text.encode("utf-8")
def _dump_document_as_bytes(obj: Any) -> bytes:
    """Serialize a `Document` to UTF-8 encoded bytes, rejecting other types.

    Args:
        obj: The value to serialize; must be a `Document` instance.

    Returns:
        The string produced by `dumps`, encoded as UTF-8.

    Raises:
        TypeError: If `obj` is not a `Document`.
    """
    if isinstance(obj, Document):
        text = dumps(obj)
        return text.encode("utf-8")
    msg = "Expected a Document instance"
    raise TypeError(msg)
def _load_document_from_bytes(serialized: bytes) -> Document:
    """Decode UTF-8 bytes and load the `Document` they represent.

    Args:
        serialized: UTF-8 encoded text in the format accepted by `loads`.

    Returns:
        The loaded `Document`.

    Raises:
        TypeError: If the loaded object is not a `Document`.
    """
    text = serialized.decode("utf-8")
    revived = loads(text)
    if isinstance(revived, Document):
        return revived
    msg = f"Expected a Document instance. Got {type(revived)}"
    raise TypeError(msg)
def _load_from_bytes(serialized: bytes) -> Serializable:
    """Decode UTF-8 bytes and load the serializable object they represent.

    No type check is performed on the result; whatever `loads` returns is
    handed back to the caller.

    Args:
        serialized: UTF-8 encoded text in the format accepted by `loads`.

    Returns:
        The object returned by `loads`.
    """
    text = serialized.decode("utf-8")
    return loads(text)
def _identity(x: str) -> str:
"""Return the same object."""
return x
# PUBLIC API
def create_lc_store(
    store: ByteStore,
    *,
    key_encoder: Callable[[str], str] | None = None,
) -> BaseStore[str, Serializable]:
    """Wrap a bytes store so it stores LangChain serializable objects.

    Values are converted to bytes with `_dump_as_bytes` on write and loaded
    back with `_load_from_bytes` on read. No run-time type check is applied
    to the loaded values.

    Args:
        store: The bytes store used as the underlying storage.
        key_encoder: Function applied to every key before it reaches `store`.
            When not provided, keys are used unchanged.

    Returns:
        A key-value store for `Serializable` objects.
    """
    encode_key = key_encoder or _identity
    return EncoderBackedStore(
        store=store,
        key_encoder=encode_key,
        value_serializer=_dump_as_bytes,
        value_deserializer=_load_from_bytes,
    )
def create_kv_docstore(
    store: ByteStore,
    *,
    key_encoder: Callable[[str], str] | None = None,
) -> BaseStore[str, Document]:
    """Wrap a bytes store so it stores LangChain `Document` objects.

    Both directions are type checked at run time: writing a value that is not
    a `Document`, or reading back bytes that do not load into a `Document`,
    raises `TypeError`.

    Args:
        store: The bytes store used as the underlying storage.
        key_encoder: Function applied to every key before it reaches `store`.
            When not provided, keys are used unchanged.

    Returns:
        A key-value store for `Document` objects.
    """
    encode_key = key_encoder or _identity
    return EncoderBackedStore(
        store=store,
        key_encoder=encode_key,
        value_serializer=_dump_document_as_bytes,
        value_deserializer=_load_document_from_bytes,
    )

View File

@@ -0,0 +1,181 @@
from collections.abc import AsyncIterator, Callable, Iterator, Sequence
from typing import (
Any,
TypeVar,
)
from langchain_core.stores import BaseStore
K = TypeVar("K")
V = TypeVar("V")
class EncoderBackedStore(BaseStore[K, V]):
    """Store adapter that encodes keys and (de)serializes values.

    Every key is passed through `key_encoder` and every value through
    `value_serializer` before reaching the wrapped store; values read back
    are passed through `value_deserializer`. Missing values (`None`) are
    returned as-is without being deserialized.

    Example that uses JSON for encoding/decoding:

    ```python
    import json


    def key_encoder(key: int) -> str:
        return json.dumps(key)


    def value_serializer(value: float) -> str:
        return json.dumps(value)


    def value_deserializer(serialized_value: str) -> float:
        return json.loads(serialized_value)


    # Create an instance of the abstract store
    abstract_store = MyCustomStore()

    # Create an instance of the encoder-backed store
    store = EncoderBackedStore(
        store=abstract_store,
        key_encoder=key_encoder,
        value_serializer=value_serializer,
        value_deserializer=value_deserializer,
    )

    # Use the encoder-backed store methods
    store.mset([(1, 3.14), (2, 2.718)])
    values = store.mget([1, 2])  # Retrieves [3.14, 2.718]
    store.mdelete([1, 2])  # Deletes the keys 1 and 2
    ```
    """

    def __init__(
        self,
        store: BaseStore[str, Any],
        key_encoder: Callable[[K], str],
        value_serializer: Callable[[V], bytes],
        value_deserializer: Callable[[Any], V],
    ) -> None:
        """Initialize an `EncoderBackedStore`.

        Args:
            store: The underlying store to wrap; it receives encoded keys and
                serialized values.
            key_encoder: Function to encode keys from type `K` to strings.
            value_serializer: Function to serialize values of type `V`.
            value_deserializer: Function to turn stored values back into `V`.
        """
        self.store = store
        self.key_encoder = key_encoder
        self.value_serializer = value_serializer
        self.value_deserializer = value_deserializer

    def mget(self, keys: Sequence[K]) -> list[V | None]:
        """Fetch and deserialize the values stored under `keys`.

        Args:
            keys: A sequence of keys.

        Returns:
            One entry per key, in order. An entry is `None` when the wrapped
            store returned `None` for that key.
        """
        encode = self.key_encoder
        raw_values = self.store.mget([encode(key) for key in keys])
        decode = self.value_deserializer
        # Missing entries are passed through untouched rather than decoded.
        return [None if raw is None else decode(raw) for raw in raw_values]

    async def amget(self, keys: Sequence[K]) -> list[V | None]:
        """Async fetch and deserialize the values stored under `keys`.

        Args:
            keys: A sequence of keys.

        Returns:
            One entry per key, in order. An entry is `None` when the wrapped
            store returned `None` for that key.
        """
        encode = self.key_encoder
        raw_values = await self.store.amget([encode(key) for key in keys])
        decode = self.value_deserializer
        # Missing entries are passed through untouched rather than decoded.
        return [None if raw is None else decode(raw) for raw in raw_values]

    def mset(self, key_value_pairs: Sequence[tuple[K, V]]) -> None:
        """Encode and store the given key-value pairs.

        Args:
            key_value_pairs: A sequence of key-value pairs.
        """
        encode = self.key_encoder
        serialize = self.value_serializer
        payload = []
        for key, value in key_value_pairs:
            payload.append((encode(key), serialize(value)))
        self.store.mset(payload)

    async def amset(self, key_value_pairs: Sequence[tuple[K, V]]) -> None:
        """Async encode and store the given key-value pairs.

        Args:
            key_value_pairs: A sequence of key-value pairs.
        """
        encode = self.key_encoder
        serialize = self.value_serializer
        payload = []
        for key, value in key_value_pairs:
            payload.append((encode(key), serialize(value)))
        await self.store.amset(payload)

    def mdelete(self, keys: Sequence[K]) -> None:
        """Delete the entries stored under `keys`.

        Args:
            keys: A sequence of keys to delete.
        """
        encode = self.key_encoder
        self.store.mdelete([encode(key) for key in keys])

    async def amdelete(self, keys: Sequence[K]) -> None:
        """Async delete the entries stored under `keys`.

        Args:
            keys: A sequence of keys to delete.
        """
        encode = self.key_encoder
        await self.store.amdelete([encode(key) for key in keys])

    def yield_keys(
        self,
        *,
        prefix: str | None = None,
    ) -> Iterator[K] | Iterator[str]:
        """Iterate over the wrapped store's keys that match `prefix`.

        Args:
            prefix: The prefix to match; forwarded unchanged to the wrapped
                store.

        Yields:
            Keys exactly as the wrapped store reports them.
        """
        # Known limitation: keys are not mapped back to `K`, so callers get
        # the wrapped store's `str` keys. Kept for debugging; should be fixed.
        yield from self.store.yield_keys(prefix=prefix)

    async def ayield_keys(
        self,
        *,
        prefix: str | None = None,
    ) -> AsyncIterator[K] | AsyncIterator[str]:
        """Async iterate over the wrapped store's keys that match `prefix`.

        Args:
            prefix: The prefix to match; forwarded unchanged to the wrapped
                store.

        Yields:
            Keys exactly as the wrapped store reports them.
        """
        # Known limitation: keys are not mapped back to `K`, so callers get
        # the wrapped store's `str` keys. Kept for debugging; should be fixed.
        upstream = self.store.ayield_keys(prefix=prefix)
        async for stored_key in upstream:
            yield stored_key

View File

@@ -0,0 +1,3 @@
from langchain_core.stores import InvalidKeyException
# Re-export: `InvalidKeyException` is defined in `langchain_core.stores` and
# only made importable from this module.
__all__ = ["InvalidKeyException"]

View File

@@ -0,0 +1,164 @@
import os
import re
import time
from collections.abc import Iterator, Sequence
from pathlib import Path
from langchain_core.stores import ByteStore
from langchain_classic.storage.exceptions import InvalidKeyException
class LocalFileStore(ByteStore):
    """`BaseStore` interface that works on the local file system.

    Each key is a path relative to the store's root directory and each value
    is the byte content of the file at that path.

    Examples:
        Create a `LocalFileStore` instance and perform operations on it:

        ```python
        from langchain_classic.storage import LocalFileStore

        # Instantiate the LocalFileStore with the root path
        file_store = LocalFileStore("/path/to/root")

        # Set values for keys
        file_store.mset([("key1", b"value1"), ("key2", b"value2")])

        # Get values for keys
        values = file_store.mget(["key1", "key2"])  # Returns [b"value1", b"value2"]

        # Delete keys
        file_store.mdelete(["key1"])

        # Iterate over keys
        for key in file_store.yield_keys():
            print(key)  # noqa: T201
        ```
    """

    def __init__(
        self,
        root_path: str | Path,
        *,
        chmod_file: int | None = None,
        chmod_dir: int | None = None,
        update_atime: bool = False,
    ) -> None:
        """Implement the `BaseStore` interface for the local file system.

        Args:
            root_path: The root path of the file store. All keys are interpreted as
                paths relative to this root.
            chmod_file: Sets permissions for newly created files, overriding the
                current `umask` if needed.
            chmod_dir: Sets permissions for newly created dirs, overriding the
                current `umask` if needed.
            update_atime: Updates the filesystem access time (but not the modified
                time) when a file is read. This allows MRU/LRU cache policies to be
                implemented for filesystems where access time updates are disabled.
        """
        # Made absolute but not resolved: symlinks and `..` segments in the
        # root are kept here and only resolved when validating keys.
        self.root_path = Path(root_path).absolute()
        self.chmod_file = chmod_file
        self.chmod_dir = chmod_dir
        self.update_atime = update_atime

    def _get_full_path(self, key: str) -> Path:
        """Get the full path for a given key relative to the root path.

        Args:
            key: The key relative to the root path.

        Returns:
            The resolved full path for the given key.

        Raises:
            InvalidKeyException: If the key contains characters outside
                `[a-zA-Z0-9_.-/]`, or resolves to a location outside the
                root directory.
        """
        # `fullmatch` is required here: `re.match` with a trailing `$` also
        # accepts a key ending in "\n", letting a newline through the whitelist.
        if not re.fullmatch(r"[a-zA-Z0-9_.\-/]+", key):
            msg = f"Invalid characters in key: {key}"
            raise InvalidKeyException(msg)
        full_path = (self.root_path / key).resolve()
        root_path = self.root_path.resolve()
        # Containment check on resolved paths, so `..` segments and absolute
        # keys cannot escape the root.
        common_path = os.path.commonpath([root_path, full_path])
        if common_path != str(root_path):
            msg = (
                f"Invalid key: {key}. Key should be relative to the full path. "
                f"{root_path} vs. {common_path} and full path of {full_path}"
            )
            raise InvalidKeyException(msg)

        return full_path

    def _mkdir_for_store(self, dir_path: Path) -> None:
        """Makes a store directory path (including parents) with specified permissions.

        This is needed because `Path.mkdir()` is restricted by the current `umask`,
        whereas the explicit `os.chmod()` used here is not.

        Args:
            dir_path: The store directory to make.
        """
        if not dir_path.exists():
            # Create missing ancestors first so each new directory gets
            # `chmod_dir` applied; directories that already exist are untouched.
            self._mkdir_for_store(dir_path.parent)
            dir_path.mkdir(exist_ok=True)
            if self.chmod_dir is not None:
                dir_path.chmod(self.chmod_dir)

    def mget(self, keys: Sequence[str]) -> list[bytes | None]:
        """Get the values associated with the given keys.

        Args:
            keys: A sequence of keys.

        Returns:
            A sequence of optional values associated with the keys.
            If a key is not found, the corresponding value will be `None`.

        Raises:
            InvalidKeyException: If any key fails validation.
        """
        values: list[bytes | None] = []
        for key in keys:
            full_path = self._get_full_path(key)
            if full_path.exists():
                value = full_path.read_bytes()
                values.append(value)
                if self.update_atime:
                    # update access time only; preserve modified time
                    os.utime(full_path, (time.time(), full_path.stat().st_mtime))
            else:
                values.append(None)
        return values

    def mset(self, key_value_pairs: Sequence[tuple[str, bytes]]) -> None:
        """Set the values for the given keys.

        Missing parent directories are created before each file is written.

        Args:
            key_value_pairs: A sequence of key-value pairs.

        Raises:
            InvalidKeyException: If any key fails validation.
        """
        for key, value in key_value_pairs:
            full_path = self._get_full_path(key)
            self._mkdir_for_store(full_path.parent)
            full_path.write_bytes(value)
            if self.chmod_file is not None:
                full_path.chmod(self.chmod_file)

    def mdelete(self, keys: Sequence[str]) -> None:
        """Delete the given keys and their associated values.

        Keys whose file does not exist are skipped silently.

        Args:
            keys: A sequence of keys to delete.

        Raises:
            InvalidKeyException: If any key fails validation.
        """
        for key in keys:
            full_path = self._get_full_path(key)
            if full_path.exists():
                full_path.unlink()

    def yield_keys(self, *, prefix: str | None = None) -> Iterator[str]:
        """Get an iterator over keys that match the given prefix.

        The prefix is validated like a key and treated as a path under the
        root; files found beneath that path are yielded. Without a prefix the
        whole root directory is walked.

        Args:
            prefix: The prefix to match.

        Yields:
            Keys that match the given prefix, as paths relative to the root.

        Raises:
            InvalidKeyException: If `prefix` fails key validation.
        """
        if prefix:
            search_root = self._get_full_path(prefix)
            # `_get_full_path` returns a resolved path, so the keys must be
            # made relative to the resolved root as well; otherwise
            # `relative_to` raises `ValueError` whenever the configured root
            # contains a symlink or `..` segment.
            base_path = self.root_path.resolve()
        else:
            search_root = base_path = self.root_path
        for file in search_root.rglob("*"):
            if file.is_file():
                relative_path = file.relative_to(base_path)
                yield str(relative_path)

View File

@@ -0,0 +1,13 @@
"""In memory store that is not thread safe and has no eviction policy.
This is a simple implementation of the BaseStore using a dictionary that is useful
primarily for unit testing purposes.
"""
from langchain_core.stores import InMemoryBaseStore, InMemoryByteStore, InMemoryStore
# Re-export: all three classes are defined in `langchain_core.stores` and only
# made importable from this module.
__all__ = [
    "InMemoryBaseStore",
    "InMemoryByteStore",
    "InMemoryStore",
]

View File

@@ -0,0 +1,23 @@
from typing import TYPE_CHECKING, Any
from langchain_classic._api import create_importer
if TYPE_CHECKING:
from langchain_community.storage import RedisStore
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
# Maps the lazily-served attribute name to the module path passed to
# `create_importer` as its source.
DEPRECATED_LOOKUP = {"RedisStore": "langchain_community.storage"}

# Resolver built once at import time; `__getattr__` below delegates to it.
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Look up attributes dynamically.

    Python only calls a module-level `__getattr__` after normal attribute
    lookup on the module has failed (PEP 562).

    Args:
        name: The attribute name requested on this module.

    Returns:
        Whatever `_import_attribute` returns for `name`.
    """
    return _import_attribute(name)


# `RedisStore` is imported only under `TYPE_CHECKING`, so at runtime it is
# resolved through `__getattr__`.
__all__ = [
    "RedisStore",
]

View File

@@ -0,0 +1,27 @@
from typing import TYPE_CHECKING, Any
from langchain_classic._api import create_importer
if TYPE_CHECKING:
from langchain_community.storage import UpstashRedisByteStore, UpstashRedisStore
# Create a way to dynamically look up deprecated imports.
# Used to consolidate logic for raising deprecation warnings and
# handling optional imports.
# Each key is an attribute name served lazily by this module; each value is
# the module path passed to `create_importer` as that name's source.
DEPRECATED_LOOKUP = {
    "UpstashRedisStore": "langchain_community.storage",
    "UpstashRedisByteStore": "langchain_community.storage",
}

# Resolver built once at import time; `__getattr__` below delegates to it.
_import_attribute = create_importer(__package__, deprecated_lookups=DEPRECATED_LOOKUP)


def __getattr__(name: str) -> Any:
    """Look up attributes dynamically.

    Python only calls a module-level `__getattr__` after normal attribute
    lookup on the module has failed (PEP 562).

    Args:
        name: The attribute name requested on this module.

    Returns:
        Whatever `_import_attribute` returns for `name`.
    """
    return _import_attribute(name)


# Both names are imported only under `TYPE_CHECKING`, so at runtime they are
# resolved through `__getattr__`.
__all__ = [
    "UpstashRedisByteStore",
    "UpstashRedisStore",
]