initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
"""**Docstores** are classes to store and load Documents.
The **Docstore** is a simplified version of the Document Loader.

**Class hierarchy:**

.. code-block::

    Docstore --> <name>  # Examples: InMemoryDocstore, Wikipedia

**Main helpers:**

.. code-block::

    Document, AddableMixin
"""
import importlib
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from langchain_community.docstore.arbitrary_fn import (
DocstoreFn,
)
from langchain_community.docstore.in_memory import (
InMemoryDocstore,
)
from langchain_community.docstore.wikipedia import (
Wikipedia,
)
# Maps each lazily exported public name to the dotted path of the module
# that defines it. The module-level __getattr__ consults this table and
# imports the target module only when the name is first accessed.
_module_lookup = {
"DocstoreFn": "langchain_community.docstore.arbitrary_fn",
"InMemoryDocstore": "langchain_community.docstore.in_memory",
"Wikipedia": "langchain_community.docstore.wikipedia",
}
def __getattr__(name: str) -> Any:
    """Resolve a lazily exported attribute of this package.

    Called by Python when normal module attribute lookup fails. Names listed
    in ``_module_lookup`` are imported from their defining module on demand.

    Args:
        name: attribute name being looked up on this module.

    Returns:
        The attribute called ``name`` taken from the module registered for it.

    Raises:
        AttributeError: if ``name`` is not registered in ``_module_lookup``.
    """
    # Reject unknown names first so the happy path reads straight through.
    if name not in _module_lookup:
        raise AttributeError(f"module {__name__} has no attribute {name}")
    provider = importlib.import_module(_module_lookup[name])
    return getattr(provider, name)
__all__ = ["DocstoreFn", "InMemoryDocstore", "Wikipedia"]

View File

@@ -0,0 +1,38 @@
from typing import Callable, Union
from langchain_core.documents import Document
from langchain_community.docstore.base import Docstore
class DocstoreFn(Docstore):
    """Docstore backed by an arbitrary lookup callable.

    Useful when:

    * building an InMemoryDocstore/dict up front would be expensive
    * documents are retrieved from remote sources
    * existing objects should simply be reused
    """

    def __init__(
        self,
        lookup_fn: Callable[[str], Union[Document, str]],
    ):
        """Remember the callable used to resolve searches.

        Args:
            lookup_fn: called with the search string; expected to return
                either a Document or a plain string.
        """
        self._lookup_fn = lookup_fn

    def search(self, search: str) -> Document:
        """Resolve ``search`` through the lookup callable.

        Args:
            search: search string handed to the lookup callable.

        Returns:
            The Document returned by the callable, or a new Document wrapping
            the returned string.

        Raises:
            ValueError: if the callable returns neither a string nor a
                Document.
        """
        found = self._lookup_fn(search)
        if isinstance(found, str):
            # NOTE: a bare string carries no metadata, so the search string
            # is assumed to be the source ID.
            return Document(page_content=found, metadata={"source": search})
        if isinstance(found, Document):
            return found
        raise ValueError(f"Unexpected type of document {type(found)}")

View File

@@ -0,0 +1,30 @@
"""Interface to access to place that stores documents."""
from abc import ABC, abstractmethod
from typing import Dict, List, Union
from langchain_core.documents import Document
class Docstore(ABC):
    """Abstract interface for a place that stores documents."""

    @abstractmethod
    def search(self, search: str) -> Union[str, Document]:
        """Look up a document.

        Args:
            search: the query or identifier to look up.

        Returns:
            A Document when one is found; otherwise a string (for example
            a message describing the miss or listing similar entries).
        """

    def delete(self, ids: List) -> None:
        """Delete the documents with the given IDs.

        This base implementation always raises; stores that support
        deletion must override it.

        Args:
            ids: identifiers of the documents to remove.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError()
class AddableMixin(ABC):
    """Mixin for stores that accept new documents after construction."""

    @abstractmethod
    def add(self, texts: Dict[str, Document]) -> None:
        """Add more documents to the store.

        Args:
            texts: mapping of id -> document to add.
        """

View File

@@ -0,0 +1,3 @@
from langchain_core.documents import Document
__all__ = ["Document"]

View File

@@ -0,0 +1,51 @@
"""Simple in memory docstore in the form of a dict."""
from typing import Dict, List, Optional, Union
from langchain_core.documents import Document
from langchain_community.docstore.base import AddableMixin, Docstore
class InMemoryDocstore(Docstore, AddableMixin):
    """Simple in-memory docstore backed by a dict of id -> Document."""

    def __init__(self, _dict: Optional[Dict[str, Document]] = None):
        """Initialize the store.

        Args:
            _dict: optional initial mapping of id -> document. When omitted
                (or ``None``) the store starts empty.
        """
        self._dict = _dict if _dict is not None else {}

    def add(self, texts: Dict[str, Document]) -> None:
        """Add documents to the in-memory dictionary.

        Args:
            texts: dictionary of id -> document.

        Raises:
            ValueError: if any of the given ids is already stored; nothing
                is added in that case.
        """
        overlapping = set(texts).intersection(self._dict)
        if overlapping:
            raise ValueError(f"Tried to add ids that already exist: {overlapping}")
        # Rebinds to a freshly built dict; the previously held mapping object
        # is not modified by this call.
        self._dict = {**self._dict, **texts}

    def delete(self, ids: List) -> None:
        """Delete the given ids from the in-memory dictionary.

        All ids are validated before anything is removed, so a failed call
        leaves the store unchanged.

        Args:
            ids: ids of the documents to delete. Repeated ids are allowed.

        Raises:
            ValueError: if any id is not stored, or if ``ids`` is empty
                (an empty request has always been rejected here).
        """
        requested = list(ids)
        missing = [_id for _id in requested if _id not in self._dict]
        # Check up front: popping while some ids are absent would remove the
        # leading ids and then fail with KeyError, leaving a half-applied
        # delete behind.
        if missing or not requested:
            raise ValueError(f"Tried to delete ids that does not exist: {missing}")
        for _id in requested:
            # The default keeps a repeated id from raising KeyError on its
            # second occurrence.
            self._dict.pop(_id, None)

    def search(self, search: str) -> Union[str, Document]:
        """Search via direct lookup.

        Args:
            search: id of a document to search for.

        Returns:
            The stored Document if the id is present, otherwise an error
            message string.
        """
        if search not in self._dict:
            return f"ID {search} not found."
        return self._dict[search]

View File

@@ -0,0 +1,46 @@
"""Wrapper around wikipedia API."""
from typing import Union
from langchain_core.documents import Document
from langchain_community.docstore.base import Docstore
class Wikipedia(Docstore):
    """Docstore that looks pages up through the ``wikipedia`` package."""

    def __init__(self) -> None:
        """Check that wikipedia package is installed.

        Raises:
            ImportError: if the ``wikipedia`` package cannot be imported.
        """
        try:
            import wikipedia  # noqa: F401
        except ImportError as e:
            # Chain explicitly so the underlying import failure stays visible.
            raise ImportError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from e

    def search(self, search: str) -> Union[str, Document]:
        """Try to search for wiki page.

        If the page exists, return a Document holding the page content, with
        the page URL stored under the ``"page"`` metadata key.
        If the page cannot be resolved, return a message listing similar
        entries.

        Args:
            search: search string.

        Returns:
            A Document object, or an error message string.
        """
        import wikipedia

        try:
            # Look the page up once and read both fields from the same
            # object, rather than issuing a second identical lookup for the
            # URL.
            page = wikipedia.page(search)
            return Document(
                page_content=page.content, metadata={"page": page.url}
            )
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            # Both failure modes are reported the same way: suggest the
            # titles returned by a plain search.
            return f"Could not find [{search}]. Similar: {wikipedia.search(search)}"