initial commit
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
"""**Docstores** are classes to store and load Documents.
|
||||
|
||||
The **Docstore** is a simplified version of the Document Loader.
|
||||
|
||||
**Class hierarchy:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
Docstore --> <name> # Examples: InMemoryDocstore, Wikipedia
|
||||
|
||||
**Main helpers:**
|
||||
|
||||
.. code-block::
|
||||
|
||||
Document, AddableMixin
|
||||
"""
|
||||
|
||||
import importlib
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_community.docstore.arbitrary_fn import (
|
||||
DocstoreFn,
|
||||
)
|
||||
from langchain_community.docstore.in_memory import (
|
||||
InMemoryDocstore,
|
||||
)
|
||||
from langchain_community.docstore.wikipedia import (
|
||||
Wikipedia,
|
||||
)
|
||||
|
||||
# Maps each lazily exported name to the dotted path of the module defining it.
_module_lookup = {
    "DocstoreFn": "langchain_community.docstore.arbitrary_fn",
    "InMemoryDocstore": "langchain_community.docstore.in_memory",
    "Wikipedia": "langchain_community.docstore.wikipedia",
}


def __getattr__(name: str) -> Any:
    """Resolve a package attribute on first access.

    Args:
        name: attribute being looked up on this module.

    Returns:
        The attribute called ``name`` taken from the module registered for it
        in ``_module_lookup``; that module is imported at this point.

    Raises:
        AttributeError: if ``name`` has no entry in ``_module_lookup``.
    """
    module_path = _module_lookup.get(name)
    if module_path is None:
        raise AttributeError(f"module {__name__} has no attribute {name}")
    return getattr(importlib.import_module(module_path), name)
|
||||
|
||||
|
||||
__all__ = ["DocstoreFn", "InMemoryDocstore", "Wikipedia"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,38 @@
|
||||
from typing import Callable, Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.docstore.base import Docstore
|
||||
|
||||
|
||||
class DocstoreFn(Docstore):
    """Docstore backed by an arbitrary lookup callable.

    Handy when:
    * building an InMemoryDocstore/dict up front would be expensive
    * documents are retrieved from remote sources
    * existing objects should simply be reused
    """

    def __init__(
        self,
        lookup_fn: Callable[[str], Union[Document, str]],
    ):
        """Store the callable used to resolve search strings.

        Args:
            lookup_fn: called with the search string; annotated as returning
                either a Document or a plain string.
        """
        self._lookup_fn = lookup_fn

    def search(self, search: str) -> Document:
        """Resolve a search string through the lookup callable.

        Args:
            search: search string, passed unchanged to the lookup callable.

        Returns:
            The Document produced by the callable, or a new Document wrapping
            the returned string with the search string stored under the
            ``source`` metadata key.

        Raises:
            ValueError: if the callable returns neither a str nor a Document.
        """
        found = self._lookup_fn(search)
        if isinstance(found, str):
            # A bare string carries no metadata, so the search string is
            # assumed to be the source ID.
            return Document(page_content=found, metadata={"source": search})
        if isinstance(found, Document):
            return found
        raise ValueError(f"Unexpected type of document {type(found)}")
|
||||
30
venv/Lib/site-packages/langchain_community/docstore/base.py
Normal file
30
venv/Lib/site-packages/langchain_community/docstore/base.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Interface to access to place that stores documents."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List, Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
|
||||
class Docstore(ABC):
    """Abstract interface for looking documents up in a backing store."""

    @abstractmethod
    def search(self, search: str) -> Union[str, Document]:
        """Look up a document by search string.

        Implementations return a Document when the page exists; when it does
        not, they return a string instead (for example one listing similar
        entries).
        """

    def delete(self, ids: List) -> None:
        """Delete the documents with the given IDs.

        This base implementation always raises NotImplementedError;
        subclasses that support deletion are expected to override it.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class AddableMixin(ABC):
    """Mixin for stores that can accept additional documents."""

    @abstractmethod
    def add(self, texts: Dict[str, Document]) -> None:
        """Add more documents to the store.

        Args:
            texts: the documents to add, annotated as a mapping from string
                keys to Document values.
        """
|
||||
@@ -0,0 +1,3 @@
|
||||
from langchain_core.documents import Document
|
||||
|
||||
__all__ = ["Document"]
|
||||
@@ -0,0 +1,51 @@
|
||||
"""Simple in memory docstore in the form of a dict."""
|
||||
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.docstore.base import AddableMixin, Docstore
|
||||
|
||||
|
||||
class InMemoryDocstore(Docstore, AddableMixin):
    """Simple in-memory docstore in the form of a dict of id -> Document."""

    def __init__(self, _dict: Optional[Dict[str, Document]] = None):
        """Initialize with an optional existing dict.

        Args:
            _dict: mapping of id -> document to start from. A new empty dict
                is used when it is omitted or None.
        """
        self._dict = _dict if _dict is not None else {}

    def add(self, texts: Dict[str, Document]) -> None:
        """Add texts to the in-memory dictionary.

        Args:
            texts: dictionary of id -> document.

        Raises:
            ValueError: if any of the given ids is already stored; nothing is
                added in that case.
        """
        overlapping = set(texts).intersection(self._dict)
        if overlapping:
            raise ValueError(f"Tried to add ids that already exist: {overlapping}")
        self._dict = {**self._dict, **texts}

    def delete(self, ids: List) -> None:
        """Delete IDs from the in-memory dictionary.

        The deletion is all-or-nothing: every id is checked before anything
        is removed, so a failed call leaves the store untouched. An id that
        appears more than once in ``ids`` is deleted once.

        Args:
            ids: ids of the documents to remove.

        Raises:
            ValueError: if ``ids`` is empty or any id is not stored.
        """
        # Collapse duplicates (keeping first-seen order) so a repeated id does
        # not fail on its second removal.
        unique_ids = list(dict.fromkeys(ids))
        missing = [doc_id for doc_id in unique_ids if doc_id not in self._dict]
        if missing or not unique_ids:
            # Validate up front: removing some ids and then failing on a
            # missing one would leave the store half-modified.
            raise ValueError(
                f"Tried to delete ids that does not exist: {missing or ids}"
            )
        for doc_id in unique_ids:
            del self._dict[doc_id]

    def search(self, search: str) -> Union[str, Document]:
        """Search via direct lookup.

        Args:
            search: id of a document to search for.

        Returns:
            Document if found, else error message.
        """
        if search not in self._dict:
            return f"ID {search} not found."
        return self._dict[search]
|
||||
@@ -0,0 +1,46 @@
|
||||
"""Wrapper around wikipedia API."""
|
||||
|
||||
from typing import Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.docstore.base import Docstore
|
||||
|
||||
|
||||
class Wikipedia(Docstore):
    """Docstore that looks pages up through the ``wikipedia`` package."""

    def __init__(self) -> None:
        """Check that wikipedia package is installed.

        Raises:
            ImportError: if the ``wikipedia`` package cannot be imported.
        """
        try:
            import wikipedia  # noqa: F401
        except ImportError as e:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            ) from e

    def search(self, search: str) -> Union[str, Document]:
        """Try to search for wiki page.

        If the page exists, return a Document holding its content, with the
        page URL stored under the ``page`` metadata key.
        If the page does not exist or the title is ambiguous, return a
        message listing similar entries.

        Args:
            search: search string.

        Returns:
            A Document object or an error message.
        """
        import wikipedia

        try:
            # Look the page up once and reuse the object for both the content
            # and the URL instead of requesting it twice.
            page = wikipedia.page(search)
            result: Union[str, Document] = Document(
                page_content=page.content, metadata={"page": page.url}
            )
        except (wikipedia.PageError, wikipedia.DisambiguationError):
            result = f"Could not find [{search}]. Similar: {wikipedia.search(search)}"
        return result
|
||||
Reference in New Issue
Block a user