initial commit
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
"""Util that calls bibtexparser."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Mapping
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, model_validator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Extra BibTeX fields that ``BibtexparserWrapper.get_metadata`` copies into its
# result only when called with ``load_extra=True``. Order is preserved because
# it determines the key order of the returned metadata dict.
OPTIONAL_FIELDS = [
    "annotate", "booktitle", "editor", "howpublished", "journal",
    "keywords", "note", "organization", "publisher", "school",
    "series", "type", "doi", "issn", "isbn",
]
|
||||
|
||||
|
||||
class BibtexparserWrapper(BaseModel):
    """Wrapper around bibtexparser.

    To use, you should have the ``bibtexparser`` python package installed.
    https://bibtexparser.readthedocs.io/en/master/

    This wrapper uses bibtexparser to load the entries of a bibtex file and
    turns a single entry into a flat metadata dictionary.
    """

    # Reject unknown keyword arguments when the model is constructed.
    model_config = ConfigDict(
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that the python package exists in environment.

        Args:
            values: Raw constructor input; it is returned unchanged.

        Returns:
            The ``values`` argument, untouched.

        Raises:
            ImportError: If ``bibtexparser`` cannot be imported. The original
                import failure is attached as the exception's cause.
        """
        try:
            import bibtexparser  # noqa
        except ImportError as err:
            # Chain the underlying error so the real reason the import failed
            # (missing package, broken install, ...) stays in the traceback.
            raise ImportError(
                "Could not import bibtexparser python package. "
                "Please install it with `pip install bibtexparser`."
            ) from err

        return values

    def load_bibtex_entries(self, path: str) -> List[Dict[str, Any]]:
        """Load bibtex entries from the bibtex file at the given path.

        Args:
            path: Path of the bibtex file to read.

        Returns:
            The ``entries`` attribute of the database object returned by
            ``bibtexparser.load``.
        """
        # Imported here rather than at module level so that importing this
        # module does not itself require bibtexparser.
        import bibtexparser

        # NOTE: no explicit encoding is passed, so the file is decoded with
        # the platform's default text encoding.
        with open(path) as file:
            entries = bibtexparser.load(file).entries
        return entries

    def get_metadata(
        self, entry: Mapping[str, Any], load_extra: bool = False
    ) -> Dict[str, Any]:
        """Get metadata for the given entry.

        Args:
            entry: A single bibtex entry, as a mapping of field name to value.
            load_extra: When true, every field named in ``OPTIONAL_FIELDS`` is
                also copied from ``entry`` into the result.

        Returns:
            A dict with the keys ``id``, ``published_year``, ``title``,
            ``publication``, ``authors``, ``abstract`` and ``url`` (plus the
            optional fields when requested). Keys whose value is ``None`` are
            omitted; other falsy values such as ``""`` are kept.
        """
        # A missing or falsy "journal" falls back to "booktitle".
        publication = entry.get("journal") or entry.get("booktitle")

        # An explicit URL wins; otherwise build a resolver link from the DOI.
        if "url" in entry:
            url = entry["url"]
        elif "doi" in entry:
            url = f"https://doi.org/{entry['doi']}"
        else:
            url = None

        meta = {
            "id": entry.get("ID"),
            "published_year": entry.get("year"),
            "title": entry.get("title"),
            "publication": publication,
            "authors": entry.get("author"),
            "abstract": entry.get("abstract"),
            "url": url,
        }
        if load_extra:
            for field in OPTIONAL_FIELDS:
                meta[field] = entry.get(field)

        # Drop fields that were absent from the entry.
        return {k: v for k, v in meta.items() if v is not None}
|
||||
Reference in New Issue
Block a user