initial commit
This commit is contained in:
@@ -0,0 +1,35 @@
|
||||
"""Edenai Tools."""
|
||||
|
||||
from langchain_community.tools.edenai.audio_speech_to_text import (
|
||||
EdenAiSpeechToTextTool,
|
||||
)
|
||||
from langchain_community.tools.edenai.audio_text_to_speech import (
|
||||
EdenAiTextToSpeechTool,
|
||||
)
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
from langchain_community.tools.edenai.image_explicitcontent import (
|
||||
EdenAiExplicitImageTool,
|
||||
)
|
||||
from langchain_community.tools.edenai.image_objectdetection import (
|
||||
EdenAiObjectDetectionTool,
|
||||
)
|
||||
from langchain_community.tools.edenai.ocr_identityparser import (
|
||||
EdenAiParsingIDTool,
|
||||
)
|
||||
from langchain_community.tools.edenai.ocr_invoiceparser import (
|
||||
EdenAiParsingInvoiceTool,
|
||||
)
|
||||
from langchain_community.tools.edenai.text_moderation import (
|
||||
EdenAiTextModerationTool,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"EdenAiExplicitImageTool",
|
||||
"EdenAiObjectDetectionTool",
|
||||
"EdenAiParsingIDTool",
|
||||
"EdenAiParsingInvoiceTool",
|
||||
"EdenAiTextToSpeechTool",
|
||||
"EdenAiSpeechToTextTool",
|
||||
"EdenAiTextModerationTool",
|
||||
"EdenaiTool",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,105 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Optional, Type
|
||||
|
||||
import requests
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field, HttpUrl, validator
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiSpeechToTextTool``: one field holding the URL
# of the audio file. Documented with comments (not a docstring) so the
# generated JSON schema of the model is left untouched.
class SpeechToTextInput(BaseModel):
    # Validated by pydantic as an HTTP(S) URL.
    query: HttpUrl = Field(
        description="url of the audio to analyze",
    )
|
||||
|
||||
|
||||
class EdenAiSpeechToTextTool(EdenaiTool):
    """Tool that queries the Eden AI Speech To Text API.

    for api reference check edenai documentation:
    https://app.edenai.run/bricks/speech/asynchronous-speech-to-text.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings

    The transcription runs as an asynchronous job: ``_run`` launches it,
    then polls the job URL until the job reports ``finished``.
    """

    name: str = "edenai_speech_to_text"
    description: str = (
        "A wrapper around edenai Services speech to text "
        "Useful for when you have to convert audio to text."
        "Input should be a url to an audio file."
    )
    args_schema: Type[BaseModel] = SpeechToTextInput
    is_async: bool = True

    language: Optional[str] = "en"
    # Explicit ``None`` defaults: pydantic v2 treats a bare ``Optional[X]``
    # annotation without a default as a *required* field.
    speakers: Optional[int] = None
    profanity_filter: bool = False
    custom_vocabulary: Optional[List[str]] = None

    feature: str = "audio"
    subfeature: str = "speech_to_text_async"
    base_url: str = "https://api.edenai.run/v2/audio/speech_to_text_async/"

    @validator("providers")
    def check_only_one_provider_selected(cls, v: List[str]) -> List[str]:
        """
        This tool has no feature to combine providers results.
        Therefore we only allow one provider
        """
        if len(v) > 1:
            raise ValueError(
                "Please select only one provider. "
                "The feature to combine providers results is not available "
                "for this tool."
            )
        return v

    def _wait_processing(self, url: str) -> requests.Response:
        """Poll the job at ``url`` until it is finished.

        Sleeps one second before each of at most ten polls.

        Returns:
            The response of the poll that reported ``status == "finished"``.

        Raises:
            Exception: if the finished job carries a provider error, or if
                the job is still not finished after the last poll.
        """
        for _ in range(10):
            time.sleep(1)
            audio_analysis_result = self._get_edenai(url)
            job = audio_analysis_result.json()
            if job["status"] != "finished":
                continue

            error = job["results"][self.providers[0]]["error"]
            if error is not None:
                raise Exception(
                    f"EdenAI returned an unexpected response\n{error}"
                )
            return audio_analysis_result

        raise Exception("Edenai speech to text job id processing Timed out")

    def _parse_response(self, response: dict) -> str:
        """Return the job identifier (``public_id``) of the launched job."""
        return response["public_id"]

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            query: URL of the audio file to transcribe.
            run_manager: Optional callback manager (unused here).

        Returns:
            The ``text`` reported by the first configured provider.
        """
        all_params = {
            # The args schema validates ``query`` as ``HttpUrl``; in
            # pydantic v2 that is not a ``str`` and cannot be JSON-encoded,
            # so coerce it before it reaches the request payload.
            "file_url": str(query),
            "language": self.language,
            "speakers": self.speakers,
            "profanity_filter": self.profanity_filter,
            "custom_vocabulary": self.custom_vocabulary,
        }

        # filter so we don't send val to api when val is `None`
        query_params = {k: v for k, v in all_params.items() if v is not None}

        job_id = self._call_eden_ai(query_params)
        url = self.base_url + job_id
        audio_analysis_result = self._wait_processing(url)
        formatted_text = json.loads(audio_analysis_result.text)
        return formatted_text["results"][self.providers[0]]["text"]
|
||||
@@ -0,0 +1,122 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Literal, Optional, Type
|
||||
|
||||
import requests
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field, model_validator, validator
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiTextToSpeechTool``: one field holding the text
# to synthesize. Documented with comments (not a docstring) so the generated
# JSON schema of the model is left untouched.
class TextToSpeechInput(BaseModel):
    query: str = Field(
        description="text to generate audio from",
    )
|
||||
|
||||
|
||||
class EdenAiTextToSpeechTool(EdenaiTool):
    """Tool that queries the Eden AI Text to speech API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/audio_text_to_speech_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings

    The tool returns either the URL of the generated audio or, when
    ``return_type`` is ``"wav"``, the path of the file it downloaded.
    """

    name: str = "edenai_text_to_speech"
    description: str = (
        "A wrapper around edenai Services text to speech."
        "Useful for when you need to convert text to speech."
        "the output is a string representing the URL of the audio file,\n"
        "or the path to the downloaded wav file "
    )
    args_schema: Type[BaseModel] = TextToSpeechInput

    language: Optional[str] = "en"
    """
    language of the text passed to the model.
    """

    # optional params see api documentation for more info
    return_type: Literal["url", "wav"] = "url"
    rate: Optional[int] = None
    pitch: Optional[int] = None
    volume: Optional[int] = None
    audio_format: Optional[str] = None
    sampling_rate: Optional[int] = None
    voice_models: Dict[str, str] = Field(default_factory=dict)

    voice: Literal["MALE", "FEMALE"]
    """voice option : 'MALE' or 'FEMALE' """

    feature: str = "audio"
    subfeature: str = "text_to_speech"

    @validator("providers")
    def check_only_one_provider_selected(cls, v: List[str]) -> List[str]:
        """
        This tool has no feature to combine providers results.
        Therefore we only allow one provider
        """
        if len(v) <= 1:
            return v
        raise ValueError(
            "Please select only one provider. "
            "The feature to combine providers results is not available "
            "for this tool."
        )

    @model_validator(mode="before")
    @classmethod
    def check_voice_models_key_is_provider_name(cls, values: dict) -> Any:
        """Reject ``voice_models`` keys that are not among ``providers``."""
        configured = values.get("voice_models", {}).keys()
        if any(key not in values.get("providers", []) for key in configured):
            raise ValueError(
                "voice_model should be formatted like this "
                "{<provider_name>: <its_voice_model>}"
            )
        return values

    def _download_wav(self, url: str, save_path: str) -> None:
        """Fetch ``url`` and write the response body to ``save_path``.

        Raises:
            ValueError: if the download does not answer with status 200.
        """
        download = requests.get(url)
        if download.status_code != 200:
            raise ValueError("Error while downloading wav file")
        with open(save_path, "wb") as wav_file:
            wav_file.write(download.content)

    def _parse_response(self, response: list) -> str:
        """Return the audio URL, or download it and return the local path."""
        audio_url = response[0]["audio_resource_url"]
        if self.return_type == "url":
            return audio_url
        # "wav" mode: the file always lands at this fixed relative path.
        self._download_wav(audio_url, "audio.wav")
        return "audio.wav"

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        candidates = {
            "text": query,
            "language": self.language,
            "option": self.voice,
            "return_type": self.return_type,
            "rate": self.rate,
            "pitch": self.pitch,
            "volume": self.volume,
            "audio_format": self.audio_format,
            "sampling_rate": self.sampling_rate,
            "settings": self.voice_models,
        }

        # Unset (``None``) options are left out of the request entirely.
        return self._call_eden_ai(
            {
                param: value
                for param, value in candidates.items()
                if value is not None
            }
        )
|
||||
@@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from langchain_core.tools import BaseTool
|
||||
from langchain_core.utils import secret_from_env
|
||||
from pydantic import Field, SecretStr
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EdenaiTool(BaseTool):
    """
    the base tool for all the EdenAI Tools .
    you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings

    Subclasses set ``feature`` / ``subfeature`` (they form the endpoint
    URL) and implement ``_run`` and ``_parse_response``.
    """

    feature: str
    subfeature: str
    edenai_api_key: Optional[SecretStr] = Field(
        default_factory=secret_from_env("EDENAI_API_KEY", default=None)
    )
    is_async: bool = False

    providers: List[str]
    """provider to use for the API call."""

    @staticmethod
    def get_user_agent() -> str:
        """Return the ``User-Agent`` value sent with every request."""
        from langchain_community import __version__

        return f"langchain/{__version__}"

    def _bearer_token(self) -> str:
        """Return the raw API key, or ``""`` when no key is configured.

        ``SecretStr`` renders as asterisks when formatted, so the real
        value must be unwrapped before it is placed in a header.
        """
        return self.edenai_api_key.get_secret_value() if self.edenai_api_key else ""

    def _call_eden_ai(self, query_params: Dict[str, Any]) -> str:
        """
        Make an API call to the EdenAI service with the specified query parameters.

        Args:
            query_params (dict): The parameters to include in the API call.
                They override the default payload entries of the same name.

        Returns:
            str: The response body as condensed by ``_parse_response``.

        Raises:
            RuntimeError: if ``_parse_response`` fails on the response body.
            Exception, ValueError: see ``_raise_on_error``.
        """
        headers = {
            "Authorization": f"Bearer {self._bearer_token()}",
            "User-Agent": self.get_user_agent(),
        }

        url = f"https://api.edenai.run/v2/{self.feature}/{self.subfeature}"

        payload = {
            "providers": str(self.providers),
            "response_as_dict": False,
            "attributes_as_list": True,
            "show_original_response": False,
        }

        payload.update(query_params)

        response = requests.post(url, json=payload, headers=headers)

        self._raise_on_error(response)

        try:
            return self._parse_response(response.json())
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"An error occurred while running tool: {e}") from e

    def _raise_on_error(self, response: requests.Response) -> None:
        """Raise when the HTTP call or the provider reported a failure.

        Raises:
            Exception: for a status code >= 500, or any other non-200 code
                below 400.
            ValueError: for a status code in the 4xx range, or when a
                synchronous call's first entry has ``status == "fail"``.
        """
        if response.status_code >= 500:
            raise Exception(f"EdenAI Server: Error {response.status_code}")
        elif response.status_code >= 400:
            raise ValueError(f"EdenAI received an invalid payload: {response.text}")
        elif response.status_code != 200:
            raise Exception(
                f"EdenAI returned an unexpected response with status "
                f"{response.status_code}: {response.text}"
            )

        # case where edenai call succeeded but provider returned an error
        # (eg: rate limit, server error, etc.)
        if self.is_async is False:
            # async call are different and only return a job_id,
            # not the provider response directly
            provider_response = response.json()[0]
            if provider_response.get("status") == "fail":
                err_msg = provider_response["error"]["message"]
                raise ValueError(err_msg)

    @abstractmethod
    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        pass

    @abstractmethod
    def _parse_response(self, response: Any) -> str:
        """Take a dict response and condense it's data in a human readable string"""
        pass

    def _get_edenai(self, url: str) -> requests.Response:
        """GET ``url`` with the EdenAI credentials and return the response.

        Raises:
            Exception, ValueError: see ``_raise_on_error``.
        """
        headers = {
            "accept": "application/json",
            # Use the unwrapped key: formatting the ``SecretStr`` itself
            # would send a masked placeholder instead of the token.
            "authorization": f"Bearer {self._bearer_token()}",
            "User-Agent": self.get_user_agent(),
        }

        response = requests.get(url, headers=headers)

        self._raise_on_error(response)

        return response

    def _parse_json_multilevel(
        self, extracted_data: dict, formatted_list: list, level: int = 0
    ) -> None:
        """Flatten a nested dict into ``formatted_list``, one line per entry.

        Each line is prefixed with ``level`` spaces. String values have
        their newlines replaced by commas; lists and dicts are descended
        into one level deeper. Values of any other type are skipped.
        """
        for section, subsections in extracted_data.items():
            indentation = " " * level
            if isinstance(subsections, str):
                subsections = subsections.replace("\n", ",")
                formatted_list.append(f"{indentation}{section} : {subsections}")

            elif isinstance(subsections, list):
                formatted_list.append(f"{indentation}{section} : ")
                self._list_handling(subsections, formatted_list, level + 1)

            elif isinstance(subsections, dict):
                formatted_list.append(f"{indentation}{section} : ")
                self._parse_json_multilevel(subsections, formatted_list, level + 1)

    def _list_handling(
        self, subsection_list: list, formatted_list: list, level: int
    ) -> None:
        """Append every item of ``subsection_list`` to ``formatted_list``.

        Dict items are flattened at the same ``level``, nested lists one
        level deeper, and any other item is written as-is after ``level``
        spaces.
        """
        for list_item in subsection_list:
            if isinstance(list_item, dict):
                self._parse_json_multilevel(list_item, formatted_list, level)

            elif isinstance(list_item, list):
                self._list_handling(list_item, formatted_list, level + 1)

            else:
                formatted_list.append(f"{' ' * level}{list_item}")
|
||||
@@ -0,0 +1,73 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiExplicitImageTool``: one field holding the URL
# of the image. Documented with comments (not a docstring) so the generated
# JSON schema of the model is left untouched.
class ExplicitImageInput(BaseModel):
    # Validated by pydantic as an HTTP(S) URL.
    query: HttpUrl = Field(
        description="url of the image to analyze",
    )
|
||||
|
||||
|
||||
class EdenAiExplicitImageTool(EdenaiTool):
    """Tool that queries the Eden AI Explicit image detection.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/image_explicit_content_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_image_explicit_content_detection"

    description: str = (
        "A wrapper around edenai Services Explicit image detection. "
        "Useful for when you have to extract Explicit Content from images.\n"
        "it detects adult only content in images,\n"
        "that is generally inappropriate for people under\n"
        "the age of 18 and includes nudity, sexual activity,\n"
        "pornography, violence, gore content, etc."
        "Input should be the string url of the image ."
    )
    args_schema: Type[BaseModel] = ExplicitImageInput

    combine_available: bool = True
    feature: str = "image"
    subfeature: str = "explicit_content"

    def _parse_json(self, json_data: dict) -> str:
        """Render one provider entry as text.

        The first line is the overall ``nsfw_likelihood``; each detected
        item follows as ``"<index>: <label> likelihood <value>"``, items
        separated by ``",\\n"``.
        """
        header = f"nsfw_likelihood: {json_data['nsfw_likelihood']}"
        item_lines = [
            f"{idx}: {found_obj['label'].lower()} likelihood {found_obj['likelihood']}"
            for idx, found_obj in enumerate(json_data["items"])
        ]
        if not item_lines:
            # Previously a blind ``[:-2]`` slice cut the likelihood value
            # off the header when there were no items.
            return header
        return header + "\n" + ",\n".join(item_lines)

    def _parse_response(self, json_data: list) -> str:
        """Pick the entry to report and render it.

        A single-entry response is used directly; otherwise the entry
        whose ``provider`` is ``"eden-ai"`` is used (the last one, if
        several match).

        Raises:
            ValueError: if no usable entry is present.
        """
        if len(json_data) == 1:
            return self._parse_json(json_data[0])

        result = None
        for entry in json_data:
            if entry.get("provider") == "eden-ai":
                result = self._parse_json(entry)

        if result is None:
            raise ValueError(
                "EdenAI response contains no 'eden-ai' combined result to parse"
            )
        return result

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        # The args schema validates ``query`` as ``HttpUrl``; in pydantic v2
        # that is not a ``str`` and cannot be JSON-encoded, so coerce it.
        query_params = {"file_url": str(query), "attributes_as_list": False}
        return self._call_eden_ai(query_params)
|
||||
@@ -0,0 +1,87 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiObjectDetectionTool``: one field holding the
# URL of the image. Documented with comments (not a docstring) so the
# generated JSON schema of the model is left untouched.
class ObjectDetectionInput(BaseModel):
    # Validated by pydantic as an HTTP(S) URL.
    query: HttpUrl = Field(
        description="url of the image to analyze",
    )
|
||||
|
||||
|
||||
class EdenAiObjectDetectionTool(EdenaiTool):
    """Tool that queries the Eden AI Object detection API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/image_object_detection_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_object_detection"

    description: str = (
        "A wrapper around edenai Services Object Detection . "
        "Useful for when you have to do an to identify and locate\n"
        "(with bounding boxes) objects in an image "
        "Input should be the string url of the image to identify."
    )
    args_schema: Type[BaseModel] = ObjectDetectionInput

    show_positions: bool = False

    feature: str = "image"
    subfeature: str = "object_detection"

    def _parse_json(self, json_data: dict) -> str:
        """Render one provider entry, one line per detected object.

        Each line is ``"<label> - Confidence <confidence>"``; when
        ``show_positions`` is set and all four bounding-box coordinates
        are present, the coordinates are appended.
        """
        label_info = []

        for found_obj in json_data["items"]:
            label_str = f"{found_obj['label']} - Confidence {found_obj['confidence']}"
            x_min = found_obj.get("x_min")
            x_max = found_obj.get("x_max")
            y_min = found_obj.get("y_min")
            y_max = found_obj.get("y_max")
            # some providers don't return positions; compare against None
            # so that a legitimate coordinate of 0 still counts as present
            has_position = all(
                coord is not None for coord in (x_min, x_max, y_min, y_max)
            )
            if self.show_positions and has_position:
                label_str += (
                    f",at the position x_min: {x_min}, x_max: {x_max},\n"
                    f"y_min: {y_min}, y_max: {y_max}"
                )
            label_info.append(label_str)

        return "\n".join(label_info)

    def _parse_response(self, response: list) -> str:
        """Pick the entry to report and render it.

        A single-entry response is used directly; otherwise the entry
        whose ``provider`` is ``"eden-ai"`` is used (the last one, if
        several match).

        Raises:
            ValueError: if no usable entry is present.
        """
        if len(response) == 1:
            return self._parse_json(response[0])

        result = None
        for entry in response:
            if entry.get("provider") == "eden-ai":
                result = self._parse_json(entry)

        if result is None:
            raise ValueError(
                "EdenAI response contains no 'eden-ai' combined result to parse"
            )
        return result

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        # The args schema validates ``query`` as ``HttpUrl``; in pydantic v2
        # that is not a ``str`` and cannot be JSON-encoded, so coerce it.
        query_params = {"file_url": str(query), "attributes_as_list": False}
        return self._call_eden_ai(query_params)
|
||||
@@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiParsingIDTool``: one field holding the URL of
# the document. Documented with comments (not a docstring) so the generated
# JSON schema of the model is left untouched.
class IDParsingInput(BaseModel):
    # Validated by pydantic as an HTTP(S) URL.
    query: HttpUrl = Field(
        description="url of the document to parse",
    )
|
||||
|
||||
|
||||
class EdenAiParsingIDTool(EdenaiTool):
    """Tool that queries the Eden AI Identity parsing API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/ocr_identity_parser_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_identity_parsing"

    description: str = (
        "A wrapper around edenai Services Identity parsing. "
        "Useful for when you have to extract information from an ID Document "
        "Input should be the string url of the document to parse."
    )
    args_schema: Type[BaseModel] = IDParsingInput

    feature: str = "ocr"
    subfeature: str = "identity_parser"

    language: Optional[str] = None
    """
    language of the text passed to the model.
    """

    def _parse_response(self, response: list) -> str:
        """Flatten the first ``extracted_data`` record into indented lines.

        A single-entry response is used directly; otherwise only entries
        whose ``provider`` is ``"eden-ai"`` are rendered.
        """
        formatted_list: list = []

        if len(response) == 1:
            entries = response
        else:
            entries = [
                entry for entry in response if entry.get("provider") == "eden-ai"
            ]

        for entry in entries:
            self._parse_json_multilevel(entry["extracted_data"][0], formatted_list)

        return "\n".join(formatted_list)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        all_params = {
            # The args schema validates ``query`` as ``HttpUrl``; in
            # pydantic v2 that is not a ``str`` and cannot be JSON-encoded.
            "file_url": str(query),
            "language": self.language,
            "attributes_as_list": False,
        }

        # filter so we don't send val to api when val is `None`
        query_params = {k: v for k, v in all_params.items() if v is not None}

        return self._call_eden_ai(query_params)
|
||||
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiParsingInvoiceTool``: one field holding the URL
# of the document. Documented with comments (not a docstring) so the
# generated JSON schema of the model is left untouched.
class InvoiceParsingInput(BaseModel):
    # Validated by pydantic as an HTTP(S) URL.
    query: HttpUrl = Field(
        description="url of the document to parse",
    )
|
||||
|
||||
|
||||
class EdenAiParsingInvoiceTool(EdenaiTool):
    """Tool that queries the Eden AI Invoice parsing API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/ocr_invoice_parser_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_invoice_parsing"
    description: str = (
        "A wrapper around edenai Services invoice parsing. "
        "Useful for when you have to extract information from\n"
        "an image it enables to take invoices\n"
        "in a variety of formats and returns the data in contains\n"
        "(items, prices, addresses, vendor name, etc.)\n"
        "in a structured format to automate the invoice processing "
        "Input should be the string url of the document to parse."
    )
    args_schema: Type[BaseModel] = InvoiceParsingInput

    language: Optional[str] = None
    """
    language of the image passed to the model.
    """

    feature: str = "ocr"
    subfeature: str = "invoice_parser"

    def _parse_response(self, response: list) -> str:
        """Flatten the first ``extracted_data`` record into indented lines.

        A single-entry response is used directly; otherwise only entries
        whose ``provider`` is ``"eden-ai"`` are rendered.
        """
        formatted_list: list = []

        if len(response) == 1:
            entries = response
        else:
            entries = [
                entry for entry in response if entry.get("provider") == "eden-ai"
            ]

        for entry in entries:
            self._parse_json_multilevel(entry["extracted_data"][0], formatted_list)

        return "\n".join(formatted_list)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        all_params = {
            # The args schema validates ``query`` as ``HttpUrl``; in
            # pydantic v2 that is not a ``str`` and cannot be JSON-encoded.
            "file_url": str(query),
            "language": self.language,
            "attributes_as_list": False,
        }

        # filter so we don't send val to api when val is `None`
        query_params = {k: v for k, v in all_params.items() if v is not None}

        return self._call_eden_ai(query_params)
|
||||
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Argument schema for ``EdenAiTextModerationTool``: one field holding the
# text to check. Documented with comments (not a docstring) so the generated
# JSON schema of the model is left untouched.
class TextModerationInput(BaseModel):
    query: str = Field(
        description="Text to moderate",
    )
|
||||
|
||||
|
||||
class EdenAiTextModerationTool(EdenaiTool):
    """Tool that queries the Eden AI Explicit text detection.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/text_moderation_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_explicit_content_detection_text"
    description: str = (
        "A wrapper around edenai Services explicit content detection for text. "
        "Useful for when you have to scan text for offensive,\n"
        "sexually explicit or suggestive content,\n"
        "it checks also if there is any content of self-harm,\n"
        "violence, racist or hate speech."
        "the structure of the output is :\n"
        "'the type of the explicit content : the likelihood of it being explicit'\n"
        "the likelihood is a number\n"
        "between 1 and 5, 1 being the lowest and 5 the highest.\n"
        "something is explicit if the likelihood is equal or higher than 3.\n"
        "for example :\n"
        "nsfw_likelihood: 1\n"
        "this is not explicit.\n"
        "for example :\n"
        "nsfw_likelihood: 3\n"
        "this is explicit.\n"
        "Input should be a string."
    )
    args_schema: Type[BaseModel] = TextModerationInput

    language: str

    feature: str = "text"
    subfeature: str = "moderation"

    def _parse_response(self, response: list) -> str:
        """Render every provider entry as ``key: value`` lines.

        For each entry: an optional ``nsfw_likelihood`` line, followed by
        one ``"<label>": <likelihood>`` line per label/likelihood pair.
        """
        lines = []
        for provider_result in response:
            if "nsfw_likelihood" in provider_result:
                overall = str(provider_result["nsfw_likelihood"])
                lines.append("nsfw_likelihood: " + overall)

            pairs = zip(provider_result["label"], provider_result["likelihood"])
            lines.extend(f'"{label}": {likelihood!s}' for label, likelihood in pairs)

        return "\n".join(lines)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        return self._call_eden_ai({"text": query, "language": self.language})
|
||||
Reference in New Issue
Block a user