initial commit

This commit is contained in:
2026-05-11 12:36:20 +05:30
commit 384cbe8019
15377 changed files with 2360544 additions and 0 deletions

View File

@@ -0,0 +1,35 @@
"""Edenai Tools."""
from langchain_community.tools.edenai.audio_speech_to_text import (
EdenAiSpeechToTextTool,
)
from langchain_community.tools.edenai.audio_text_to_speech import (
EdenAiTextToSpeechTool,
)
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
from langchain_community.tools.edenai.image_explicitcontent import (
EdenAiExplicitImageTool,
)
from langchain_community.tools.edenai.image_objectdetection import (
EdenAiObjectDetectionTool,
)
from langchain_community.tools.edenai.ocr_identityparser import (
EdenAiParsingIDTool,
)
from langchain_community.tools.edenai.ocr_invoiceparser import (
EdenAiParsingInvoiceTool,
)
from langchain_community.tools.edenai.text_moderation import (
EdenAiTextModerationTool,
)
# Names exported by this package: the Eden AI tool classes imported above,
# plus the shared ``EdenaiTool`` base class.
__all__ = [
    "EdenAiExplicitImageTool",
    "EdenAiObjectDetectionTool",
    "EdenAiParsingIDTool",
    "EdenAiParsingInvoiceTool",
    "EdenAiTextToSpeechTool",
    "EdenAiSpeechToTextTool",
    "EdenAiTextModerationTool",
    "EdenaiTool",
]

View File

@@ -0,0 +1,105 @@
from __future__ import annotations
import json
import logging
import time
from typing import List, Optional, Type
import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field, HttpUrl, validator
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class SpeechToTextInput(BaseModel):
    # Argument schema used by EdenAiSpeechToTextTool: one URL pointing at
    # the audio to transcribe.
    query: HttpUrl = Field(
        description="url of the audio to analyze",
    )
class EdenAiSpeechToTextTool(EdenaiTool):
    """Tool that queries the Eden AI Speech To Text API.

    for api reference check edenai documentation:
    https://app.edenai.run/bricks/speech/asynchronous-speech-to-text.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_speech_to_text"
    description: str = (
        "A wrapper around edenai Services speech to text "
        "Useful for when you have to convert audio to text."
        "Input should be a url to an audio file."
    )
    args_schema: Type[BaseModel] = SpeechToTextInput
    is_async: bool = True

    language: Optional[str] = "en"
    # Explicit ``None`` defaults: with pydantic v2 an ``Optional[...]`` field
    # that has no default is *required*, which would force every caller to
    # pass these two purely optional settings.
    speakers: Optional[int] = None
    profanity_filter: bool = False
    custom_vocabulary: Optional[List[str]] = None

    feature: str = "audio"
    subfeature: str = "speech_to_text_async"
    base_url: str = "https://api.edenai.run/v2/audio/speech_to_text_async/"

    @validator("providers")
    def check_only_one_provider_selected(cls, v: List[str]) -> List[str]:
        """Reject configurations that list more than one provider.

        This tool has no feature to combine providers results,
        therefore only one provider is allowed.

        Raises:
            ValueError: if ``v`` contains more than one provider name.
        """
        if len(v) > 1:
            raise ValueError(
                "Please select only one provider. "
                "The feature to combine providers results is not available "
                "for this tool."
            )
        return v

    def _wait_processing(self, url: str) -> requests.Response:
        """Poll ``url`` until the job reports ``finished``.

        Makes up to 10 attempts, sleeping one second before each.

        Returns:
            The response of the poll that reported the finished job.

        Raises:
            Exception: if the finished job carries a provider error, or if
                the job is still not finished after the last attempt.
        """
        for _ in range(10):
            time.sleep(1)
            audio_analysis_result = self._get_edenai(url)
            job = audio_analysis_result.json()
            if job["status"] != "finished":
                continue
            error = job["results"][self.providers[0]]["error"]
            if error is not None:
                raise Exception(
                    "EdenAI returned an unexpected response\n" f"{error}"
                )
            return audio_analysis_result

        raise Exception("Edenai speech to text job id processing Timed out")

    def _parse_response(self, response: dict) -> str:
        """Return the ``public_id`` (job id) from the job-creation response."""
        return response["public_id"]

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Submits the audio URL as a job, waits for it to finish and returns
        the transcribed text of the configured provider.
        """
        all_params = {
            # ``args_schema`` declares the query as ``HttpUrl``; a pydantic v2
            # URL object is not a ``str`` and cannot be JSON-encoded, so
            # normalise it here (a no-op when ``query`` is already a str).
            "file_url": str(query),
            "language": self.language,
            "speakers": self.speakers,
            "profanity_filter": self.profanity_filter,
            "custom_vocabulary": self.custom_vocabulary,
        }

        # filter so we don't send val to api when val is `None`
        query_params = {k: v for k, v in all_params.items() if v is not None}

        job_id = self._call_eden_ai(query_params)
        url = self.base_url + job_id
        audio_analysis_result = self._wait_processing(url)
        formatted_text = json.loads(audio_analysis_result.text)
        return formatted_text["results"][self.providers[0]]["text"]

View File

@@ -0,0 +1,122 @@
from __future__ import annotations
import logging
from typing import Any, Dict, List, Literal, Optional, Type
import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field, model_validator, validator
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class TextToSpeechInput(BaseModel):
    # Argument schema used by EdenAiTextToSpeechTool: the text to synthesise.
    query: str = Field(
        description="text to generate audio from",
    )
class EdenAiTextToSpeechTool(EdenaiTool):
    """Tool that queries the Eden AI Text to speech API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/audio_text_to_speech_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_text_to_speech"
    description: str = (
        "A wrapper around edenai Services text to speech."
        "Useful for when you need to convert text to speech."
        "the output is a string representing the URL of the audio file,\n"
        "or the path to the downloaded wav file "
    )
    args_schema: Type[BaseModel] = TextToSpeechInput

    language: Optional[str] = "en"
    """language of the text passed to the model."""

    # optional params see api documentation for more info
    return_type: Literal["url", "wav"] = "url"
    rate: Optional[int] = None
    pitch: Optional[int] = None
    volume: Optional[int] = None
    audio_format: Optional[str] = None
    sampling_rate: Optional[int] = None
    voice_models: Dict[str, str] = Field(default_factory=dict)

    voice: Literal["MALE", "FEMALE"]
    """voice option : 'MALE' or 'FEMALE' """

    feature: str = "audio"
    subfeature: str = "text_to_speech"

    @validator("providers")
    def check_only_one_provider_selected(cls, v: List[str]) -> List[str]:
        """Allow at most one provider.

        This tool has no feature to combine providers results,
        so a longer list is rejected with a ``ValueError``.
        """
        if len(v) <= 1:
            return v
        raise ValueError(
            "Please select only one provider. "
            "The feature to combine providers results is not available "
            "for this tool."
        )

    @model_validator(mode="before")
    @classmethod
    def check_voice_models_key_is_provider_name(cls, values: dict) -> Any:
        """Require every ``voice_models`` key to be one of ``providers``.

        Raises:
            ValueError: if a key of ``voice_models`` is not a listed provider.
        """
        voice_model_names = values.get("voice_models", {}).keys()
        if any(
            provider_name not in values.get("providers", [])
            for provider_name in voice_model_names
        ):
            raise ValueError(
                "voice_model should be formatted like this "
                "{<provider_name>: <its_voice_model>}"
            )
        return values

    def _download_wav(self, url: str, save_path: str) -> None:
        """Fetch ``url`` and write the response body to ``save_path``.

        Raises:
            ValueError: if the download does not answer with status 200.
        """
        response = requests.get(url)
        if response.status_code != 200:
            raise ValueError("Error while downloading wav file")
        with open(save_path, "wb") as wav_file:
            wav_file.write(response.content)

    def _parse_response(self, response: list) -> str:
        """Return the audio URL, or download it and return the local path.

        Only the first entry of ``response`` is used. When ``return_type``
        is not ``"url"`` the audio is saved as ``audio.wav``.
        """
        audio_url = response[0]["audio_resource_url"]
        if self.return_type == "url":
            return audio_url
        local_path = "audio.wav"
        self._download_wav(audio_url, local_path)
        return local_path

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        candidate_params = {
            "text": query,
            "language": self.language,
            "option": self.voice,
            "return_type": self.return_type,
            "rate": self.rate,
            "pitch": self.pitch,
            "volume": self.volume,
            "audio_format": self.audio_format,
            "sampling_rate": self.sampling_rate,
            "settings": self.voice_models,
        }

        # Unset (None) options are left out of the request entirely.
        query_params = {
            key: value for key, value in candidate_params.items() if value is not None
        }

        return self._call_eden_ai(query_params)

View File

@@ -0,0 +1,150 @@
from __future__ import annotations
import logging
from abc import abstractmethod
from typing import Any, Dict, List, Optional
import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.tools import BaseTool
from langchain_core.utils import secret_from_env
from pydantic import Field, SecretStr
logger = logging.getLogger(__name__)
class EdenaiTool(BaseTool):
    """
    the base tool for all the EdenAI Tools .

    you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    feature: str
    subfeature: str
    edenai_api_key: Optional[SecretStr] = Field(
        default_factory=secret_from_env("EDENAI_API_KEY", default=None)
    )
    is_async: bool = False

    providers: List[str]
    """provider to use for the API call."""

    @staticmethod
    def get_user_agent() -> str:
        """Return the ``User-Agent`` value, ``langchain/<package version>``."""
        from langchain_community import __version__

        return f"langchain/{__version__}"

    def _api_key_value(self) -> str:
        """Return the raw API key, or an empty string when none is set."""
        return self.edenai_api_key.get_secret_value() if self.edenai_api_key else ""

    def _call_eden_ai(self, query_params: Dict[str, Any]) -> str:
        """
        Make an API call to the EdenAI service with the specified query parameters.

        Args:
            query_params (dict): The parameters to include in the API call.
                They are merged over the default payload, so a key given here
                overrides the default of the same name.

        Returns:
            str: The result of ``_parse_response`` applied to the decoded
            JSON body of the response.

        Raises:
            ValueError, Exception: as raised by ``_raise_on_error``.
            RuntimeError: if decoding or parsing the response fails.
        """
        headers = {
            "Authorization": f"Bearer {self._api_key_value()}",
            "User-Agent": self.get_user_agent(),
        }

        url = f"https://api.edenai.run/v2/{self.feature}/{self.subfeature}"

        payload: Dict[str, Any] = {
            "providers": str(self.providers),
            "response_as_dict": False,
            "attributes_as_list": True,
            "show_original_response": False,
        }
        payload.update(query_params)

        response = requests.post(url, json=payload, headers=headers)
        self._raise_on_error(response)

        try:
            return self._parse_response(response.json())
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise RuntimeError(f"An error occurred while running tool: {e}") from e

    def _raise_on_error(self, response: requests.Response) -> None:
        """Raise if the HTTP response, or the provider inside it, failed.

        Raises:
            Exception: for a status >= 500, or any other status that is
                not 200.
            ValueError: for a status in the 400 range, or when a synchronous
                call succeeded but the first provider entry reports
                ``status == "fail"``.
        """
        if response.status_code >= 500:
            raise Exception(f"EdenAI Server: Error {response.status_code}")
        elif response.status_code >= 400:
            raise ValueError(f"EdenAI received an invalid payload: {response.text}")
        elif response.status_code != 200:
            raise Exception(
                f"EdenAI returned an unexpected response with status "
                f"{response.status_code}: {response.text}"
            )

        # case where edenai call succeeded but provider returned an error
        # (eg: rate limit, server error, etc.)
        if self.is_async is False:
            # async call are different and only return a job_id,
            # not the provider response directly
            provider_response = response.json()[0]
            if provider_response.get("status") == "fail":
                err_msg = provider_response["error"]["message"]
                raise ValueError(err_msg)

    @abstractmethod
    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        pass

    @abstractmethod
    def _parse_response(self, response: Any) -> str:
        """Take a dict response and condense it's data in a human readable string"""
        pass

    def _get_edenai(self, url: str) -> requests.Response:
        """GET ``url`` with the Eden AI auth headers and return the response.

        Raises whatever ``_raise_on_error`` raises for a failed response.
        """
        headers = {
            "accept": "application/json",
            # Unwrap the SecretStr: formatting it directly would send the
            # masked placeholder instead of the real key.
            "authorization": f"Bearer {self._api_key_value()}",
            "User-Agent": self.get_user_agent(),
        }

        response = requests.get(url, headers=headers)

        self._raise_on_error(response)

        return response

    def _parse_json_multilevel(
        self, extracted_data: dict, formatted_list: list, level: int = 0
    ) -> None:
        """Append ``key : value`` lines for ``extracted_data`` to ``formatted_list``.

        Lines are indented by ``level``. String values have their newlines
        replaced by commas; list and dict values emit a ``key : `` header and
        are expanded one level deeper. Values of any other type are skipped.
        The list is modified in place.
        """
        for section, subsections in extracted_data.items():
            indentation = " " * level
            if isinstance(subsections, str):
                subsections = subsections.replace("\n", ",")
                formatted_list.append(f"{indentation}{section} : {subsections}")

            elif isinstance(subsections, list):
                formatted_list.append(f"{indentation}{section} : ")
                self._list_handling(subsections, formatted_list, level + 1)

            elif isinstance(subsections, dict):
                formatted_list.append(f"{indentation}{section} : ")
                self._parse_json_multilevel(subsections, formatted_list, level + 1)

    def _list_handling(
        self, subsection_list: list, formatted_list: list, level: int
    ) -> None:
        """Append the items of ``subsection_list`` to ``formatted_list``.

        Dict items are expanded with ``_parse_json_multilevel`` at the same
        level, nested lists recurse one level deeper, and any other item is
        appended as an indented line.
        """
        for list_item in subsection_list:
            if isinstance(list_item, dict):
                self._parse_json_multilevel(list_item, formatted_list, level)

            elif isinstance(list_item, list):
                self._list_handling(list_item, formatted_list, level + 1)

            else:
                formatted_list.append(f"{' ' * level}{list_item}")

View File

@@ -0,0 +1,73 @@
from __future__ import annotations
import logging
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field, HttpUrl
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class ExplicitImageInput(BaseModel):
    # Argument schema used by EdenAiExplicitImageTool: one image URL.
    query: HttpUrl = Field(
        description="url of the image to analyze",
    )
class EdenAiExplicitImageTool(EdenaiTool):
    """Tool that queries the Eden AI Explicit image detection.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/image_explicit_content_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_image_explicit_content_detection"

    description: str = (
        "A wrapper around edenai Services Explicit image detection. "
        "Useful for when you have to extract Explicit Content from images.\n"
        "it detects adult only content in images,\n"
        "that is generally inappropriate for people under\n"
        "the age of 18 and includes nudity, sexual activity,\n"
        "pornography, violence, gore content, etc."
        "Input should be the string url of the image ."
    )
    args_schema: Type[BaseModel] = ExplicitImageInput

    combine_available: bool = True
    feature: str = "image"
    subfeature: str = "explicit_content"

    def _parse_json(self, json_data: dict) -> str:
        """Format one provider result as a readable multi-line string.

        The first line is the overall ``nsfw_likelihood``; each item follows
        as ``<index>: <label> likelihood <value>``, separated by ``,\\n``.
        """
        header = f"nsfw_likelihood: {json_data['nsfw_likelihood']}"
        item_lines = [
            f"{idx}: {found_obj['label'].lower()} likelihood {found_obj['likelihood']}"
            for idx, found_obj in enumerate(json_data["items"])
        ]
        if not item_lines:
            # Slicing off a trailing ",\n" here would eat the likelihood value.
            return header
        return header + "\n" + ",\n".join(item_lines)

    def _parse_response(self, json_data: list) -> str:
        """Pick the entry to report and format it.

        A single-entry response is used directly; otherwise the entry whose
        ``provider`` is ``"eden-ai"`` is used (the last one if several).

        Raises:
            ValueError: if there is no entry to format.
        """
        if len(json_data) == 1:
            return self._parse_json(json_data[0])

        result: Optional[str] = None
        for entry in json_data:
            if entry.get("provider") == "eden-ai":
                result = self._parse_json(entry)
        if result is None:
            raise ValueError(
                "EdenAI response contains no 'eden-ai' provider entry to parse"
            )
        return result

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        # ``args_schema`` declares the query as ``HttpUrl``; a pydantic v2 URL
        # object is not JSON-serializable, so send its string form
        # (a no-op when ``query`` is already a str).
        query_params = {"file_url": str(query), "attributes_as_list": False}
        return self._call_eden_ai(query_params)

View File

@@ -0,0 +1,87 @@
from __future__ import annotations
import logging
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field, HttpUrl
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class ObjectDetectionInput(BaseModel):
    # Argument schema used by EdenAiObjectDetectionTool: one image URL.
    query: HttpUrl = Field(
        description="url of the image to analyze",
    )
class EdenAiObjectDetectionTool(EdenaiTool):
    """Tool that queries the Eden AI Object detection API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/image_object_detection_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_object_detection"

    description: str = (
        "A wrapper around edenai Services Object Detection . "
        "Useful for when you have to do an to identify and locate\n"
        "(with bounding boxes) objects in an image "
        "Input should be the string url of the image to identify."
    )
    args_schema: Type[BaseModel] = ObjectDetectionInput

    show_positions: bool = False

    feature: str = "image"
    subfeature: str = "object_detection"

    def _parse_json(self, json_data: dict) -> str:
        """Format the detected items of one provider result, one per line.

        Each line is ``<label> - Confidence <confidence>``. When
        ``show_positions`` is set and all four box coordinates are present,
        the bounding box is appended to the line.
        """
        label_info = []
        for found_obj in json_data["items"]:
            label_str = f"{found_obj['label']} - Confidence {found_obj['confidence']}"
            x_min = found_obj.get("x_min")
            x_max = found_obj.get("x_max")
            y_min = found_obj.get("y_min")
            y_max = found_obj.get("y_max")
            # some providers don't return positions; test for None rather
            # than truthiness so a legitimate coordinate of 0 is kept.
            has_position = all(
                coord is not None for coord in (x_min, x_max, y_min, y_max)
            )
            if self.show_positions and has_position:
                label_str += (
                    f",at the position x_min: {x_min}, x_max: {x_max},\n"
                    f"y_min: {y_min}, y_max: {y_max}"
                )
            label_info.append(label_str)

        return "\n".join(label_info)

    def _parse_response(self, response: list) -> str:
        """Pick the entry to report and format it.

        A single-entry response is used directly; otherwise the entry whose
        ``provider`` is ``"eden-ai"`` is used (the last one if several).

        Raises:
            ValueError: if there is no entry to format.
        """
        if len(response) == 1:
            return self._parse_json(response[0])

        result: Optional[str] = None
        for entry in response:
            if entry.get("provider") == "eden-ai":
                result = self._parse_json(entry)
        if result is None:
            raise ValueError(
                "EdenAI response contains no 'eden-ai' provider entry to parse"
            )
        return result

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        # ``args_schema`` declares the query as ``HttpUrl``; a pydantic v2 URL
        # object is not JSON-serializable, so send its string form
        # (a no-op when ``query`` is already a str).
        query_params = {"file_url": str(query), "attributes_as_list": False}
        return self._call_eden_ai(query_params)

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import logging
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field, HttpUrl
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class IDParsingInput(BaseModel):
    # Argument schema used by EdenAiParsingIDTool: one document URL.
    query: HttpUrl = Field(
        description="url of the document to parse",
    )
class EdenAiParsingIDTool(EdenaiTool):
    """Tool that queries the Eden AI Identity parsing API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/ocr_identity_parser_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_identity_parsing"

    description: str = (
        "A wrapper around edenai Services Identity parsing. "
        "Useful for when you have to extract information from an ID Document "
        "Input should be the string url of the document to parse."
    )
    args_schema: Type[BaseModel] = IDParsingInput

    feature: str = "ocr"
    subfeature: str = "identity_parser"

    language: Optional[str] = None
    """
    language of the text passed to the model.
    """

    def _parse_response(self, response: list) -> str:
        """Flatten the first extracted document into ``key : value`` lines.

        A single-entry response is used directly; otherwise every entry whose
        ``provider`` is ``"eden-ai"`` contributes its lines. Returns the
        lines joined with newlines (empty string if nothing matched).
        """
        formatted_list: list = []

        if len(response) == 1:
            self._parse_json_multilevel(
                response[0]["extracted_data"][0], formatted_list
            )
        else:
            for entry in response:
                if entry.get("provider") == "eden-ai":
                    self._parse_json_multilevel(
                        entry["extracted_data"][0], formatted_list
                    )

        return "\n".join(formatted_list)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        query_params = {
            # ``args_schema`` declares the query as ``HttpUrl``; a pydantic v2
            # URL object is not JSON-serializable, so send its string form
            # (a no-op when ``query`` is already a str).
            "file_url": str(query),
            "language": self.language,
            "attributes_as_list": False,
        }

        return self._call_eden_ai(query_params)

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import logging
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field, HttpUrl
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class InvoiceParsingInput(BaseModel):
    # Argument schema used by EdenAiParsingInvoiceTool: one document URL.
    query: HttpUrl = Field(
        description="url of the document to parse",
    )
class EdenAiParsingInvoiceTool(EdenaiTool):
    """Tool that queries the Eden AI Invoice parsing API.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/ocr_invoice_parser_create.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_invoice_parsing"
    description: str = (
        "A wrapper around edenai Services invoice parsing. "
        "Useful for when you have to extract information from\n"
        "an image it enables to take invoices\n"
        "in a variety of formats and returns the data in contains\n"
        "(items, prices, addresses, vendor name, etc.)\n"
        "in a structured format to automate the invoice processing "
        "Input should be the string url of the document to parse."
    )
    args_schema: Type[BaseModel] = InvoiceParsingInput

    language: Optional[str] = None
    """
    language of the image passed to the model.
    """

    feature: str = "ocr"
    subfeature: str = "invoice_parser"

    def _parse_response(self, response: list) -> str:
        """Flatten the first extracted invoice into ``key : value`` lines.

        A single-entry response is used directly; otherwise every entry whose
        ``provider`` is ``"eden-ai"`` contributes its lines. Returns the
        lines joined with newlines (empty string if nothing matched).
        """
        formatted_list: list = []

        if len(response) == 1:
            self._parse_json_multilevel(
                response[0]["extracted_data"][0], formatted_list
            )
        else:
            for entry in response:
                if entry.get("provider") == "eden-ai":
                    self._parse_json_multilevel(
                        entry["extracted_data"][0], formatted_list
                    )

        return "\n".join(formatted_list)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        query_params = {
            # ``args_schema`` declares the query as ``HttpUrl``; a pydantic v2
            # URL object is not JSON-serializable, so send its string form
            # (a no-op when ``query`` is already a str).
            "file_url": str(query),
            "language": self.language,
            "attributes_as_list": False,
        }

        return self._call_eden_ai(query_params)

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
import logging
from typing import Optional, Type
from langchain_core.callbacks import CallbackManagerForToolRun
from pydantic import BaseModel, Field
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class TextModerationInput(BaseModel):
    # Argument schema used by EdenAiTextModerationTool: the text to check.
    query: str = Field(
        description="Text to moderate",
    )
class EdenAiTextModerationTool(EdenaiTool):
    """Tool that queries the Eden AI Explicit text detection.

    for api reference check edenai documentation:
    https://docs.edenai.co/reference/image_explicit_content_create.
    NOTE(review): this link names the image explicit-content endpoint,
    not a text one - confirm the intended reference.

    To use, you should have
    the environment variable ``EDENAI_API_KEY`` set with your API token.
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    name: str = "edenai_explicit_content_detection_text"

    description: str = (
        "A wrapper around edenai Services explicit content detection for text. "
        "Useful for when you have to scan text for offensive,\n"
        "sexually explicit or suggestive content,\n"
        "it checks also if there is any content of self-harm,\n"
        "violence, racist or hate speech."
        "the structure of the output is :\n"
        "'the type of the explicit content : the likelihood of it being explicit'\n"
        "the likelihood is a number\n"
        "between 1 and 5, 1 being the lowest and 5 the highest.\n"
        "something is explicit if the likelihood is equal or higher than 3.\n"
        "for example :\n"
        "nsfw_likelihood: 1\n"
        "this is not explicit.\n"
        "for example :\n"
        "nsfw_likelihood: 3\n"
        "this is explicit.\n"
        "Input should be a string."
    )
    args_schema: Type[BaseModel] = TextModerationInput

    language: str

    feature: str = "text"
    subfeature: str = "moderation"

    def _parse_response(self, response: list) -> str:
        """Render every provider result as newline-separated text.

        For each result: an ``nsfw_likelihood: <value>`` line when that key
        is present, followed by one ``"<label>": <likelihood>`` line per
        label/likelihood pair.
        """
        lines = []
        for provider_result in response:
            if "nsfw_likelihood" in provider_result.keys():
                overall = provider_result["nsfw_likelihood"]
                lines.append(f"nsfw_likelihood: {overall!s}")

            pairs = zip(provider_result["label"], provider_result["likelihood"])
            for label, likelihood in pairs:
                lines.append(f'"{label}": {likelihood!s}')

        return "\n".join(lines)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool."""
        return self._call_eden_ai({"text": query, "language": self.language})