initial commit
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
"""Gmail tools."""
|
||||
|
||||
from langchain_community.tools.gmail.create_draft import GmailCreateDraft
|
||||
from langchain_community.tools.gmail.get_message import GmailGetMessage
|
||||
from langchain_community.tools.gmail.get_thread import GmailGetThread
|
||||
from langchain_community.tools.gmail.search import GmailSearch
|
||||
from langchain_community.tools.gmail.send_message import GmailSendMessage
|
||||
from langchain_community.tools.gmail.utils import get_gmail_credentials
|
||||
|
||||
# Names re-exported by the Gmail tools package: the five tool classes and the
# credential helper imported at the top of this module.
__all__ = [
    "GmailCreateDraft",
    "GmailSendMessage",
    "GmailSearch",
    "GmailGetMessage",
    "GmailGetThread",
    "get_gmail_credentials",
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,38 @@
|
||||
"""Base class for Gmail tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from langchain_core.tools import BaseTool
|
||||
from pydantic import Field
|
||||
|
||||
from langchain_community.tools.gmail.utils import build_resource_service
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# This is for linting and IDE typehints
|
||||
from googleapiclient.discovery import Resource
|
||||
else:
|
||||
try:
|
||||
# We do this so pydantic can resolve the types when instantiating
|
||||
from googleapiclient.discovery import Resource
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class GmailBaseTool(BaseTool):
    """Base class for Gmail tools.

    Holds the Gmail API resource that the concrete tools use to issue
    requests (``self.api_resource.users()...``).
    """

    # Gmail API client. When the caller does not pass one, it is created by
    # ``build_resource_service`` at instantiation time.
    api_resource: Resource = Field(default_factory=build_resource_service)

    @classmethod
    def from_api_resource(cls, api_resource: Resource) -> "GmailBaseTool":
        """Create a tool from an api resource.

        Args:
            api_resource: The api resource to use.

        Returns:
            A tool.
        """
        # The field is declared as ``api_resource``. The previous keyword
        # (``service=``) matched no declared field, so the resource handed in
        # by the caller never reached ``api_resource``.
        return cls(api_resource=api_resource)
|
||||
@@ -0,0 +1,87 @@
|
||||
import base64
|
||||
from email.message import EmailMessage
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.gmail.base import GmailBaseTool
|
||||
|
||||
|
||||
class CreateDraftSchema(BaseModel):
    """Input for CreateDraftTool."""

    # Required fields: body text, recipient list and subject line.
    message: str = Field(default=..., description="The message to include in the draft.")
    to: List[str] = Field(default=..., description="The list of recipients.")
    subject: str = Field(default=..., description="The subject of the message.")

    # Optional carbon-copy lists; ``None`` means the header is not set.
    cc: Optional[List[str]] = Field(
        default=None, description="The list of CC recipients."
    )
    bcc: Optional[List[str]] = Field(
        default=None, description="The list of BCC recipients."
    )
|
||||
|
||||
|
||||
class GmailCreateDraft(GmailBaseTool):
    """Tool that creates a draft email for Gmail."""

    name: str = "create_gmail_draft"
    description: str = (
        "Use this tool to create a draft email with the provided message fields."
    )
    args_schema: Type[CreateDraftSchema] = CreateDraftSchema

    def _prepare_draft_message(
        self,
        message: str,
        to: List[str],
        subject: str,
        cc: Optional[List[str]] = None,
        bcc: Optional[List[str]] = None,
    ) -> dict:
        """Build the request body for creating a draft.

        Args:
            message: Text used as the content of the email.
            to: Recipient addresses, joined with ``", "`` into the To header.
            subject: Value of the Subject header.
            cc: Optional CC addresses; the header is omitted when ``None``.
            bcc: Optional BCC addresses; the header is omitted when ``None``.

        Returns:
            ``{"message": {"raw": <str>}}`` where the string is the URL-safe
            base64 encoding of the serialized email.
        """
        draft_message = EmailMessage()
        draft_message.set_content(message)

        draft_message["To"] = ", ".join(to)
        draft_message["Subject"] = subject
        if cc is not None:
            draft_message["Cc"] = ", ".join(cc)

        if bcc is not None:
            draft_message["Bcc"] = ", ".join(bcc)

        encoded_message = base64.urlsafe_b64encode(draft_message.as_bytes()).decode()
        return {"message": {"raw": encoded_message}}

    def _run(
        self,
        message: str,
        to: List[str],
        subject: str,
        cc: Optional[List[str]] = None,
        bcc: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Create the draft and report its ID.

        Args:
            message: Text used as the content of the email.
            to: Recipient addresses.
            subject: Subject line.
            cc: Optional CC addresses.
            bcc: Optional BCC addresses.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A confirmation string containing the ID of the created draft.

        Raises:
            Exception: If building or submitting the draft fails. The original
                error is attached as ``__cause__``.
        """
        try:
            create_message = self._prepare_draft_message(message, to, subject, cc, bcc)
            draft = (
                self.api_resource.users()
                .drafts()
                .create(userId="me", body=create_message)
                .execute()
            )
            output = f"Draft created. Draft Id: {draft['id']}"
            return output
        except Exception as e:
            # Chain explicitly so the underlying API error stays reachable
            # through ``__cause__`` instead of only as implicit context.
            raise Exception(f"An error occurred: {e}") from e
|
||||
@@ -0,0 +1,70 @@
|
||||
import base64
|
||||
import email
|
||||
from typing import Dict, Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.gmail.base import GmailBaseTool
|
||||
from langchain_community.tools.gmail.utils import clean_email_body
|
||||
|
||||
|
||||
class SearchArgsSchema(BaseModel):
    """Input for GetMessageTool."""

    # Required: the ID of the message to fetch.
    message_id: str = Field(
        default=...,
        description="The unique ID of the email message, retrieved from a search.",
    )
|
||||
|
||||
|
||||
class GmailGetMessage(GmailBaseTool):
    """Tool that gets a message by ID from Gmail."""

    name: str = "get_gmail_message"
    description: str = (
        "Use this tool to fetch an email by message ID."
        " Returns the thread ID, snippet, body, subject, and sender."
    )
    args_schema: Type[SearchArgsSchema] = SearchArgsSchema

    def _decode_payload(self, part: "email.message.Message") -> str:
        """Decode the payload of a single (non-multipart) message part.

        UTF-8 is tried first so messages that already decoded correctly give
        the same text as before. If that fails, the charset declared on the
        part is tried, and finally latin-1, which accepts any byte sequence.

        Args:
            part: The message, or message part, whose payload is decoded.

        Returns:
            The decoded text, or ``""`` when the part carries no byte payload.
        """
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            return ""

        candidates = ["utf-8"]
        declared = part.get_content_charset()
        if declared:
            candidates.append(declared)
        for charset in candidates:
            try:
                return payload.decode(charset)
            except (UnicodeDecodeError, LookupError):
                # LookupError covers charset names Python does not know.
                continue
        return payload.decode("latin-1")

    def _extract_body(self, email_msg: "email.message.Message") -> str:
        """Return the text body of ``email_msg``.

        For multipart messages the first ``text/plain`` part that is not an
        attachment is used; ``""`` is returned when there is no such part.
        """
        if not email_msg.is_multipart():
            return self._decode_payload(email_msg)

        for part in email_msg.walk():
            ctype = part.get_content_type()
            cdispo = str(part.get("Content-Disposition"))
            if ctype == "text/plain" and "attachment" not in cdispo:
                return self._decode_payload(part)
        return ""

    def _run(
        self,
        message_id: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Dict:
        """Fetch one message in raw format and return its main fields.

        Args:
            message_id: ID of the message to fetch.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict with the keys ``id``, ``threadId``, ``snippet``, ``body``,
            ``subject`` and ``sender``.
        """
        query = (
            self.api_resource.users()
            .messages()
            .get(userId="me", format="raw", id=message_id)
        )
        message_data = query.execute()
        raw_message = base64.urlsafe_b64decode(message_data["raw"])

        email_msg = email.message_from_bytes(raw_message)

        subject = email_msg["Subject"]
        sender = email_msg["From"]

        body = clean_email_body(self._extract_body(email_msg))

        return {
            "id": message_id,
            "threadId": message_data["threadId"],
            "snippet": message_data["snippet"],
            "body": body,
            "subject": subject,
            "sender": sender,
        }
|
||||
@@ -0,0 +1,48 @@
|
||||
from typing import Dict, Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.gmail.base import GmailBaseTool
|
||||
|
||||
|
||||
class GetThreadSchema(BaseModel):
    """Input for GmailGetThread."""

    # Required: the ID of the thread to fetch.
    thread_id: str = Field(
        ...,
        description="The thread ID.",
    )
|
||||
|
||||
|
||||
class GmailGetThread(GmailBaseTool):
    """Tool that gets a thread by ID from Gmail."""

    name: str = "get_gmail_thread"
    # The previous text was copied from the search tool and asked for a Gmail
    # query, although this tool takes a thread ID.
    description: str = (
        "Use this tool to fetch an email thread by its thread ID."
        " The input must be a valid Gmail thread ID."
        " The output is the thread, including the ID and snippet"
        " of each of its messages."
    )
    args_schema: Type[GetThreadSchema] = GetThreadSchema

    def _run(
        self,
        thread_id: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Dict:
        """Fetch a thread and trim each of its messages to ID and snippet.

        Args:
            thread_id: ID of the thread to fetch.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            The thread data returned by the API, with ``"messages"`` replaced
            by a list of dicts holding only the ``id`` and ``snippet`` keys
            that are present on each message.

        Raises:
            ValueError: If the API response is not a dict.
        """
        query = self.api_resource.users().threads().get(userId="me", id=thread_id)
        thread_data = query.execute()
        if not isinstance(thread_data, dict):
            raise ValueError("The output of the query must be a dict.")

        keys_to_keep = ("id", "snippet")
        # TODO: Parse body.
        thread_data["messages"] = [
            {k: message[k] for k in keys_to_keep if k in message}
            for message in thread_data.get("messages", [])
        ]
        return thread_data
|
||||
149
venv/Lib/site-packages/langchain_community/tools/gmail/search.py
Normal file
149
venv/Lib/site-packages/langchain_community/tools/gmail/search.py
Normal file
@@ -0,0 +1,149 @@
|
||||
import base64
|
||||
import email
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.gmail.base import GmailBaseTool
|
||||
from langchain_community.tools.gmail.utils import clean_email_body
|
||||
|
||||
|
||||
class Resource(str, Enum):
    """Enumerator of Resources to search."""

    # Members are plain strings as well, so they compare equal to their values.
    THREADS = "threads"
    MESSAGES = "messages"
|
||||
|
||||
|
||||
class SearchArgsSchema(BaseModel):
    """Input for SearchGmailTool."""

    # Query syntax reference: https://support.google.com/mail/answer/7190?hl=en
    query: str = Field(
        default=...,
        description=(
            "The Gmail query. Example filters include from:sender,"
            " to:recipient, subject:subject, -filtered_term,"
            " in:folder, is:important|read|starred, after:year/mo/date, "
            "before:year/mo/date, label:label_name"
            ' "exact phrase".'
            " Search newer/older than using d (day), m (month), and y (year): "
            "newer_than:2d, older_than:1y."
            " Attachments with extension example: filename:pdf. Multiple term"
            " matching example: from:amy OR from:david."
        ),
    )
    # Which kind of resource to search; messages unless stated otherwise.
    resource: Resource = Field(
        Resource.MESSAGES, description="Whether to search for threads or messages."
    )
    # Upper bound on the number of results; 10 unless stated otherwise.
    max_results: int = Field(
        10, description="The maximum number of results to return."
    )
|
||||
|
||||
|
||||
class GmailSearch(GmailBaseTool):
    """Tool that searches for messages or threads in Gmail."""

    name: str = "search_gmail"
    description: str = (
        "Use this tool to search for email messages or threads."
        " The input must be a valid Gmail query."
        " The output is a JSON list of the requested resource."
    )
    args_schema: Type[SearchArgsSchema] = SearchArgsSchema

    def _decode_payload(self, part: "email.message.Message") -> str:
        """Decode the payload of a single (non-multipart) message part.

        UTF-8 is tried first so messages that already decoded correctly give
        the same text as before. If that fails, the charset declared on the
        part is tried, and finally latin-1, which accepts any byte sequence.

        Args:
            part: The message, or message part, whose payload is decoded.

        Returns:
            The decoded text, or ``""`` when the part carries no byte payload.
        """
        payload = part.get_payload(decode=True)
        if not isinstance(payload, bytes):
            return ""

        candidates = ["utf-8"]
        declared = part.get_content_charset()
        if declared:
            candidates.append(declared)
        for charset in candidates:
            try:
                return payload.decode(charset)
            except (UnicodeDecodeError, LookupError):
                # LookupError covers charset names Python does not know.
                continue
        return payload.decode("latin-1")

    def _extract_body(self, email_msg: "email.message.Message") -> str:
        """Return the text body of ``email_msg``.

        For multipart messages the first ``text/plain`` part that is not an
        attachment is used; ``""`` is returned when there is no such part.
        """
        if not email_msg.is_multipart():
            return self._decode_payload(email_msg)

        for part in email_msg.walk():
            ctype = part.get_content_type()
            cdispo = str(part.get("Content-Disposition"))
            if ctype == "text/plain" and "attachment" not in cdispo:
                return self._decode_payload(part)
        return ""

    def _parse_threads(self, threads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Attach message IDs and snippets to each thread search result.

        Each thread is fetched individually. The incoming thread dicts are
        updated in place with a ``"messages"`` list and also returned.

        Args:
            threads: Thread entries from the list call; each must have ``"id"``.

        Returns:
            The same thread dicts, each with a ``"messages"`` list of
            ``{"snippet": ..., "id": ...}`` entries.
        """
        results = []
        for thread in threads:
            thread_id = thread["id"]
            thread_data = (
                self.api_resource.users()
                .threads()
                .get(userId="me", id=thread_id)
                .execute()
            )
            thread["messages"] = [
                {"snippet": message["snippet"], "id": message["id"]}
                for message in thread_data["messages"]
            ]
            results.append(thread)

        return results

    def _parse_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Fetch and summarize every message in a search result.

        Each message is fetched individually in raw format and parsed.

        Args:
            messages: Message entries from the list call; each must have
                ``"id"``.

        Returns:
            One dict per message with the keys ``id``, ``threadId``,
            ``snippet``, ``body``, ``subject``, ``sender``, ``from``,
            ``date``, ``to`` and ``cc``.
        """
        results = []
        for message in messages:
            message_id = message["id"]
            message_data = (
                self.api_resource.users()
                .messages()
                .get(userId="me", format="raw", id=message_id)
                .execute()
            )

            raw_message = base64.urlsafe_b64decode(message_data["raw"])

            email_msg = email.message_from_bytes(raw_message)

            subject = email_msg["Subject"]
            sender = email_msg["From"]

            # Both the multipart and the single-part case now share the same
            # decode fallbacks; previously only the multipart case had one.
            body = clean_email_body(self._extract_body(email_msg))

            results.append(
                {
                    "id": message["id"],
                    "threadId": message_data["threadId"],
                    "snippet": message_data["snippet"],
                    "body": body,
                    "subject": subject,
                    "sender": sender,
                    "from": email_msg["From"],
                    "date": email_msg["Date"],
                    "to": email_msg["To"],
                    "cc": email_msg["Cc"],
                }
            )
        return results

    def _run(
        self,
        query: str,
        resource: Resource = Resource.MESSAGES,
        max_results: int = 10,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> List[Dict[str, Any]]:
        """Run the search against the requested resource collection.

        Args:
            query: Gmail search query.
            resource: Whether to search threads or messages.
            max_results: Maximum number of results requested from the API.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            The parsed threads or messages matching the query.

        Raises:
            NotImplementedError: If ``resource`` is neither threads nor
                messages.
        """
        users = self.api_resource.users()
        # Query the collection that matches the requested resource. The list
        # call was previously always made on ``messages()``, whose response
        # has no "threads" key, so thread searches always came back empty.
        if resource == Resource.THREADS:
            collection = users.threads()
            parse = self._parse_threads
        elif resource == Resource.MESSAGES:
            collection = users.messages()
            parse = self._parse_messages
        else:
            raise NotImplementedError(f"Resource of type {resource} not implemented.")

        results = (
            collection.list(userId="me", q=query, maxResults=max_results)
            .execute()
            .get(resource.value, [])
        )
        return parse(results)
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Send Gmail messages."""
|
||||
|
||||
import base64
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from typing import Any, Dict, List, Optional, Type, Union
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForToolRun
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.gmail.base import GmailBaseTool
|
||||
|
||||
|
||||
class SendMessageSchema(BaseModel):
    """Input for SendMessageTool."""

    # Required fields. ``to`` may be one address string or a list of them.
    message: str = Field(default=..., description="The message to send.")
    to: Union[str, List[str]] = Field(
        default=..., description="The list of recipients."
    )
    subject: str = Field(default=..., description="The subject of the message.")

    # Optional carbon-copy recipients, again as one string or a list.
    cc: Optional[Union[str, List[str]]] = Field(
        default=None, description="The list of CC recipients."
    )
    bcc: Optional[Union[str, List[str]]] = Field(
        default=None, description="The list of BCC recipients."
    )
|
||||
|
||||
|
||||
class GmailSendMessage(GmailBaseTool):
    """Tool that sends a message to Gmail."""

    name: str = "send_gmail_message"
    # The previous description stopped mid-sentence after "recipients".
    description: str = (
        "Use this tool to send email messages. The input is the message, the"
        " recipients, the subject, and optionally the cc and bcc recipients."
    )
    args_schema: Type[SendMessageSchema] = SendMessageSchema

    def _prepare_message(
        self,
        message: str,
        to: Union[str, List[str]],
        subject: str,
        cc: Optional[Union[str, List[str]]] = None,
        bcc: Optional[Union[str, List[str]]] = None,
    ) -> Dict[str, Any]:
        """Create a message for an email.

        The body is attached to a multipart message as an HTML part. Address
        arguments may be a single string or a list; lists are joined with
        ``", "``.

        Args:
            message: Body of the email, attached with the ``html`` subtype.
            to: Recipient address or addresses.
            subject: Value of the Subject header.
            cc: Optional CC address(es); the header is omitted when ``None``.
            bcc: Optional BCC address(es); the header is omitted when ``None``.

        Returns:
            ``{"raw": <str>}`` where the string is the URL-safe base64
            encoding of the serialized message.
        """
        mime_message = MIMEMultipart()
        mime_message.attach(MIMEText(message, "html"))

        mime_message["To"] = ", ".join(to if isinstance(to, list) else [to])
        mime_message["Subject"] = subject
        if cc is not None:
            mime_message["Cc"] = ", ".join(cc if isinstance(cc, list) else [cc])

        if bcc is not None:
            mime_message["Bcc"] = ", ".join(bcc if isinstance(bcc, list) else [bcc])

        encoded_message = base64.urlsafe_b64encode(mime_message.as_bytes()).decode()
        return {"raw": encoded_message}

    def _run(
        self,
        message: str,
        to: Union[str, List[str]],
        subject: str,
        cc: Optional[Union[str, List[str]]] = None,
        bcc: Optional[Union[str, List[str]]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Send the message and report its ID.

        Args:
            message: Body of the email.
            to: Recipient address or addresses.
            subject: Subject line.
            cc: Optional CC address(es).
            bcc: Optional BCC address(es).
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A confirmation string containing the ID of the sent message.

        Raises:
            Exception: If building or sending the message fails. The original
                error is attached as ``__cause__``.
        """
        try:
            create_message = self._prepare_message(message, to, subject, cc=cc, bcc=bcc)
            send_message = (
                self.api_resource.users()
                .messages()
                .send(userId="me", body=create_message)
            )
            sent_message = send_message.execute()
            return f"Message sent. Message Id: {sent_message['id']}"
        except Exception as error:
            # Chain explicitly so the underlying API error stays reachable
            # through ``__cause__`` instead of only as implicit context.
            raise Exception(f"An error occurred: {error}") from error
|
||||
124
venv/Lib/site-packages/langchain_community/tools/gmail/utils.py
Normal file
124
venv/Lib/site-packages/langchain_community/tools/gmail/utils.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""Gmail tool utils."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import TYPE_CHECKING, List, Optional, Tuple
|
||||
|
||||
from langchain_core.utils import guard_import
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from google.auth.transport.requests import Request
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||
from googleapiclient.discovery import Resource
|
||||
from googleapiclient.discovery import build as build_resource
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def import_google() -> Tuple[Request, Credentials]:
    """Import the Google auth classes through ``guard_import``.

    Returns:
        Tuple[Request, Credentials]: ``Request`` from
        ``google.auth.transport.requests`` and ``Credentials`` from
        ``google.oauth2.credentials``, in that order.
    """
    # Both lookups pass the same pip package name to ``guard_import``.
    pip_name = "google-auth-httplib2"
    request_cls = guard_import(
        module_name="google.auth.transport.requests", pip_name=pip_name
    ).Request
    credentials_cls = guard_import(
        module_name="google.oauth2.credentials", pip_name=pip_name
    ).Credentials
    return request_cls, credentials_cls
|
||||
|
||||
|
||||
def import_installed_app_flow() -> InstalledAppFlow:
    """Import the ``InstalledAppFlow`` class through ``guard_import``.

    Returns:
        InstalledAppFlow: The class from ``google_auth_oauthlib.flow``.
    """
    flow_module = guard_import(
        module_name="google_auth_oauthlib.flow",
        pip_name="google-auth-oauthlib",
    )
    return flow_module.InstalledAppFlow
|
||||
|
||||
|
||||
def import_googleapiclient_resource_builder() -> build_resource:
    """Import the ``build`` function through ``guard_import``.

    Returns:
        build_resource: The ``build`` function from
        ``googleapiclient.discovery``.
    """
    discovery_module = guard_import(
        module_name="googleapiclient.discovery",
        pip_name="google-api-python-client",
    )
    return discovery_module.build
|
||||
|
||||
|
||||
# Fallbacks used by ``get_gmail_credentials`` for arguments that are not given.
DEFAULT_SCOPES = ["https://mail.google.com/"]  # OAuth scopes to request
DEFAULT_CREDS_TOKEN_FILE = "token.json"  # where the user's token is stored
DEFAULT_CLIENT_SECRETS_FILE = "credentials.json"  # OAuth client secrets file
|
||||
|
||||
|
||||
def get_gmail_credentials(
    token_file: Optional[str] = None,
    client_secrets_file: Optional[str] = None,
    scopes: Optional[List[str]] = None,
) -> Credentials:
    """Load, refresh or interactively obtain Gmail credentials.

    Adapted from https://developers.google.com/gmail/api/quickstart/python

    Args:
        token_file: Path of the stored user token; falls back to
            ``DEFAULT_CREDS_TOKEN_FILE`` when not given.
        client_secrets_file: Path of the OAuth client secrets file; falls back
            to ``DEFAULT_CLIENT_SECRETS_FILE`` when not given.
        scopes: OAuth scopes to request; falls back to ``DEFAULT_SCOPES`` when
            not given.

    Returns:
        The credentials read from the token file when they are valid,
        otherwise the refreshed or newly authorized credentials, which are
        also written back to the token file.
    """
    Request, Credentials = import_google()
    InstalledAppFlow = import_installed_app_flow()

    # Falsy arguments (None or empty) are replaced by the module defaults.
    if not scopes:
        scopes = DEFAULT_SCOPES
    if not token_file:
        token_file = DEFAULT_CREDS_TOKEN_FILE
    if not client_secrets_file:
        client_secrets_file = DEFAULT_CLIENT_SECRETS_FILE

    # The token file holds the user's access and refresh tokens. It is written
    # below once an authorization or refresh has completed.
    creds = None
    if os.path.exists(token_file):
        creds = Credentials.from_authorized_user_file(token_file, scopes)

    # Stored credentials that are still valid can be used as they are.
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        # No usable credentials: let the user log in.
        # https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application # noqa
        flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, scopes)
        creds = flow.run_local_server(port=0, open_browser=False)

    # Save the credentials for the next run.
    with open(token_file, "w") as token:
        token.write(creds.to_json())
    return creds
|
||||
|
||||
|
||||
def build_resource_service(
    credentials: Optional[Credentials] = None,
    service_name: str = "gmail",
    service_version: str = "v1",
) -> Resource:
    """Build an API service resource, by default for Gmail v1.

    Args:
        credentials: Credentials to use; obtained through
            ``get_gmail_credentials()`` when not given.
        service_name: Name of the service passed to the builder.
        service_version: Version of the service passed to the builder.

    Returns:
        The resource returned by the googleapiclient ``build`` function.
    """
    if not credentials:
        credentials = get_gmail_credentials()
    build = import_googleapiclient_resource_builder()
    return build(service_name, service_version, credentials=credentials)
|
||||
|
||||
|
||||
def clean_email_body(body: str) -> str:
    """Strip HTML markup from an email body when BeautifulSoup is available.

    Args:
        body: The email body; it is converted with ``str()`` before parsing.

    Returns:
        The text extracted by BeautifulSoup's ``html.parser``. When
        BeautifulSoup is not installed or parsing fails, the body is returned
        as a string without cleaning.
    """
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        logger.warning("BeautifulSoup not installed. Skipping cleaning.")
        return str(body)

    try:
        cleaned = BeautifulSoup(str(body), "html.parser").get_text()
        return str(cleaned)
    except Exception as e:
        # Cleaning is best effort: log the problem and hand back the input.
        logger.error(e)
        return str(body)
|
||||
Reference in New Issue
Block a user