initial commit
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
"""Browser tools and toolkit."""
|
||||
|
||||
from langchain_community.tools.playwright.click import ClickTool
|
||||
from langchain_community.tools.playwright.current_page import CurrentWebPageTool
|
||||
from langchain_community.tools.playwright.extract_hyperlinks import (
|
||||
ExtractHyperlinksTool,
|
||||
)
|
||||
from langchain_community.tools.playwright.extract_text import ExtractTextTool
|
||||
from langchain_community.tools.playwright.get_elements import GetElementsTool
|
||||
from langchain_community.tools.playwright.navigate import NavigateTool
|
||||
from langchain_community.tools.playwright.navigate_back import NavigateBackTool
|
||||
|
||||
__all__ = [
|
||||
"NavigateTool",
|
||||
"NavigateBackTool",
|
||||
"ExtractTextTool",
|
||||
"ExtractHyperlinksTool",
|
||||
"GetElementsTool",
|
||||
"ClickTool",
|
||||
"CurrentWebPageTool",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type
|
||||
|
||||
from langchain_core.tools import BaseTool
|
||||
from langchain_core.utils import guard_import
|
||||
from pydantic import model_validator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from playwright.async_api import Browser as AsyncBrowser
|
||||
from playwright.sync_api import Browser as SyncBrowser
|
||||
else:
|
||||
try:
|
||||
# We do this so pydantic can resolve the types when instantiating
|
||||
from playwright.async_api import Browser as AsyncBrowser
|
||||
from playwright.sync_api import Browser as SyncBrowser
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def lazy_import_playwright_browsers() -> Tuple[Type[AsyncBrowser], Type[SyncBrowser]]:
    """Import the Playwright ``Browser`` classes on demand.

    Both modules are resolved through ``guard_import`` at call time rather
    than at module import time.

    Returns:
        Tuple[Type[AsyncBrowser], Type[SyncBrowser]]:
            The ``Browser`` class from ``playwright.async_api`` followed by
            the ``Browser`` class from ``playwright.sync_api``.
    """
    async_browser_cls = guard_import(module_name="playwright.async_api").Browser
    sync_browser_cls = guard_import(module_name="playwright.sync_api").Browser
    return (async_browser_cls, sync_browser_cls)
|
||||
|
||||
|
||||
class BaseBrowserTool(BaseTool):
    """Shared base for tools that drive a Playwright browser.

    A tool carries a synchronous browser, an asynchronous browser, or both.
    Construction is rejected when neither one is supplied.
    """

    sync_browser: Optional["SyncBrowser"] = None
    async_browser: Optional["AsyncBrowser"] = None

    @model_validator(mode="before")
    @classmethod
    def validate_browser_provided(cls, values: dict) -> Any:
        """Require that at least one browser was passed in.

        Args:
            values: Raw field values given to the model.

        Returns:
            The unchanged ``values``.

        Raises:
            ValueError: If both ``async_browser`` and ``sync_browser`` are
                absent or ``None``.
        """
        # Resolve the Playwright classes before looking at the values.
        lazy_import_playwright_browsers()
        has_browser = any(
            values.get(field) is not None
            for field in ("async_browser", "sync_browser")
        )
        if not has_browser:
            raise ValueError("Either async_browser or sync_browser must be specified.")
        return values

    @classmethod
    def from_browser(
        cls,
        sync_browser: Optional[SyncBrowser] = None,
        async_browser: Optional[AsyncBrowser] = None,
    ) -> BaseBrowserTool:
        """Build a tool instance from the given browser(s).

        Args:
            sync_browser: Synchronous Playwright browser, if any.
            async_browser: Asynchronous Playwright browser, if any.

        Returns:
            An instance of ``cls`` holding the supplied browsers.
        """
        lazy_import_playwright_browsers()
        browsers = {"sync_browser": sync_browser, "async_browser": async_browser}
        return cls(**browsers)  # type: ignore[call-arg]
|
||||
@@ -0,0 +1,87 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
|
||||
class ClickToolInput(BaseModel):
    """Arguments accepted by ClickTool."""

    selector: str = Field(
        ...,
        description="CSS selector for the element to click",
    )
|
||||
|
||||
|
||||
class ClickTool(BaseBrowserTool):
    """Click the element matched by a CSS selector on the current page."""

    name: str = "click_element"
    description: str = "Click on an element with the given CSS selector"
    args_schema: Type[BaseModel] = ClickToolInput

    visible_only: bool = True
    """Restrict matching to visible elements when true."""
    playwright_strict: bool = False
    """Passed to Playwright as the ``strict`` option of ``click``."""
    playwright_timeout: float = 1_000
    """Timeout (in ms) passed to Playwright's ``click``."""

    def _selector_effective(self, selector: str) -> str:
        """Return the selector that is actually handed to Playwright.

        When ``visible_only`` is set, the ``>> visible=1`` suffix is appended;
        otherwise the selector is returned untouched.
        """
        return f"{selector} >> visible=1" if self.visible_only else selector

    def _run(
        self,
        selector: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Click the element using the synchronous browser.

        Args:
            selector: CSS selector of the element to click.
            run_manager: Optional callback manager (unused here).

        Returns:
            A message saying whether the click succeeded or timed out.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        browser = self.sync_browser
        if browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")
        # The page is expected to have been navigated already.
        page = get_current_page(browser)
        target = self._selector_effective(selector=selector)
        from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

        click_options = {
            "strict": self.playwright_strict,
            "timeout": self.playwright_timeout,
        }
        try:
            page.click(target, **click_options)
        except PlaywrightTimeoutError:
            return f"Unable to click on element '{selector}'"
        else:
            return f"Clicked element '{selector}'"

    async def _arun(
        self,
        selector: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Click the element using the asynchronous browser.

        Args:
            selector: CSS selector of the element to click.
            run_manager: Optional callback manager (unused here).

        Returns:
            A message saying whether the click succeeded or timed out.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        browser = self.async_browser
        if browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        # The page is expected to have been navigated already.
        page = await aget_current_page(browser)
        target = self._selector_effective(selector=selector)
        from playwright.async_api import TimeoutError as PlaywrightTimeoutError

        click_options = {
            "strict": self.playwright_strict,
            "timeout": self.playwright_timeout,
        }
        try:
            await page.click(target, **click_options)
        except PlaywrightTimeoutError:
            return f"Unable to click on element '{selector}'"
        else:
            return f"Clicked element '{selector}'"
|
||||
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
|
||||
class CurrentWebPageToolInput(BaseModel):
    """Empty argument schema: CurrentWebPageTool takes no inputs."""
|
||||
|
||||
|
||||
class CurrentWebPageTool(BaseBrowserTool):
    """Report the URL of the page the browser is currently on."""

    name: str = "current_webpage"
    description: str = "Returns the URL of the current page"
    args_schema: Type[BaseModel] = CurrentWebPageToolInput

    def _run(
        self,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Return the current page URL via the synchronous browser.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        browser = self.sync_browser
        if browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")
        current = get_current_page(browser)
        return str(current.url)

    async def _arun(
        self,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Return the current page URL via the asynchronous browser.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        browser = self.async_browser
        if browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        current = await aget_current_page(browser)
        return str(current.url)
|
||||
@@ -0,0 +1,93 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Optional, Type
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class ExtractHyperlinksToolInput(BaseModel):
    """Arguments accepted by ExtractHyperlinksTool."""

    # Off by default: links are returned exactly as written in the page.
    absolute_urls: bool = Field(
        description="Return absolute URLs instead of relative URLs",
        default=False,
    )
|
||||
|
||||
|
||||
class ExtractHyperlinksTool(BaseBrowserTool):
    """Extract all hyperlinks on the page."""

    name: str = "extract_hyperlinks"
    description: str = "Extract all hyperlinks on the current webpage"
    args_schema: Type[BaseModel] = ExtractHyperlinksToolInput

    @model_validator(mode="before")
    @classmethod
    def check_bs_import(cls, values: dict) -> Any:
        """Fail early when BeautifulSoup is not installed.

        Args:
            values: Raw field values given to the model.

        Returns:
            The unchanged ``values``.

        Raises:
            ImportError: If the ``bs4`` package cannot be imported.
        """
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError as err:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError(
                "The 'beautifulsoup4' package is required to use this tool."
                " Please install it with 'pip install beautifulsoup4'."
            ) from err
        return values

    @staticmethod
    def scrape_page(page: Any, html_content: str, absolute_urls: bool) -> str:
        """Collect the ``href`` of every anchor in ``html_content``.

        Args:
            page: Page object; only its ``url`` attribute is read, and only
                when ``absolute_urls`` is true.
            html_content: HTML markup to parse.
            absolute_urls: When true, each link is joined against ``page.url``.

        Returns:
            A JSON array of the distinct links, in order of first appearance.
            Anchors without an ``href`` contribute an empty string (or the
            page URL itself when ``absolute_urls`` is true).
        """
        from urllib.parse import urljoin

        from bs4 import BeautifulSoup

        # NOTE(review): the "lxml" parser is requested here, but only bs4 is
        # checked at construction time - confirm lxml is a declared dependency.
        soup = BeautifulSoup(html_content, "lxml")

        hrefs = [anchor.get("href", "") for anchor in soup.find_all("a")]
        if absolute_urls:
            base_url = page.url
            links = [urljoin(base_url, href) for href in hrefs]
        else:
            links = hrefs
        # De-duplicate while keeping document order. A plain set() would make
        # the output order vary between runs.
        return json.dumps(list(dict.fromkeys(links)))

    def _run(
        self,
        absolute_urls: bool = False,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Extract hyperlinks from the current page (synchronous browser).

        Args:
            absolute_urls: Return absolute instead of as-written URLs.
            run_manager: Optional callback manager (unused here).

        Returns:
            JSON array of distinct links, as produced by ``scrape_page``.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        if self.sync_browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")
        page = get_current_page(self.sync_browser)
        html_content = page.content()
        return self.scrape_page(page, html_content, absolute_urls)

    async def _arun(
        self,
        absolute_urls: bool = False,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Extract hyperlinks from the current page (asynchronous browser).

        Args:
            absolute_urls: Return absolute instead of as-written URLs.
            run_manager: Optional callback manager (unused here).

        Returns:
            JSON array of distinct links, as produced by ``scrape_page``.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        if self.async_browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        page = await aget_current_page(self.async_browser)
        html_content = await page.content()
        return self.scrape_page(page, html_content, absolute_urls)
|
||||
@@ -0,0 +1,73 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
|
||||
class ExtractTextToolInput(BaseModel):
    """Empty argument schema: ExtractTextTool takes no inputs."""
|
||||
|
||||
|
||||
class ExtractTextTool(BaseBrowserTool):
    """Tool for extracting all the text on the current webpage."""

    name: str = "extract_text"
    description: str = "Extract all the text on the current webpage"
    args_schema: Type[BaseModel] = ExtractTextToolInput

    # NOTE(review): the validator name looks like a garbled "check_bs_import".
    # It is kept as-is because renaming could break subclasses that override it.
    @model_validator(mode="before")
    @classmethod
    def check_acheck_bs_importrgs(cls, values: dict) -> Any:
        """Fail early when BeautifulSoup is not installed.

        Args:
            values: Raw field values given to the model.

        Returns:
            The unchanged ``values``.

        Raises:
            ImportError: If the ``bs4`` package cannot be imported.
        """
        try:
            from bs4 import BeautifulSoup  # noqa: F401
        except ImportError as err:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError(
                "The 'beautifulsoup4' package is required to use this tool."
                " Please install it with 'pip install beautifulsoup4'."
            ) from err
        return values

    def _run(self, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
        """Return the page text using the synchronous browser.

        Returns:
            All stripped text fragments of the page joined by single spaces.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        # Use Beautiful Soup since it's faster than looping through the elements
        from bs4 import BeautifulSoup

        if self.sync_browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")

        page = get_current_page(self.sync_browser)
        html_content = page.content()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(html_content, "lxml")

        return " ".join(soup.stripped_strings)

    async def _arun(
        self, run_manager: Optional[AsyncCallbackManagerForToolRun] = None
    ) -> str:
        """Return the page text using the asynchronous browser.

        Returns:
            All stripped text fragments of the page joined by single spaces.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        if self.async_browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        # Use Beautiful Soup since it's faster than looping through the elements
        from bs4 import BeautifulSoup

        page = await aget_current_page(self.async_browser)
        html_content = await page.content()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(html_content, "lxml")

        return " ".join(soup.stripped_strings)
|
||||
@@ -0,0 +1,111 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import TYPE_CHECKING, List, Optional, Sequence, Type
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from playwright.async_api import Page as AsyncPage
|
||||
from playwright.sync_api import Page as SyncPage
|
||||
|
||||
|
||||
class GetElementsToolInput(BaseModel):
    """Arguments accepted by GetElementsTool."""

    selector: str = Field(
        ...,
        description="CSS selector, such as '*', 'div', 'p', 'a', #id, .classname",
    )
    # A factory is used so every model instance gets its own default list.
    attributes: List[str] = Field(
        description="Set of attributes to retrieve for each element",
        default_factory=lambda: ["innerText"],
    )
|
||||
|
||||
|
||||
async def _aget_elements(
    page: AsyncPage, selector: str, attributes: Sequence[str]
) -> List[dict]:
    """Collect the requested attributes of every element matching ``selector``.

    Args:
        page: Async page to query.
        selector: CSS selector passed to ``query_selector_all``.
        attributes: Attribute names to read; ``"innerText"`` is read through
            ``inner_text()``, everything else through ``get_attribute()``.

    Returns:
        One dict per matching element that yielded at least one value.
        Values that are ``None`` or blank are left out.
    """
    collected: List[dict] = []
    for handle in await page.query_selector_all(selector):
        record = {}
        for attr_name in attributes:
            if attr_name == "innerText":
                text: Optional[str] = await handle.inner_text()
            else:
                text = await handle.get_attribute(attr_name)
            # Skip missing or whitespace-only values.
            if text is None or text.strip() == "":
                continue
            record[attr_name] = text
        if record:
            collected.append(record)
    return collected
|
||||
|
||||
|
||||
def _get_elements(
    page: SyncPage, selector: str, attributes: Sequence[str]
) -> List[dict]:
    """Collect the requested attributes of every element matching ``selector``.

    Args:
        page: Sync page to query.
        selector: CSS selector passed to ``query_selector_all``.
        attributes: Attribute names to read; ``"innerText"`` is read through
            ``inner_text()``, everything else through ``get_attribute()``.

    Returns:
        One dict per matching element that yielded at least one value.
        Values that are ``None`` or blank are left out.
    """
    collected: List[dict] = []
    for handle in page.query_selector_all(selector):
        record = {}
        for attr_name in attributes:
            text: Optional[str] = (
                handle.inner_text()
                if attr_name == "innerText"
                else handle.get_attribute(attr_name)
            )
            # Skip missing or whitespace-only values.
            if text is None or text.strip() == "":
                continue
            record[attr_name] = text
        if record:
            collected.append(record)
    return collected
|
||||
|
||||
|
||||
class GetElementsTool(BaseBrowserTool):
    """Tool for getting elements in the current web page matching a CSS selector."""

    name: str = "get_elements"
    description: str = (
        "Retrieve elements in the current web page matching the given CSS selector"
    )
    args_schema: Type[BaseModel] = GetElementsToolInput

    def _run(
        self,
        selector: str,
        # Immutable default: a list literal here would be shared across calls.
        attributes: Sequence[str] = ("innerText",),
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Return matching elements as JSON using the synchronous browser.

        Args:
            selector: CSS selector of the elements to retrieve.
            attributes: Attribute names to read from each element.
            run_manager: Optional callback manager (unused here).

        Returns:
            JSON array with one object per element that had at least one
            non-blank requested attribute. Non-ASCII text is not escaped.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        if self.sync_browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")
        page = get_current_page(self.sync_browser)
        # Navigate to the desired webpage before using this tool
        results = _get_elements(page, selector, attributes)
        return json.dumps(results, ensure_ascii=False)

    async def _arun(
        self,
        selector: str,
        # Immutable default: a list literal here would be shared across calls.
        attributes: Sequence[str] = ("innerText",),
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Return matching elements as JSON using the asynchronous browser.

        Args:
            selector: CSS selector of the elements to retrieve.
            attributes: Attribute names to read from each element.
            run_manager: Optional callback manager (unused here).

        Returns:
            JSON array with one object per element that had at least one
            non-blank requested attribute. Non-ASCII text is not escaped.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        if self.async_browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        page = await aget_current_page(self.async_browser)
        # Navigate to the desired webpage before using this tool
        results = await _aget_elements(page, selector, attributes)
        return json.dumps(results, ensure_ascii=False)
|
||||
@@ -0,0 +1,83 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Type
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
|
||||
class NavigateToolInput(BaseModel):
    """Arguments accepted by NavigateTool."""

    url: str = Field(..., description="url to navigate to")

    @model_validator(mode="before")
    @classmethod
    def validate_url_scheme(cls, values: dict) -> dict:
        """Reject any URL whose scheme is not ``http`` or ``https``.

        Args:
            values: Raw field values given to the model.

        Returns:
            The unchanged ``values`` when the scheme is accepted.

        Raises:
            ValueError: If the parsed scheme is anything other than
                ``http`` or ``https``.
        """
        scheme = urlparse(values.get("url")).scheme
        if scheme in ("http", "https"):
            return values
        raise ValueError("URL scheme must be 'http' or 'https'")
|
||||
|
||||
|
||||
class NavigateTool(BaseBrowserTool):
    """Navigate the browser to a given URL.

    **Security Note**: This tool provides code to control web-browser navigation.

    It can navigate to any URL, including internal network URLs and URLs
    exposed on the server itself.

    If this tool is exposed to end-users, consider limiting network access
    to the server that hosts the agent.

    By default, the URL scheme is limited to 'http' and 'https' to prevent
    navigation to local file system URLs (or other schemes).

    If access to the local file system is required, consider creating a custom
    tool or providing a custom args_schema that allows the desired URL schemes.

    See https://python.langchain.com/docs/security for more information.
    """

    name: str = "navigate_browser"
    description: str = "Navigate a browser to the specified URL"
    args_schema: Type[BaseModel] = NavigateToolInput

    def _run(
        self,
        url: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Navigate to ``url`` using the synchronous browser.

        Args:
            url: Address to open.
            run_manager: Optional callback manager (unused here).

        Returns:
            A message with the response status code, or ``unknown`` when
            ``goto`` returned no response.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        browser = self.sync_browser
        if browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")
        response = get_current_page(browser).goto(url)
        if response:
            status = response.status
        else:
            status = "unknown"
        return f"Navigating to {url} returned status code {status}"

    async def _arun(
        self,
        url: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Navigate to ``url`` using the asynchronous browser.

        Args:
            url: Address to open.
            run_manager: Optional callback manager (unused here).

        Returns:
            A message with the response status code, or ``unknown`` when
            ``goto`` returned no response.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        browser = self.async_browser
        if browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        page = await aget_current_page(browser)
        response = await page.goto(url)
        if response:
            status = response.status
        else:
            status = "unknown"
        return f"Navigating to {url} returned status code {status}"
|
||||
@@ -0,0 +1,60 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain_community.tools.playwright.base import BaseBrowserTool
|
||||
from langchain_community.tools.playwright.utils import (
|
||||
aget_current_page,
|
||||
get_current_page,
|
||||
)
|
||||
|
||||
|
||||
class NavigateBackToolInput(BaseModel):
    """Empty argument schema: NavigateBackTool takes no inputs."""
|
||||
|
||||
|
||||
class NavigateBackTool(BaseBrowserTool):
    """Go back one entry in the browser history."""

    name: str = "previous_webpage"
    description: str = "Navigate back to the previous page in the browser history"
    args_schema: Type[BaseModel] = NavigateBackToolInput

    def _run(self, run_manager: Optional[CallbackManagerForToolRun] = None) -> str:
        """Go back one page using the synchronous browser.

        Returns:
            A message with the URL and status code of the page reached, or a
            notice that ``go_back`` produced no response.

        Raises:
            ValueError: If no synchronous browser was provided.
        """
        browser = self.sync_browser
        if browser is None:
            raise ValueError(f"Synchronous browser not provided to {self.name}")
        response = get_current_page(browser).go_back()

        if not response:
            return "Unable to navigate back; no previous page in the history"
        return (
            f"Navigated back to the previous page with URL '{response.url}'."
            f" Status code {response.status}"
        )

    async def _arun(
        self,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Go back one page using the asynchronous browser.

        Returns:
            A message with the URL and status code of the page reached, or a
            notice that ``go_back`` produced no response.

        Raises:
            ValueError: If no asynchronous browser was provided.
        """
        browser = self.async_browser
        if browser is None:
            raise ValueError(f"Asynchronous browser not provided to {self.name}")
        page = await aget_current_page(browser)
        response = await page.go_back()

        if not response:
            return "Unable to navigate back; no previous page in the history"
        return (
            f"Navigated back to the previous page with URL '{response.url}'."
            f" Status code {response.status}"
        )
|
||||
@@ -0,0 +1,105 @@
|
||||
"""Utilities for the Playwright browser tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import TYPE_CHECKING, Any, Coroutine, List, Optional, TypeVar
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from playwright.async_api import Browser as AsyncBrowser
|
||||
from playwright.async_api import Page as AsyncPage
|
||||
from playwright.sync_api import Browser as SyncBrowser
|
||||
from playwright.sync_api import Page as SyncPage
|
||||
|
||||
|
||||
async def aget_current_page(browser: AsyncBrowser) -> AsyncPage:
    """Return the page the async browser is considered to be on.

    Args:
        browser: The browser (AsyncBrowser) to get the current page from.

    Returns:
        AsyncPage: The last page of the first browser context. A new page
        (and, if the browser has no context yet, a new context) is created
        when none exists.
    """
    existing_contexts = browser.contexts
    if existing_contexts:
        # The first context is assumed to be the default one.
        primary = existing_contexts[0]
        open_pages = primary.pages
        if open_pages:
            # The most recently opened page is assumed to be the active one.
            return open_pages[-1]
        return await primary.new_page()
    fresh_context = await browser.new_context()
    return await fresh_context.new_page()
|
||||
|
||||
|
||||
def get_current_page(browser: SyncBrowser) -> SyncPage:
    """Return the page the sync browser is considered to be on.

    Args:
        browser: The browser to get the current page from.

    Returns:
        SyncPage: The last page of the first browser context. A new page
        (and, if the browser has no context yet, a new context) is created
        when none exists.
    """
    existing_contexts = browser.contexts
    if existing_contexts:
        # The first context is assumed to be the default one.
        primary = existing_contexts[0]
        open_pages = primary.pages
        # The most recently opened page is assumed to be the active one.
        return open_pages[-1] if open_pages else primary.new_page()
    return browser.new_context().new_page()
|
||||
|
||||
|
||||
def create_async_playwright_browser(
    headless: bool = True, args: Optional[List[str]] = None
) -> AsyncBrowser:
    """Start Playwright's async API and launch a Chromium browser.

    Both the start-up and the launch are driven to completion through
    ``run_async``, so this function itself is synchronous.

    Args:
        headless: Whether to run the browser in headless mode. Defaults to True.
        args: arguments to pass to browser.chromium.launch

    Returns:
        AsyncBrowser: The playwright browser.
    """
    from playwright.async_api import async_playwright

    playwright_driver = run_async(async_playwright().start())
    launch = playwright_driver.chromium.launch(headless=headless, args=args)
    return run_async(launch)
|
||||
|
||||
|
||||
def create_sync_playwright_browser(
    headless: bool = True, args: Optional[List[str]] = None
) -> SyncBrowser:
    """Start Playwright's sync API and launch a Chromium browser.

    Args:
        headless: Whether to run the browser in headless mode. Defaults to True.
        args: arguments to pass to browser.chromium.launch

    Returns:
        SyncBrowser: The playwright browser.
    """
    from playwright.sync_api import sync_playwright

    playwright_driver = sync_playwright().start()
    chromium = playwright_driver.chromium
    return chromium.launch(headless=headless, args=args)
|
||||
|
||||
|
||||
T = TypeVar("T")


def run_async(coro: Coroutine[Any, Any, T]) -> T:
    """Run a coroutine to completion on the current thread's event loop.

    Args:
        coro: The coroutine to run. Coroutine[Any, Any, T]

    Returns:
        T: The result of the coroutine.
    """
    try:
        event_loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current loop for this thread (e.g. a worker thread, or the main
        # thread on Python versions that no longer create one implicitly).
        # Install a new loop so later calls on this thread reuse it.
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)
    return event_loop.run_until_complete(coro)
|
||||
Reference in New Issue
Block a user