From 7249a812f5ebd8bbeff96ee4ebb70f74185c043a Mon Sep 17 00:00:00 2001
From: Daniel Rosel
Date: Tue, 4 Nov 2025 18:44:06 +0100
Subject: [PATCH] feat: new implementation of simple AI agent that can follow a goal and return

---
Review notes (free-form text in this section is not part of the commit message):
 * GenericBrowserUseAgent.act() stores the final result text in self.result,
   matching base.Agent's `self.result: Optional[str]`, so final_result()
   returns text instead of the raw object returned by agent.run().
 * get_agent() is annotated as returning BaseAgent; inside agent.py the bare
   name `Agent` refers to browser_use.Agent.
 * `timeout` is stored by base.Agent but act() does not use it yet.
 * Indentation and blank context lines were reconstructed; verify against the
   pre-image before applying. The post-image blob id on agent.py's index line
   predates the two edits above.

 experiments/agents/agent.py | 62 ++++++++++++++++++++++++-------------
 experiments/agents/base.py  | 34 ++++++++------------
 requirements.txt            |  1 +
 3 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/experiments/agents/agent.py b/experiments/agents/agent.py
index f0dadb3..bf6ff67 100644
--- a/experiments/agents/agent.py
+++ b/experiments/agents/agent.py
@@ -1,26 +1,44 @@
-from base import Agent
+from os import environ
+from base import Agent as BaseAgent
+from browser_use import Browser, Agent, ChatOpenAI
+from enum import Enum
 
+class AgentTypes(str, Enum):
+    GENERIC_BROWSER_USE_AGENT = "generic_browser_use_agent"
 
-class GenericComputerUseAgent(Agent):
-    # TODO: implement code using computer-use library
-    """
-async def do_job(jtbd):
-    browser = Browser(headless=True)
-    llm = ChatOpenAI(model="gpt-5-mini")
-    task = "You are an agent for doing anything on http://localhost:3000 (wait for the website to load) for the user. The user wants you to do the following job: "+jtbd + ". Use the browser to do it. Be careful to follow all instructions and complete the job fully. You have the power to purchase or to do anything."
-    agent = Agent(task=task, llm=llm, browser=browser)
-    return await agent.run()
+def _build_prompt(goal : str, environment_url : str) -> str:
+    #TODO: Improve prompt engineering here and experiment with
+    return f"""You are an autonomous agent tasked with achieving the following goal: {goal}
+You have access to a web browser to interact with the environment at {environment_url}.
+Use the browser to navigate, gather information, and perform actions necessary to accomplish your goal.
+Be thorough and ensure you complete the task fully."""
+
+class GenericBrowserUseAgent(BaseAgent):
+    def __init__(self,
+                 goal: str,
+                 url: str = "http://localhost:3000",
+                 timeout: int = 300,
+                 llm_model: str = "gpt-5-mini",
+                 headless: bool = True):
+        super().__init__(goal, url, timeout)
+        self.llm_model = ChatOpenAI(model=llm_model)
+        self.browser = Browser(headless=headless)
+        self.agent = Agent(task=_build_prompt(goal, url),
+                           llm=self.llm_model,
+                           browser=self.browser)
+    async def act(self) -> str:
+        self.result = (await self.agent.run()).final_result()  # keep the text, not the run object
+        return self.result
+
+def get_agent(agent_type: AgentTypes, **kwargs) -> BaseAgent:
+    if agent_type == AgentTypes.GENERIC_BROWSER_USE_AGENT:
+        return GenericBrowserUseAgent(**kwargs)
+    else:
+        raise ValueError(f"Unknown agent type: {agent_type}")
 
 if __name__ == "__main__":
-    JTBD= "Book the best room"
-    R=asyncio.run(do_job(JTBD))
-    print(R.final_result())
-    """
-    pass
-
-
-
-
-def get_agent():
-    # construct
-    pass
+    import asyncio
+    JTBD= "Name of the company of this website"
+    agent = get_agent(AgentTypes.GENERIC_BROWSER_USE_AGENT, goal=JTBD, url="https://ie.edu", timeout=300)
+    R=asyncio.run(agent.act())
+    print(R)
diff --git a/experiments/agents/base.py b/experiments/agents/base.py
index 87c0ad0..d9800e5 100644
--- a/experiments/agents/base.py
+++ b/experiments/agents/base.py
@@ -1,27 +1,19 @@
-import asyncio
+from abc import ABC, abstractmethod
+from typing import Optional
 
-class Agent:
-    def __init__(self,
-                 goal : str = "Get Information on All Prices",
-                 environment_url : str = "https://www.example.com",
-                 timeout : int = 60 * 5):
+class Agent(ABC):
+    """Base interface for browser automation agents"""
+
+    def __init__(self, goal: str, url: str = "http://localhost:3000", timeout: int = 300):
         self.goal = goal
-        self.environment_url = environment_url
+        self.url = url
         self.timeout = timeout
-        self.result = None
-        # TODO: implement agent initialization
+        self.result: Optional[str] = None
+
+    @abstractmethod
+    async def act(self) -> str:
+        """Execute goal and return result text"""
         pass
 
-    async def act(self):
-        # set the self.result to whatever text result the agents returns
-        pass # return await _async_method()
-    def final_result(self) -> str|None:
+    def final_result(self) -> Optional[str]:
         return self.result
-
-
-# asyncio.run(Agent(...).act())
-if __name__ == "__main__":
-    print("Testing Agent...")
-    agent = Agent(goal="Find the best price for a laptop", environment_url="https://www.example.com")
-    asyncio.run(agent.act())
-    print(f"Agent Result: {agent.final_result()}")
diff --git a/requirements.txt b/requirements.txt
index 99cb58e..50d8559 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ jupyter
 ipykernel
 matplotlib
 graphviz
+browser-use