"""Tool functions that let the agent operate the local computer."""

import time
import aiohttp
import asyncio
import logging
from typing import Optional, Dict
import os
from backend.agent.agentpress.tool import Tool, ToolResult, openapi_schema, usage_example
import pyautogui
from pynput.keyboard import Controller as KeyboardController
from pynput.mouse import Controller as MouseController

# Key names and key combinations offered to the model; this list is used as the
# ``enum`` of the ``press`` and ``hotkey`` tool schemas further down the file.
KEYBOARD_KEYS = [
    # Letters and digits, one entry per character.
    *"abcdefghijklmnopqrstuvwxyz",
    *"0123456789",
    # Editing / whitespace keys.
    "enter", "esc", "backspace", "tab", "space", "delete",
    # Modifier keys.
    "ctrl", "alt", "shift", "win",
    # Arrow keys.
    "up", "down", "left", "right",
    # Function keys f1 .. f12.
    *(f"f{number}" for number in range(1, 13)),
    # Common shortcuts, written as '+'-joined key names.
    "ctrl+c", "ctrl+v", "ctrl+x", "ctrl+z", "ctrl+a", "ctrl+s",
    "alt+tab", "alt+f4", "ctrl+alt+delete",
]

class ComputerUseTool(Tool):
    """Computer automation tool for controlling the local computer and GUI."""

    def __init__(self, project_id: str, thread_manager):
        """Store the caller's identifiers and create the input controllers.

        Args:
            project_id: Identifier kept on the instance as ``project_id``.
            thread_manager: Object kept on the instance as ``thread_manager``;
                it is not used during construction.
        """
        super().__init__()
        self.project_id, self.thread_manager = project_id, thread_manager
        # Last cursor coordinates this tool acted on; both start at 0.
        self.mouse_x = self.mouse_y = 0
        # pynput controllers, created once per tool instance.
        self.keyboard = KeyboardController()
        self.mouse = MouseController()
        logging.info("Initialized Computer Use Tool")


    @openapi_schema({
        "type": "function",
        "function": {
            "name": "move_to",
            "description": "Move cursor to specified position",
            "parameters": {
                "type": "object",
                "properties": {
                    "x": {"type": "number", "description": "X coordinate"},
                    "y": {"type": "number", "description": "Y coordinate"}
                },
                "required": ["x", "y"]
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="move_to">
        <parameter name="x">100</parameter>
        <parameter name="y">200</parameter>
        </invoke>
        </function_calls>
        ''')
    async def move_to(self, x: float, y: float) -> ToolResult:
        """Move the cursor to ``(x, y)`` and remember that point.

        Args:
            x: Target X coordinate; converted with ``float`` and rounded to an int.
            y: Target Y coordinate; converted the same way.

        Returns:
            A successful ``ToolResult`` naming the rounded target, or a failed
            one carrying the exception text if conversion or the move raised.
        """
        try:
            # Convert x first, then y, so a bad value fails before any movement.
            target = tuple(int(round(float(coord))) for coord in (x, y))
            pyautogui.moveTo(*target)
            self.mouse_x, self.mouse_y = target
            return ToolResult(success=True, output=f"Moved to ({target[0]}, {target[1]})")
        except Exception as exc:
            return ToolResult(success=False, output=f"Failed to move: {exc}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "click",
            "description": "Click at current or specified position",
            "parameters": {
                "type": "object",
                "properties": {
                    "button": {"type": "string", "description": "Mouse button to click", "enum": ["left", "right", "middle"], "default": "left"},
                    "x": {"type": "number", "description": "Optional X coordinate"},
                    "y": {"type": "number", "description": "Optional Y coordinate"},
                    "num_clicks": {"type": "integer", "description": "Number of clicks", "enum": [1, 2, 3], "default": 1}
                }
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="click">
        <parameter name="x">100</parameter>
        <parameter name="y">200</parameter>
        <parameter name="button">left</parameter>
        <parameter name="num_clicks">1</parameter>
        </invoke>
        </function_calls>
        ''')
    async def click(self, x: Optional[float] = None, y: Optional[float] = None, button: str = "left", num_clicks: int = 1) -> ToolResult:
        """Click a mouse button at the given point or at the cursor's real position.

        Args:
            x: Optional X coordinate. When omitted, the cursor's current X as
                reported by ``pyautogui.position()`` is used.
            y: Optional Y coordinate, handled like ``x``.
            button: Mouse button name passed to ``pyautogui.click``.
            num_clicks: Number of clicks; coerced to ``int`` like the other
                numeric parameters of this tool.

        Returns:
            A successful ``ToolResult`` describing the click, or a failed one
            carrying the exception text.
        """
        try:
            # Query the live cursor position for any missing coordinate instead
            # of trusting self.mouse_x / self.mouse_y, which start at (0, 0) and
            # are only updated by this tool's own actions.
            cursor = pyautogui.position() if (x is None or y is None) else None
            x_val = int(round(float(x))) if x is not None else int(cursor[0])
            y_val = int(round(float(y))) if y is not None else int(cursor[1])
            clicks = int(float(num_clicks))
            pyautogui.click(x=x_val, y=y_val, clicks=clicks, button=button)
            self.mouse_x = x_val
            self.mouse_y = y_val
            return ToolResult(success=True, output=f"{clicks} {button} click(s) performed at ({x_val}, {y_val})")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to click: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "scroll",
            "description": "Scroll the mouse wheel at current position",
            "parameters": {
                "type": "object",
                "properties": {
                    "amount": {"type": "integer", "description": "Scroll amount (positive for up, negative for down)", "minimum": -10, "maximum": 10}
                },
                "required": ["amount"]
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="scroll">
        <parameter name="amount">-3</parameter>
        </invoke>
        </function_calls>
        ''')
    async def scroll(self, amount: int) -> ToolResult:
        """Scroll the mouse wheel at the cursor's current position.

        Args:
            amount: Number of steps; positive scrolls up, negative scrolls
                down. The value is truncated to an int and clamped to
                ``[-10, 10]``.

        Returns:
            A successful ``ToolResult`` describing the scroll (or stating that
            nothing was scrolled when the amount is 0), or a failed one carrying
            the exception text.
        """
        try:
            steps = max(-10, min(10, int(float(amount))))
            if steps == 0:
                # Previously a zero amount was reported as "Scrolled down 0 step(s)".
                return ToolResult(success=True, output="Scroll amount is 0; nothing scrolled")
            # Each step is sent to pyautogui as 100 scroll units.
            pyautogui.scroll(steps * 100)
            # Report where the wheel event actually happened rather than the
            # tool's remembered coordinates, which may be stale.
            cursor_x, cursor_y = pyautogui.position()
            direction = 'up' if steps > 0 else 'down'
            return ToolResult(success=True, output=f"Scrolled {direction} {abs(steps)} step(s) at position ({cursor_x}, {cursor_y})")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to scroll: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "typing",
            "description": "Type specified text",
            "parameters": {
                "type": "object",
                "properties": {
                    "text": {"type": "string", "description": "Text to type"}
                },
                "required": ["text"]
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="typing">
        <parameter name="text">Hello World!</parameter>
        </invoke>
        </function_calls>
        ''')
    async def typing(self, text: str) -> ToolResult:
        """Type ``text`` through the keyboard.

        ASCII-only text is sent with ``pyautogui.write`` using a 0.01 s pause
        between characters. Text containing non-ASCII characters is sent with
        the pynput keyboard controller instead, because ``pyautogui.write``
        works from a fixed key-name mapping that has no entries for such
        characters.

        Args:
            text: Text to type; converted with ``str`` first.

        Returns:
            A successful ``ToolResult`` echoing the text, or a failed one
            carrying the exception text.
        """
        try:
            text = str(text)
            if all(ord(ch) < 128 for ch in text):
                pyautogui.write(text, interval=0.01)
            else:
                # pynput raises if a character cannot be typed, so a failure is
                # reported instead of a false success.
                self.keyboard.type(text)
            return ToolResult(success=True, output=f"Typed: {text}")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to type: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "press",
            "description": "Press and release a key",
            "parameters": {
                "type": "object",
                "properties": {
                    "key": {"type": "string", "description": "Key to press", "enum": KEYBOARD_KEYS}
                },
                "required": ["key"]
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="press">
        <parameter name="key">enter</parameter>
        </invoke>
        </function_calls>
        ''')
    async def press(self, key: str) -> ToolResult:
        """Press and release a key, or a '+'-joined key combination.

        The tool schema offers combinations such as ``ctrl+c`` for this
        function. ``pyautogui.press`` treats the whole string as one key name,
        so combinations are split on '+' and sent with ``pyautogui.hotkey``.
        Key names that pyautogui does not recognise are rejected up front so
        an unsupported key is not reported as a success.

        Args:
            key: Key name (case-insensitive) or '+'-joined combination.

        Returns:
            A successful ``ToolResult`` naming the key, or a failed one naming
            the unsupported key or carrying the exception text.
        """
        try:
            key_name = str(key).lower()
            if len(key_name) > 1:
                # Only trim multi-character names so a literal ' ' key survives.
                key_name = key_name.strip()
            # A lone '+' is a key of its own, not a combination separator.
            is_combo = len(key_name) > 1 and '+' in key_name
            parts = [part.strip() for part in key_name.split('+')] if is_combo else [key_name]
            unsupported = [part for part in parts if not pyautogui.isValidKey(part)]
            if unsupported:
                return ToolResult(success=False, output=f"Failed to press key: unsupported key(s) {unsupported} in '{key}'")
            if is_combo:
                pyautogui.hotkey(*parts)
            else:
                pyautogui.press(key_name)
            return ToolResult(success=True, output=f"Pressed key: {key}")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to press key: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "wait",
            "description": "Wait for specified duration",
            "parameters": {
                "type": "object",
                "properties": {
                    "duration": {"type": "number", "description": "Duration in seconds", "default": 0.5}
                }
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="wait">
        <parameter name="duration">1.5</parameter>
        </invoke>
        </function_calls>
        ''')
    async def wait(self, duration: float = 0.5) -> ToolResult:
        """Pause without blocking the event loop.

        Args:
            duration: Requested pause in seconds; converted with ``float`` and
                limited to the range 0..10.

        Returns:
            A successful ``ToolResult`` stating the time waited, or a failed
            one carrying the exception text.
        """
        try:
            requested = float(duration)
            # Cap at 10 first, then floor at 0.
            capped = min(10, requested)
            duration = max(0, capped)
            await asyncio.sleep(duration)
            return ToolResult(success=True, output=f"Waited {duration} seconds")
        except Exception as exc:
            return ToolResult(success=False, output=f"Failed to wait: {exc}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "mouse_down",
            "description": "Press a mouse button",
            "parameters": {
                "type": "object",
                "properties": {
                    "button": {"type": "string",
                               "description": "Mouse button to press",
                               "enum": ["left", "right", "middle"],
                               "default": "left"
                    }
                }
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="mouse_down">
        <parameter name="button">left</parameter>
        </invoke>
        </function_calls>
        ''')
    async def mouse_down(self, button: str = "left", x: Optional[float] = None, y: Optional[float] = None) -> ToolResult:
        """Press (and hold) a mouse button at the given point or the cursor's real position.

        Args:
            button: Mouse button name passed to ``pyautogui.mouseDown``.
            x: Optional X coordinate. When omitted, the cursor's current X as
                reported by ``pyautogui.position()`` is used.
            y: Optional Y coordinate, handled like ``x``.

        Returns:
            A successful ``ToolResult`` describing the press, or a failed one
            carrying the exception text.
        """
        try:
            # Use the live cursor position for missing coordinates; the tracked
            # self.mouse_x / self.mouse_y start at (0, 0) and can be stale.
            cursor = pyautogui.position() if (x is None or y is None) else None
            x_val = int(round(float(x))) if x is not None else int(cursor[0])
            y_val = int(round(float(y))) if y is not None else int(cursor[1])
            pyautogui.mouseDown(x=x_val, y=y_val, button=button)
            self.mouse_x = x_val
            self.mouse_y = y_val
            return ToolResult(success=True, output=f"{button} button pressed at ({x_val}, {y_val})")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to press button: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "mouse_up",
            "description": "Release a mouse button",
            "parameters": {
                "type": "object",
                "properties": {
                    "button": {"type": "string", "description": "Mouse button to release", "enum": ["left", "right", "middle"], "default": "left"}
                }
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="mouse_up">
        <parameter name="button">left</parameter>
        </invoke>
        </function_calls>
        ''')
    async def mouse_up(self, button: str = "left", x: Optional[float] = None, y: Optional[float] = None) -> ToolResult:
        """Release a mouse button at the given point or the cursor's real position.

        Args:
            button: Mouse button name passed to ``pyautogui.mouseUp``.
            x: Optional X coordinate. When omitted, the cursor's current X as
                reported by ``pyautogui.position()`` is used.
            y: Optional Y coordinate, handled like ``x``.

        Returns:
            A successful ``ToolResult`` describing the release, or a failed one
            carrying the exception text.
        """
        try:
            # Use the live cursor position for missing coordinates; the tracked
            # self.mouse_x / self.mouse_y start at (0, 0) and can be stale.
            cursor = pyautogui.position() if (x is None or y is None) else None
            x_val = int(round(float(x))) if x is not None else int(cursor[0])
            y_val = int(round(float(y))) if y is not None else int(cursor[1])
            pyautogui.mouseUp(x=x_val, y=y_val, button=button)
            self.mouse_x = x_val
            self.mouse_y = y_val
            return ToolResult(success=True, output=f"{button} button released at ({x_val}, {y_val})")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to release button: {str(e)}")

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "drag_to",
            "description": "Drag cursor to specified position",
            "parameters": {
                "type": "object",
                "properties": {
                    "x": {"type": "number", "description": "Target X coordinate"},
                    "y": {"type": "number", "description": "Target Y coordinate"}
                },
                "required": ["x", "y"]
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="drag_to">
        <parameter name="x">500</parameter>
        <parameter name="y">50</parameter>
        </invoke>
        </function_calls>
        ''')
    async def drag_to(self, x: float, y: float) -> ToolResult:
        """Drag with the left button to ``(x, y)`` and remember that point.

        Args:
            x: Target X coordinate; converted with ``float`` and rounded to an int.
            y: Target Y coordinate; converted the same way.

        Returns:
            A successful ``ToolResult`` naming the rounded target, or a failed
            one carrying the exception text.
        """
        try:
            destination = tuple(int(round(float(coord))) for coord in (x, y))
            # The drag is spread over 0.3 seconds.
            pyautogui.dragTo(destination[0], destination[1], duration=0.3, button="left")
            self.mouse_x, self.mouse_y = destination
            return ToolResult(success=True, output=f"Dragged to ({destination[0]}, {destination[1]})")
        except Exception as exc:
            return ToolResult(success=False, output=f"Failed to drag: {exc}")

    async def get_screenshot_base64(self) -> Optional[dict]:
        """Capture the screen, save it to disk and return it base64-encoded.

        The PNG is written twice, relative to the current working directory:
        ``screenshots/screenshot_<timestamp>.png`` and ``latest_screenshot.png``
        (the latter is overwritten on every call).

        Returns:
            A dict with keys ``content_type``, ``base64``, ``timestamp`` and
            ``filename`` (the timestamped path), or ``None`` if anything in
            the capture or save process raised.
        """
        try:
            import base64
            import io
            from PIL import ImageGrab

            img = ImageGrab.grab()
            # Encode to PNG once and reuse the bytes for base64 and both files.
            buf = io.BytesIO()
            img.save(buf, format='PNG')
            png_bytes = buf.getvalue()
            base64_str = base64.b64encode(png_bytes).decode()

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            screenshots_dir = "screenshots"
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(screenshots_dir, exist_ok=True)
            timestamped_filename = os.path.join(screenshots_dir, f"screenshot_{timestamp}.png")
            latest_filename = "latest_screenshot.png"
            for path in (timestamped_filename, latest_filename):
                with open(path, "wb") as png_file:
                    png_file.write(png_bytes)

            return {
                "content_type": "image/png",
                "base64": base64_str,
                "timestamp": timestamp,
                "filename": timestamped_filename
            }
        except Exception as e:
            # Log with traceback instead of printing, matching the module's use of logging.
            logging.exception(f"[Screenshot] Error during screenshot process: {str(e)}")
            return None

    @openapi_schema({
        "type": "function",
        "function": {
            "name": "hotkey",
            "description": "Press a key combination",
            "parameters": {
                "type": "object",
                "properties": {
                    "keys": {"type": "string", "description": "Key combination to press", "enum": KEYBOARD_KEYS}
                },
                "required": ["keys"]
            }
        }
    })
    @usage_example('''
        <function_calls>
        <invoke name="hotkey">
        <parameter name="keys">ctrl+a</parameter>
        </invoke>
        </function_calls>
        ''')
    async def hotkey(self, keys: str) -> ToolResult:
        """Press a '+'-joined key combination such as ``ctrl+a``.

        The string is lower-cased and split on '+', and each part is trimmed.
        Every part must be a key name pyautogui recognises; otherwise the
        call fails without pressing anything, so empty or misspelled parts
        are not reported as a success.

        Args:
            keys: Combination to press, key names joined by '+'.

        Returns:
            A successful ``ToolResult`` naming the combination, or a failed one
            naming the unsupported parts or carrying the exception text.
        """
        try:
            key_sequence = [part.strip() for part in str(keys).lower().split('+')]
            unsupported = [part for part in key_sequence if not pyautogui.isValidKey(part)]
            if unsupported:
                return ToolResult(success=False, output=f"Failed to press keys: unsupported key(s) {unsupported} in '{keys}'")
            pyautogui.hotkey(*key_sequence)
            return ToolResult(success=True, output=f"Pressed key combination: {keys}")
        except Exception as e:
            return ToolResult(success=False, output=f"Failed to press keys: {str(e)}")


# Example usage (for debugging only)
if __name__ == "__main__":
    print("This module should be imported, not run directly.")

    async def demo():
        """Exercise a few tool actions against the real desktop and print each result."""
        # __init__ requires both arguments; placeholders are enough here because
        # none of the calls below read project_id or thread_manager.
        tool = ComputerUseTool(project_id="demo", thread_manager=None)
        print(await tool.move_to(200, 300))
        print(await tool.click())
        print(await tool.typing("Hello, world!"))
        print(await tool.scroll(-2))
        print(await tool.hotkey("ctrl+s"))
        print(await tool.get_screenshot_base64())

    asyncio.run(demo())


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值