Lybic Docs

使用指南

Lybic Python SDK 使用指南

Lybic Python SDK 提供了一组类,用于与 Lybic API 进行交互。

将 LLM 的输出转换为 pyautogui 格式

为了便于执行由大型语言模型(LLM)生成的 GUI 自动化脚本(这些脚本通常使用流行的 pyautogui 库进行训练),Lybic SDK 提供了一个 Pyautogui 兼容类。此类复刻了 pyautogui 接口,允许您几乎不用更改,执行 LLM 生成的代码。

使用方法

首先,初始化 LybicClient,然后创建一个 Pyautogui 实例,将其绑定到特定沙箱。然后您可以像使用 pyautogui 模块一样使用此实例。

import asyncio
from lybic import LybicClient, Pyautogui

async def main():
    """Drive a sandbox through the pyautogui-compatible ``Pyautogui`` wrapper."""
    async with LybicClient() as client:
        # Assume you already have a sandbox
        sandbox_id = "your_sandbox_id"

        # Create the Pyautogui instance
        pyautogui = Pyautogui(client, sandbox_id)

        # You can now run pyautogui-style commands.
        # For example, if the LLM outputs the following string:
        llm_output = "pyautogui.moveTo(100, 150)"

        # You can execute it like this:
        # WARNING: calling eval() on untrusted input is a security risk.
        # Always sanitize and validate the LLM output.
        # The evaluated text refers to the local name `pyautogui` bound above.
        eval(llm_output)

        # Or call the methods directly
        pyautogui.click(x=200, y=200)
        pyautogui.write("Hello from Lybic!")
        pyautogui.press("enter")

if __name__ == "__main__":
    asyncio.run(main())

特殊场景:以同步模式运行

import asyncio
from lybic import LybicClient, Pyautogui

# Synchronous usage: no `async with`, so the objects are created and closed by hand.
sandbox_id = "your_sandbox_id"
llm_output = "pyautogui.moveTo(100, 150)"

client = LybicClient()
pyautogui = Pyautogui(client, sandbox_id)

# WARNING: calling eval() on untrusted input is a security risk.
# Always sanitize and validate the LLM output.
# The evaluated text refers to the module-level name `pyautogui` bound above.
eval(llm_output)

# Recommendation: you need to manage the object lifecycle manually
pyautogui.close()
asyncio.run(client.close())

支持的函数

lybic.Pyautogui 类支持最常用的 pyautogui 函数的子集。

函数 | 支持 | 备注
position()
moveTo()
move()
click()
rightClick()
middleClick()
doubleClick()
tripleClick()
dragTo():仅支持左键拖拽
scroll()
write():typewrite() 的封装。
typewrite():支持字符串和字符串列表。
press():支持单键和多键。
hotkey()
keyDown()
keyUp()

组织统计

Stats 用于获取组织的统计信息。

获取组织统计

  • 方法:get()
  • 参数:无
  • 返回:dto.StatsResponseDto
import asyncio
from lybic import LybicClient, Stats

async def main():
    """Fetch the organization statistics and print them."""
    async with LybicClient() as client:
        org_stats = await Stats(client).get()
        print(org_stats)

if __name__ == '__main__':
    asyncio.run(main())

示例输出:

mcpServers=3 sandboxes=8 projects=4

用于沙箱管理的 Lybic 项目

Project 是一个用于描述项目并管理其沙箱的类。

列出所有项目

  • 方法:list()
  • 参数:无
  • 返回:list[dto.ProjectResponseDto]
import asyncio
from lybic import LybicClient, Project

async def main():
    """List all projects and print each one on its own line."""
    async with LybicClient() as client:
        projects = await Project(client).list()
        for entry in projects:
            print(entry)

if __name__ == '__main__':
    asyncio.run(main())

示例输出:

id='PRJ-xxxx' name='test_project' createdAt='2025-07-10T08:03:36.375Z' defaultProject=False
id='PRJ-xxxx' name='Default Project' createdAt='2025-07-08T16:42:30.226Z' defaultProject=True

创建项目

  • 方法:create(data: dto.CreateProjectDto)
  • 参数:
    • name (str):项目名称。
  • 返回:dto.SingleProjectResponseDto
import asyncio
from lybic import LybicClient, Project

async def main():
    """Create a project named "test_project" and print the response."""
    async with LybicClient() as client:
        project_api = Project(client)
        created = await project_api.create(name="test_project")
        print(created)

if __name__ == '__main__':
    asyncio.run(main())

删除项目

  • 方法:delete(project_id: str)
  • 参数:
    • project_id (str):要删除的项目 ID。
  • 返回:None
import asyncio
from lybic import LybicClient, Project

async def main():
    """Delete the project with ID "PRJ-xxxx"."""
    async with LybicClient() as client:
        project_api = Project(client)
        await project_api.delete(project_id="PRJ-xxxx")

if __name__ == '__main__':
    asyncio.run(main())

沙箱管理

Sandbox 提供用于管理和交互沙箱的方法。

列出所有沙箱

  • 方法:list()
  • 参数:无
  • 返回:list[dto.SandboxResponseDto]
import asyncio
from lybic import LybicClient, Sandbox

async def main():
    """List all sandboxes and print each one on its own line."""
    async with LybicClient() as client:
        found = await Sandbox(client).list()
        for item in found:
            print(item)

if __name__ == '__main__':
    asyncio.run(main())

创建新沙箱

  • 方法:create(data: dto.CreateSandboxDto)
  • 参数:
    • shape (str, 必需):沙箱规格。
    • name (str, 可选):沙箱名称。
    • maxLifeSeconds (int, 可选):生命周期(秒)(默认值:3600)。
    • projectId (str, 可选):项目 ID。
  • 返回:dto.Sandbox
import asyncio
from lybic import LybicClient, Sandbox

async def main():
    """Create a sandbox named "my-sandbox" with shape "standard-4c8g" and print it."""
    async with LybicClient() as client:
        sandbox_api = Sandbox(client)
        created = await sandbox_api.create(
            name="my-sandbox",
            shape="standard-4c8g",
        )
        print(created)

if __name__ == '__main__':
    asyncio.run(main())

获取特定沙箱

  • 方法:get(sandbox_id: str)
  • 参数:
    • sandbox_id (str):沙箱 ID。
  • 返回:dto.GetSandboxResponseDto
import asyncio
from lybic import LybicClient, Sandbox

async def main():
    """Fetch the sandbox with ID "SBX-xxxx" and print its details."""
    async with LybicClient() as client:
        info = await Sandbox(client).get(sandbox_id="SBX-xxxx")
        print(info)

if __name__ == '__main__':
    asyncio.run(main())

删除沙箱

  • 方法:delete(sandbox_id: str)
  • 参数:
    • sandbox_id (str):要删除的沙箱 ID。
  • 返回:None
import asyncio
from lybic import LybicClient, Sandbox

async def main():
    """Delete the sandbox with ID "SBX-xxxx"."""
    async with LybicClient() as client:
        sandbox_api = Sandbox(client)
        await sandbox_api.delete(sandbox_id="SBX-xxxx")

if __name__ == '__main__':
    asyncio.run(main())

获取沙箱截图

  • 方法:get_screenshot(sandbox_id: str)
  • 参数:
    • sandbox_id (str):沙箱 ID。
  • 返回:tuple (screenshot_url, PIL.Image.Image, webp_image_base64_string)
import asyncio
from lybic import LybicClient, Sandbox

async def main():
    """Take a screenshot of sandbox "SBX-xxxx", print its URL and show the image."""
    async with LybicClient() as client:
        screenshot = await Sandbox(client).get_screenshot(sandbox_id="SBX-xxxx")
        # The call yields three values; the third (base64 string) is not used here.
        url, picture, _encoded = screenshot
        print(f"截图 URL: {url}")
        picture.show()

if __name__ == '__main__':
    asyncio.run(main())

执行沙箱动作

此接口使 Planner 能够通过 RESTful 调用在沙箱上执行动作。它支持 Computer Use(电脑端使用)和 Mobile Use(移动端使用)动作。

  • 方法:execute_sandbox_action(sandbox_id: str, data: dto.ExecuteSandboxActionDto) 或 execute_sandbox_action(sandbox_id: str, **kwargs)
  • 参数:
    • *sandbox_id: str 沙箱 ID
    • *data: class dto.ExecuteSandboxActionDto 要执行的动作
  • 返回:class dto.SandboxActionResponseDto
import asyncio
from lybic import dto, Sandbox, ComputerUse, LybicClient, LybicAuth
async def main():
    """Parse a ui-tars model reply and execute its first action in a sandbox.

    The action is executed twice to show both call styles of
    ``execute_sandbox_action``: once wrapped in an
    ``ExecuteSandboxActionDto`` and once passed as a keyword argument.
    """
    auth = LybicAuth(
        org_id="ORG-xxxx",
        api_key="lysk-xxxxxxxxxxx",
        endpoint="https://api.lybic.cn/",
    )
    async with LybicClient(auth) as client:
        computer_use = ComputerUse(client)
        parsed_result = await computer_use.parse_llm_output(
            model_type="ui-tars",
            # Continuation lines of the literal stay flush-left so that no
            # source indentation ends up inside the text given to the parser.
            llm_output="""Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.

Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
        )
        actions = parsed_result.actions
        if actions:
            sandbox = Sandbox(client)
            # Call style 1: wrap the action in a DTO
            response = await sandbox.execute_sandbox_action(
                sandbox_id="SBX-xxxx",
                data=dto.ExecuteSandboxActionDto(action=actions[0])
            )
            print(response)
            # Call style 2: pass the action as a keyword argument
            response_2 = await sandbox.execute_sandbox_action(
                sandbox_id="SBX-xxxx",
                action=actions[0]
            )
            print(response_2)
if __name__ == "__main__":
   asyncio.run(main())

电脑端使用

ComputerUse 是 Lybic ComputerUse API 的客户端,用于解析模型输出并执行动作。

将定位模型输出解析为电脑端动作

支持 ui-tars、seed、glm-4.1v、glm-4.5v、qwen-2.5-vl、pyautogui

对于相对坐标系(如 ui-tars),请使用 ui-tars

对于绝对坐标系(如 doubao-1.6-seed、openCUA),请使用 seed

对于其他情况,请根据模型类型或输出格式选择 model

如果您想解析模型输出,可以使用此方法。

  • 方法:parse_llm_output(model_type, llm_output)
  • 参数:
    • *model_type: ModelType|str 要使用的模型(例如,"ui-tars")
    • *llm_output: str 要解析的文本内容
  • 返回:class dto.ComputerUseActionResponseDto

示例:

  1. ui-tars / seed:如果您使用的模型是 "ui-tars" 或 "seed",提示词如下:
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format

Thought: ...
Action: ...

## Action Space

click(point='<point>x1 y1</point>')
left_double(point='<point>x1 y1</point>')
right_single(point='<point>x1 y1</point>')
drag(start_point='<point>x1 y1</point>', end_point='<point>x2 y2</point>')
hotkey(key='ctrl c') # Split keys with a space and use lowercase. Also, do not use more than 3 keys in one hotkey action.
type(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \n at the end of content.
scroll(point='<point>x1 y1</point>', direction='down or up or right or left') # Show more information on the `direction` side.
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format.

## Note

- Use {language} in `Thought` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.

## User Instruction

{instruction}

模型输出如下:

Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.

Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')

此 API 将解析此模型输出格式并返回电脑端使用的动作列表。

import asyncio
from lybic import LybicClient, dto, ComputerUse

async def main():
    """Parse a ui-tars reply into computer-use actions and print the result."""
    model_reply = """Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.

Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
    async with LybicClient() as client:
        parsed = await ComputerUse(client).parse_llm_output(
            model_type="ui-tars",
            llm_output=model_reply,
        )
        print(parsed)

if __name__ == '__main__':
    asyncio.run(main())

它将输出类似下面的内容:(一个动作列表对象,长度为 1)

actions=[MouseDoubleClickAction(type='mouse:doubleClick', x=FractionalLength(type='/', numerator=213, denominator=1000), y=FractionalLength(type='/', numerator=257, denominator=1000), button=1)]
  2. GLM-4.1v:如果您使用的模型是 "glm-4.1v",提示词如下:
You are a GUI operation agent. You will be given a task and your action history, with recent screenshots. You should help me control the computer, output the best action step by step to accomplish the task.
The actions you output must be in the following action space:
left_click(start_box='[x,y]', element_info='')

# left single click at [x,y]

right_click(start_box='[x,y]', element_info='')

# right single click at [x,y]

middle_click(start_box='[x,y]', element_info='')

# middle single click at [x,y]

hover(start_box='[x,y]', element_info='')

# hover the mouse at [x,y]

left_double_click(start_box='[x,y]', element_info='')

# left double click at [x,y]

left_drag(start_box='[x1,y1]', end_box='[x2,y2]', element_info='')

# left drag from [x1,y1] to [x2,y2]

key(keys='')

# press a single key or a key combination/shortcut, if it's a key combination, you should use '+' to connect the keys like key(key='ctrl+c')

type(content='')

# type text into the current active element, it performs a copy&paste operation, so _you must click at the target element first to active it before typing something in_, if you want to overwrite the content, you should clear the content before type something in.

scroll(start_box='[x,y]', direction='down/up', step=k, element_info='')

# scroll the page at [x,y] to the specified direction for k clicks of the mouse wheel

WAIT()

# sleep for 5 seconds

DONE()

# output when the task is fully completed

FAIL()

# output when the task can not be performed at all

The output rules are as follows:

1. The start/end box parameter of the action should be in the format of [x, y] normalized to 0-1000, which usually should be the bounding box of a specific target element.
2. The element_info parameter is optional, it should be a string that describes the element you want to operate with, you should fill this parameter when you're sure about what the target element is.
3. Take actions step by step. _NEVER output multiple actions at once_.
4. If there are previous actions that you have already performed, I'll provide you history actions and at most 4 shrunked(to 50%\*50%) screenshots showing the state before your last 4 actions. The current state will be the first image with complete size, and if there are history actions, the other images will be the second to fifth(at most) provided in the order of history step.
5. You should put the key information you _have to remember_ in a separated memory part and I'll give it to you in the next round. The content in this part should be a JSON list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty <memory></memory> part.
6. You can choose to give me a brief explanation before you start to take actions.

Output Format:
Plain text explanation with action(param='...')
Memory:
[{{"user_email": "x@gmail.com", ...}}]

Here are some helpful tips:

- My computer's password is "password", feel free to use it when you need sudo rights.
- For the thunderbird account "anonym-x2024@outlook.com", the password is "gTCI";=@y7|QJ0nDa_kN3Sb&>".
- If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.
- You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.
  Now Please help me to solve the following task:
  #TASK#
  #HISTORY_WITH_MEMORY#

您可以这样调用 API:

import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
    """Parse a glm-4.1v action string into computer-use actions and print the result."""
    model_reply = """Action: left_double_click(start_box='[213,257]', element_info='the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.')"""
    async with LybicClient() as client:
        parsed = await ComputerUse(client).parse_llm_output(
            model_type="glm-4.1v",
            llm_output=model_reply,
        )
        print(parsed)
if __name__ == '__main__':
    asyncio.run(main())
  3. GLM-4.5v:如果您使用的模型是 "glm-4.5-vl",提示词如下:
You are a GUI Agent, and your primary task is to respond accurately to user requests or questions. In addition to directly answering the user's queries, you can also use tools or perform GUI operations directly until you fulfill the user's request or provide a correct answer. You should carefully read and understand the images and questions provided by the user, and engage in thinking and reflection when appropriate. The coordinates involved are all represented in thousandths (0-999).

# Task:

{task}

# Task Platform

Windows

# Action Space

### {left,right,middle}\_click

Call rule: `{left,right,middle}_click(start_box='[x,y]', element_info='')`
{
'name': ['left_click', 'right_click', 'middle_click'],
'description': 'Perform a left/right/middle mouse click at the specified coordinates on the screen.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] where to perform the click, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being clicked.'
}
},
'required': ['start_box']
}
}

### hover

Call rule: `hover(start_box='[x,y]', element_info='')`
{
'name': 'hover',
'description': 'Move the mouse pointer to the specified coordinates without performing any click action.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] where to move the mouse pointer, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being hovered over.'
}
},
'required': ['start_box']
}
}

### left_double_click

Call rule: `left_double_click(start_box='[x,y]', element_info='')`
{
'name': 'left_double_click',
'description': 'Perform a left mouse double-click at the specified coordinates on the screen.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] where to perform the double-click, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being double-clicked.'
}
},
'required': ['start_box']
}
}

### left_drag

Call rule: `left_drag(start_box='[x1,y1]', end_box='[x2,y2]', element_info='')`
{
'name': 'left_drag',
'description': 'Drag the mouse from starting coordinates to ending coordinates while holding the left mouse button.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Starting coordinates [x1,y1] for the drag operation, normalized to 0-999 range.'
},
'end_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Ending coordinates [x2,y2] for the drag operation, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being dragged.'
}
},
'required': ['start_box', 'end_box']
}
}

### key

Call rule: `key(keys='')`
{
'name': 'key',
'description': 'Simulate pressing a single key or combination of keys on the keyboard.',
'parameters': {
'type': 'object',
'properties': {
'keys': {
'type': 'string',
'description': 'The key or key combination to press. Use '+' to separate keys in combinations (e.g., 'ctrl+c', 'alt+tab').'
}
},
'required': ['keys']
}
}

### type

Call rule: `type(content='')`
{
'name': 'type',
'description': 'Type text content into the currently focused text input field. This action only performs typing and does not handle field activation or clearing.',
'parameters': {
'type': 'object',
'properties': {
'content': {
'type': 'string',
'description': 'The text content to be typed into the active text field.'
}
},
'required': ['content']
}
}

### scroll

Call rule: `scroll(start_box='[x,y]', direction='', step=5, element_info='')`
{
'name': 'scroll',
'description': 'Scroll an element at the specified coordinates in the specified direction by a given number of wheel steps.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] of the element or area to scroll, normalized to 0-999 range.'
},
'direction': {
'type': 'string',
'enum': ['down', 'up'],
'description': 'The direction to scroll: 'down' or 'up'.'
},
'step': {
'type': 'integer',
'default': 5,
'description': 'Number of wheel steps to scroll, default is 5.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being scrolled.'
}
},
'required': ['start_box', 'direction']
}
}

### WAIT

Call rule: `WAIT()`
{
'name': 'WAIT',
'description': 'Wait for 5 seconds before proceeding to the next action.',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}

### DONE

Call rule: `DONE()`
{
'name': 'DONE',
'description': 'Indicate that the current task has been completed successfully and no further actions are needed.',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}

### FAIL

Call rule: `FAIL()`
{
'name': 'FAIL',
'description': 'Indicate that the current task cannot be completed or is impossible to accomplish.',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}

# Historical Actions and Current Memory

History:

Thought: {bot_thought}
Action: {action}
step {step_k+1}: Screenshot:

Memory:
{memory}

# Output Format

Plain text explanation with action(param='...')
Memory:
[{{"key": "value"}}, ...]

# Some Additional Notes

- I'll give you the most recent 4 history screenshots(shrunked to 50%\*50%) along with the historical action steps.
- You should put the key information you _have to remember_ in a seperated memory part and I'll give it to you in the next round. The content in this part should be a dict list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty list.
- My computer's password is "password", feel free to use it when you need sudo rights.
- For the thunderbird account "anonym-x2024@outlook.com", the password is "gTCI";=@y7|QJ0nDa_kN3Sb&>".

Current Screenshot:

您可以这样调用 API:

import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
    """Parse a GLM-4.5v style action string and print the parsed actions."""
    async with LybicClient() as client:
        computer_use = ComputerUse(client)
        actions = await computer_use.parse_llm_output(
            model_type="glm-4.5-vl",
            # Keep the action on a single line: a line break between
            # `element_info` and `=` would become part of the parsed text.
            llm_output="""Action: left_double_click(start_box='[213,257]', element_info='the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.')"""
        )
        print(actions)
if __name__ == '__main__':
    asyncio.run(main())
  4. qwen-2.5-vl:如果您使用的模型是 "qwen-2.5-vl",提示词如下:
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name_for_human": "computer_use", "name": "computer_use", "description": "Use a mouse and keyboard to interact with a computer, and take screenshots.\n* This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.\n* Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try wait and taking another screenshot.\n* The screen's resolution is 1280x720.\n* Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.\n* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.\n* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.", "parameters": {"properties": {"action": {"description": "The action to perform. 
The available actions are:\n* `key`: Performs key down presses on the arguments passed in order, then performs key releases in reverse order.\n* `type`: Type a string of text on the keyboard.\n* `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.\n* `left_click`: Click the left mouse button.\n* `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.\n* `right_click`: Click the right mouse button.\n* `middle_click`: Click the middle mouse button.\n* `double_click`: Double-click the left mouse button.\n* `scroll`: Performs a scroll of the mouse scroll wheel.\n* `wait`: Wait specified seconds for the change to happen.\n\* `terminate`: Terminate the current task and report its completion status.", "enum": ["key", "type", "mouse_move", "left_click", "left_click_drag", "right_click", "middle_click", "double_click", "scroll", "wait", "terminate"], "type": "string"}, "keys": {"description": "Required only by `action=key`.", "type": "array"}, "text": {"description": "Required only by `action=type`.", "type": "string"}, "coordinate": {"description": "(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to move the mouse to. Required only by `action=mouse_move` and `action=left_click_drag`.", "type": "array"}, "pixels": {"description": "The amount of scrolling to perform. Positive values scroll up, negative values scroll down. Required only by `action=scroll`.", "type": "number"}, "time": {"description": "The seconds to wait. Required only by `action=wait`.", "type": "number"}, "status": {"description": "The status of the task. Required only by `action=terminate`.", "type": "string", "enum": ["success", "failure"]}}, "required": ["action"], "type": "object"}, "args_format": "Format the arguments as a JSON object."}}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>"

您可以这样调用 API:

import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
    """Parse a qwen-2.5-vl tool call into computer-use actions and print the result."""
    model_reply = """<tool_call>
{"name": "computer_use", "arguments": {"action": "double_click", "coordinate": [213, 257]}}
</tool_call>"""
    async with LybicClient() as client:
        parsed = await ComputerUse(client).parse_llm_output(
            model_type="qwen-2.5-vl",
            llm_output=model_reply,
        )
        print(parsed)
if __name__ == '__main__':
    asyncio.run(main())
  5. pyautogui:

如果您使用其他定位模型、上面未列出的模型,或符合我们内置 pyautogui 动作处理引擎规则的输出格式,则可以将模型设置为 pyautogui 来解析 pyautogui 动作。

pyautogui 动作解析适用于以下代码块:

     # Text 1
     Text you want to type
     ```python
     pyautogui.typewrite("text")
     ```

执行电脑端使用动作

此接口使 Planner 能够通过 RESTful 调用在沙箱上执行动作。

  • 方法:execute_computer_use_action(sandbox_id: str, data: dto.ComputerUseActionDto)
  • 参数:
    • *sandbox_id: str 沙箱 ID
    • *data: class dto.ComputerUseActionDto 要执行的动作
  • 返回:class dto.SandboxActionResponseDto
import asyncio
from lybic import LybicClient, dto, ComputerUse, Sandbox

async def main():
    """Parse a ui-tars reply and execute its first action in sandbox "SBX-xxxx"."""
    model_reply = """Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.

Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
    async with LybicClient() as client:
        parser = ComputerUse(client)
        runner = Sandbox(client)
        parsed = await parser.parse_llm_output(
            model_type="ui-tars",
            llm_output=model_reply,
        )

        # NOTE(review): the surrounding section documents
        # execute_computer_use_action, while this example calls
        # Sandbox.execute_sandbox_action - confirm which one is intended.
        first_action = parsed.actions[0]
        outcome = await runner.execute_sandbox_action(
            sandbox_id="SBX-xxxx",
            data=dto.ComputerUseActionDto(action=first_action),
        )
        print(outcome)

if __name__ == '__main__':
    asyncio.run(main())

dto.ExecuteSandboxActionDto(dto.ComputerUseActionDto 的升级版)

此类是用于封装智能体可以执行的单个电脑端使用动作的数据传输对象。它指定动作本身和响应的选项。

属性:

  • action (Union[MouseClickAction, MouseDoubleClickAction, ..., FailedAction]):要执行的具体动作。这是所有可能动作类型的联合。
  • includeScreenShot (bool, 可选):如果为 True(默认值),响应将包含动作执行后截取的截图的 URL。
  • includeCursorPosition (bool, 可选):如果为 True(默认值),响应将包含动作后光标的位置。
  • callId (str, 可选):动作调用的唯一标识符。

动作类型:

action 属性可以是以下 Pydantic 模型之一:

  • MouseClickAction:模拟鼠标单击。
  • MouseTripleClickAction: 模拟鼠标三击。
  • MouseDoubleClickAction:模拟鼠标双击。
  • MouseMoveAction:将鼠标光标移动到指定位置。
  • MouseScrollAction:滚动鼠标滚轮。
  • MouseDragAction:模拟从起点到终点拖动鼠标。
  • KeyboardTypeAction:输入文本字符串。
  • KeyDownAction: 模拟按下一个键。
  • KeyUpAction: 模拟释放一个键。
  • KeyboardHotkeyAction:模拟键盘快捷键(例如,Ctrl+C)。
  • ScreenshotAction:获取截图。
  • WaitAction:暂停执行指定的持续时间。
  • FinishedAction:表示任务已成功完成。
  • FailedAction:表示任务已失败。
  • ClientUserTakeoverAction: 表示用户接管控制。

每个动作都有其特定的参数集。例如,MouseClickAction 需要 x 和 y 坐标。

示例:

以下是如何创建和执行 MouseClickAction 的示例。

import asyncio
from lybic import LybicClient, dto, Sandbox

async def main():
    """Execute a left click and then a text-typing action in one sandbox."""
    async with LybicClient() as client:
        sandbox_api = Sandbox(client)

        # Action 1: left click (button=1) at pixel position (500, 300)
        left_click = dto.MouseClickAction(
            type="mouse:click",
            x=dto.PixelLength(type="px", value=500),
            y=dto.PixelLength(type="px", value=300),
            button=1,
        )

        # Wrap the action in an ExecuteSandboxActionDto, asking for the
        # screenshot and the cursor position in the response
        click_request = dto.ExecuteSandboxActionDto(
            action=left_click,
            includeScreenShot=True,
            includeCursorPosition=True,
        )

        # Execute it on a specific sandbox
        target_sandbox = "SBX-xxxx"  # replace with your sandbox ID
        click_response = await sandbox_api.execute_sandbox_action(
            sandbox_id=target_sandbox,
            data=click_request,
        )

        # The response will contain the screenshot URL and the cursor position
        print(click_response)

        # Action 2: type a text string
        typing = dto.KeyboardTypeAction(
            type="keyboard:type",
            content="Hello, Lybic!",
        )
        typing_request = dto.ExecuteSandboxActionDto(action=typing)
        typing_response = await sandbox_api.execute_sandbox_action(
            sandbox_id=target_sandbox,
            data=typing_request,
        )
        print(typing_response)

if __name__ == '__main__':
    asyncio.run(main())

移动端使用:

MobileUse 是 Lybic MobileUse API 的客户端,用于在移动端沙箱上解析模型输出并执行动作。

将定位模型输出解析为移动端动作

与 ComputerUse 类似,您可以使用此方法将模型输出解析为移动端动作。

  import asyncio
  from lybic import LybicClient, MobileUse

  async def main():
      """Parse a ui-tars reply into mobile-use actions and print the result."""
      # The continuation lines of this literal keep their leading spaces
      # exactly as they appear in the snippet.
      model_reply = """Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.

  Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
  Action: left_double(point='<point>213 257</point>')"""
      async with LybicClient() as client:
          parsed = await MobileUse(client).parse_llm_output(
              model_type="ui-tars",
              llm_output=model_reply,
          )
          print(parsed)

  if __name__ == '__main__':
      asyncio.run(main())

动作空间:

from typing import Union

# Union of every action type in the mobile-use action space.
# The trailing comment on each member names the schema it appears to correspond to.
MobileUseAction = Union[
    # General actions (generalAction* schemas)
    ScreenshotAction,        # generalActionScreenshotSchema
    WaitAction,              # generalActionWaitSchema
    FinishedAction,          # generalActionFinishedSchema
    FailedAction,            # generalActionFailedSchema
    ClientUserTakeoverAction, # generalActionUserTakeoverSchema

    # Keyboard input (generalActionKeyboard* schemas)
    KeyboardTypeAction, # generalActionKeyboardTypeSchema
    KeyboardHotkeyAction, # generalActionKeyboardHotkeySchema

    # Touch gestures and Android back/home presses (mobileUseAction* schemas)
    TouchTapAction, # mobileUseActionTapSchema
    TouchDragAction,  # mobileUseActionDragSchema
    TouchSwipeAction, # mobileUseActionSwipeSchema
    TouchLongPressAction, # mobileUseActionLongPressSchema
    AndroidBackAction, # mobileUseActionPressBackSchema
    AndroidHomeAction, # mobileUseActionPressHomeSchema

    # App start/close/list actions (mobileUseAction*App* schemas)
    OsStartAppAction, # mobileUseActionStartAppSchema
    OsStartAppByNameAction, # mobileUseActionStartAppByNameSchema
    OsCloseAppAction, # mobileUseActionCloseAppSchema
    OsListAppsAction, # mobileUseActionListAppsSchema
]

移动端使用和移动端的动作执行

移动端使用(MobileUse)没有专用的动作执行接口;它与电脑使用(ComputerUse)共用新的动作执行接口。

有关更多详细信息,请参阅执行沙箱动作

在本地存储、对象存储和沙箱之间传输文件

sandbox.copy_files 方法提供了一种统一的方式,在沙箱和外部位置(HTTP/S3)之间双向传输文件。它支持多种文件位置类型和批量动作。

method: copy_files(sandbox_id: str, data: dto.SandboxFileCopyRequestDto) 或 copy_files(sandbox_id: str, **kwargs)

  • args:
    • data: dto.SandboxFileCopyRequestDto
      • files: List[dto.FileCopyItem]
        • index: int(用于跟踪每个文件操作的唯一标识符)
        • src: FileLocation(源位置)
        • dest: FileLocation(目标位置)
  • return: dto.SandboxFileCopyResponseDto
    • results: List[dto.FileCopyResult]
      • index: int
      • success: bool
      • error: Optional[str]

支持的文件位置类型:

  • SandboxFileLocation:沙箱内的文件路径
  • HttpPutLocation:HTTP PUT 上传 URL(用于上传文件)
  • HttpGetLocation:HTTP GET 下载 URL(用于下载文件)
  • HttpPostFormLocation:HTTP POST 多部分表单上传(用于需要表单上传的服务)

1. 从本地机器上传文件到沙箱(MinIO 端到端示例)

工作流: 总体过程:上传文件到对象存储 → 生成预签名 GET URL → 沙箱从 URL 下载。

先决条件:

  • 安装 minio SDK:pip install minio
  • 您有一个 MinIO 实例和存储桶(例如 agent-data)

完整工作流:

  1. 使用 MinIO SDK 将本地文件上传到 MinIO
  2. 为上传的对象生成预签名 GET URL
  3. 使用 HttpGetLocation(源)和 SandboxFileLocation(目标)调用 Lybic sandbox.copy_files()
  4. 沙箱从 URL 下载文件并保存到指定路径
import asyncio
from datetime import timedelta
from minio import Minio
from lybic import Sandbox, LybicClient, LybicAuth
from lybic.dto import (
   SandboxFileCopyRequestDto,
   FileCopyItem,
   SandboxFileLocation,
   HttpGetLocation
)

# MinIO 配置
MINIO_ENDPOINT = 'play.min.io'  # 替换为您的 MinIO 端点
ACCESS_KEY = 'Q3AM3UQ867SPQQA43P2F'
SECRET_KEY = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG'
USE_SECURE = True
BUCKET = 'agent-data'

# 文件配置
LOCAL_FILE_PATH = './local_input.txt'  # 要上传的本地文件
OBJECT_NAME = 'uploads/input.txt'  # MinIO 中的对象键
SANDBOX_PATH = '/home/agent/input.txt'  # 沙箱中的目标路径

async def upload_file_to_sandbox():
    """Upload a local file to MinIO, then have the sandbox fetch it via a presigned URL.

    Reads the module-level MinIO and path constants, prints progress at each step,
    and prints the per-file outcome returned by ``sandbox.copy_files``.
    """
    # Step 1: push the local file into MinIO, creating the bucket if it is missing
    storage = Minio(MINIO_ENDPOINT, ACCESS_KEY, SECRET_KEY, secure=USE_SECURE)
    bucket_missing = not storage.bucket_exists(BUCKET)
    if bucket_missing:
        storage.make_bucket(BUCKET)
        print(f"已创建存储桶: {BUCKET}")

    storage.fput_object(BUCKET, OBJECT_NAME, LOCAL_FILE_PATH)
    print(f"已将 {LOCAL_FILE_PATH} 上传到 MinIO 作为 {OBJECT_NAME}")

    # Step 2: create a presigned GET URL that stays valid for one hour
    url_lifetime = timedelta(minutes=60)
    presigned_url = storage.presigned_get_object(BUCKET, OBJECT_NAME, expires=url_lifetime)
    print(f"已生成预签名 URL: {presigned_url}")

    # Step 3: ask Lybic to copy the file from the URL into the sandbox
    auth = LybicAuth(
        org_id='ORG-xxxx',
        api_key='lysk-xxxxxxxxxxx',
        endpoint='https://api.lybic.cn/',
    )
    copy_request = SandboxFileCopyRequestDto(files=[
        FileCopyItem(
            src=HttpGetLocation(url=presigned_url),
            dest=SandboxFileLocation(path=SANDBOX_PATH),
        ),
    ])
    async with LybicClient(auth) as client:
        # 'BOX-xxxx' is a placeholder for your sandbox ID
        response = await Sandbox(client).copy_files('BOX-xxxx', copy_request)

        print("复制结果:", response)
        for result in response.results:
            outcome = (
                f"✓ 文件已成功复制到沙箱 (index: {result.id})"
                if result.success
                else f"✗ 复制文件失败 (index: {result.id}): {result.error}"
            )
            print(outcome)

if __name__ == '__main__':
   asyncio.run(upload_file_to_sandbox())

2. 从沙箱下载文件到本地机器(MinIO 端到端示例)

工作流: 总体过程:生成预签名 PUT URL → 沙箱将文件上传到对象存储 → 从对象存储下载到本地。

完整工作流:

  1. 使用 MinIO SDK 生成预签名 PUT URL
  2. 使用 SandboxFileLocation(源)和 HttpPutLocation(目标)调用 Lybic sandbox.copy_files()
  3. 沙箱将其本地文件上传到预签名 URL
  4. 从 MinIO 下载文件到您的本地机器
import asyncio
from datetime import timedelta
from minio import Minio
from lybic import Sandbox, LybicClient, LybicAuth
from lybic.dto import (
   SandboxFileCopyRequestDto,
   FileCopyItem,
   SandboxFileLocation,
   HttpPutLocation
)

# MinIO 配置
MINIO_ENDPOINT = 'play.min.io'
ACCESS_KEY = 'Q3AM3UQ867SPQQA43P2F'
SECRET_KEY = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG'
USE_SECURE = True
BUCKET = 'agent-data'

# 文件配置
SANDBOX_FILE_PATH = '/home/agent/data/output.txt'  # 沙箱中的文件路径
OBJECT_NAME = 'downloads/output.txt'  # MinIO 中的目标对象键
LOCAL_DOWNLOAD_PATH = './downloaded_output.txt'  # 本地目标

async def download_file_from_sandbox():
    """Copy a file out of the sandbox through MinIO and save it locally.

    The sandbox uploads ``SANDBOX_FILE_PATH`` to a presigned PUT URL, after which
    the object is fetched from MinIO into ``LOCAL_DOWNLOAD_PATH`` and previewed.
    If the sandbox reports any failed copy, the local download is skipped so that
    an object left in the bucket by an earlier run is not mistaken for fresh output.
    """
    minio_client = Minio(MINIO_ENDPOINT, ACCESS_KEY, SECRET_KEY, secure=USE_SECURE)

    # Make sure the bucket exists
    if not minio_client.bucket_exists(BUCKET):
        minio_client.make_bucket(BUCKET)
        print(f"已创建存储桶: {BUCKET}")

    # Step 1: create a presigned PUT URL (valid for one hour)
    presigned_put_url = minio_client.presigned_put_object(
        BUCKET, OBJECT_NAME, expires=timedelta(minutes=60)
    )
    print(f"已生成预签名 PUT URL: {presigned_put_url}")

    # Step 2: use the Lybic SDK to copy the file from the sandbox to the URL
    async with LybicClient(
        LybicAuth(
            org_id='ORG-xxxx',
            api_key='lysk-xxxxxxxxxxx',
            endpoint='https://api.lybic.cn/'
        )
    ) as client:
        sandbox = Sandbox(client)

        response = await sandbox.copy_files(
            'BOX-xxxx',  # your sandbox ID
            SandboxFileCopyRequestDto(files=[
                FileCopyItem(
                    src=SandboxFileLocation(path=SANDBOX_FILE_PATH),
                    dest=HttpPutLocation(url=presigned_put_url)
                )
            ])
        )

        print("复制结果:", response)
        for result in response.results:
            if result.success:
                print(f"✓ 文件已成功从沙箱复制 (index: {result.id})")
            else:
                print(f"✗ 复制文件失败 (index: {result.id}): {result.error}")

    # Stop here if the sandbox did not upload the file: fetching the object would
    # either fail or return whatever an earlier run left under the same key.
    if not all(result.success for result in response.results):
        print("沙箱未能上传文件,已跳过本地下载")
        return

    # Step 3: download the file from MinIO to the local machine
    minio_client.fget_object(BUCKET, OBJECT_NAME, LOCAL_DOWNLOAD_PATH)
    print(f"已从 MinIO 下载文件到 {LOCAL_DOWNLOAD_PATH}")

    # Step 4: verify the file; an explicit encoding avoids depending on the
    # platform's default text encoding
    with open(LOCAL_DOWNLOAD_PATH, 'r', encoding='utf-8') as f:
        content = f.read()
        print(f"文件内容预览: {content[:100]}...")

if __name__ == '__main__':
   asyncio.run(download_file_from_sandbox())

3. 批量复制多个文件

在单个请求中复制多个文件(混合方向):

from lybic.dto import (
   SandboxFileCopyRequestDto,
   FileCopyItem,
   SandboxFileLocation,
   HttpGetLocation,
   HttpPutLocation
)

# 复制多个文件:一些从外部到沙箱,一些从沙箱到外部
response = await sandbox.copy_files(
   'SBX-xxxx',
   SandboxFileCopyRequestDto(files=[
       # 从 URL 下载到沙箱
       FileCopyItem(
           src=HttpGetLocation(url='https://example.com/file1.txt'),
           dest=SandboxFileLocation(path='/home/agent/file1.txt')
       ),
       # 从沙箱上传到 URL
       FileCopyItem(
           src=SandboxFileLocation(path='/home/agent/output.log'),
           dest=HttpPutLocation(url='https://s3.example.com/output.log')
       ),
       # 另一个下载
       FileCopyItem(
           src=HttpGetLocation(url='https://example.com/file2.txt'),
           dest=SandboxFileLocation(path='/home/agent/file2.txt')
       )
   ])
)

# 按索引检查结果
for result in response.results:
   if result.success:
       print(f"✓ 文件 {result.id} 复制成功")
   else:
       print(f"✗ 文件 {result.id} 失败: {result.error}")

4. 使用 HTTP POST 多部分表单上传

对于需要多部分表单上传的服务(例如,某些 AWS S3 预签名 POST 策略):

from minio import Minio, PostPolicy
from datetime import datetime, timedelta
from lybic.dto import (
   SandboxFileCopyRequestDto,
   FileCopyItem,
   SandboxFileLocation,
   HttpPostFormLocation
)

# 使用 MinIO 生成 POST 策略
minio_client = Minio(MINIO_ENDPOINT, ACCESS_KEY, SECRET_KEY, secure=USE_SECURE)

policy = PostPolicy()
policy.set_bucket(BUCKET)
policy.set_key('uploads/report.pdf')
policy.set_expires(datetime.now(datetime.UTC) + timedelta(hours=1))

form_data = minio_client.presigned_post_policy(policy)

# 使用 POST 表单上传
response = await sandbox.copy_files(
   'BOX-xxxx',
   SandboxFileCopyRequestDto(files=[
       FileCopyItem(
           src=SandboxFileLocation(path='/home/agent/report.pdf'),
           dest=HttpPostFormLocation(
               url=form_data['url'],
               form={k: v for k, v in form_data.items() if k != 'url'},
               fileField='file'  # 文件的表单字段名称
           )
       )
   ])
)

5. 使用自定义请求头

添加自定义请求头,用于认证或其他用途:

from lybic.dto import HttpPutLocation, HttpGetLocation

# 使用自定义请求头的 GET(例如,身份验证)
response = await sandbox.copy_files(
   'SBX-xxxx',
   SandboxFileCopyRequestDto(files=[
       FileCopyItem(
           src=HttpGetLocation(
               url='https://api.example.com/files/data.json',
               headers={
                   'Authorization': 'Bearer YOUR_TOKEN',
                   'X-Custom-Header': 'value'
               }
           ),
           dest=SandboxFileLocation(path='/home/agent/data.json')
       )
   ])
)

# 使用自定义请求头的 PUT
response = await sandbox.copy_files(
   'SBX-xxxx',
   SandboxFileCopyRequestDto(files=[
       FileCopyItem(
           src=SandboxFileLocation(path='/home/agent/result.json'),
           dest=HttpPutLocation(
               url='https://storage.example.com/uploads/result.json',
               headers={
                   'Content-Type': 'application/json',
                   'X-Upload-Id': 'unique-id'
               }
           )
       )
   ])
)

在沙箱内执行进程

运行带参数的可执行文件;捕获 stdout/stderr(base64 编码)和退出代码。

method: sandbox.execute_process(sandbox_id: str, data: dto.SandboxProcessRequestDto) 或 execute_process(sandbox_id: str, executable=..., ...)

  • args:
    • executable: str(沙箱中的绝对路径或可解析路径,例如 /usr/bin/python3)
    • args: List[str]
    • workingDirectory: Optional[str]
    • stdinBase64: Optional[str](要输入到 stdin 的 base64 编码字节)
  • return: dto.SandboxProcessResponseDto { stdoutBase64, stderrBase64, exitCode }
import asyncio
import base64
from lybic import dto, Sandbox, LybicClient, LybicAuth

async def run_process_example():
    """Run two sample processes in a sandbox and print their exit codes and output.

    The first call passes the executable and arguments as keyword arguments; the
    second builds a ``dto.SandboxProcessRequestDto`` and feeds a script via stdin.
    """

    def _as_text(encoded):
        # Output fields are base64; a missing value is treated as empty output
        return base64.b64decode(encoded or '').decode(errors='ignore')

    credentials = LybicAuth(org_id='ORG-xxxx', api_key='lysk-xxxxxxxxxxx')
    async with LybicClient(credentials) as client:
        sandbox = Sandbox(client)

        # Example 1: a simple command
        result = await sandbox.execute_process(
            'SBX-xxxx',
            executable='/bin/echo',
            args=['Hello', 'World'],
        )
        print(f"退出代码: {result.exitCode}")
        stdout = _as_text(result.stdoutBase64)
        print(f"输出: {stdout}")

        # Example 2: a Python script supplied through stdin
        script = b"print('Hello from stdin')\n"
        proc_req = dto.SandboxProcessRequestDto(
            executable='/usr/bin/python3',
            args=['-c', 'import sys; exec(sys.stdin.read())'],
            workingDirectory='/home/agent',
            stdinBase64=base64.b64encode(script).decode(),
        )
        result = await sandbox.execute_process('SBX-xxxx', data=proc_req)
        print(f"退出: {result.exitCode}")
        print(f"STDOUT: {_as_text(result.stdoutBase64)}")
        print(f"STDERR: {_as_text(result.stderrBase64)}")

if __name__ == '__main__':
  asyncio.run(run_process_example())

有关 SDK 的更多 API,请参阅 GithubRepo

本页内容