Usage
Lybic Python SDK Usage Guide
The Lybic Python SDK provides several classes to interact with the Lybic API.
Adapt LLM output pyautogui format
To facilitate the execution of GUI automation scripts generated by Large Language Models (LLMs), which are often trained using the popular pyautogui library, the Lybic SDK provides a Pyautogui compatibility class. This class mirrors the pyautogui interface, allowing you to execute LLM-generated code with minimal changes.
Usage
First, initialize the LybicClient and then create a Pyautogui instance, binding it to a specific sandbox. You can then use this instance as if it were the pyautogui module.
import asyncio
from lybic import LybicClient, Pyautogui
async def main():
async with LybicClient() as client:
# Assume you have a sandbox
sandbox_id = "your_sandbox_id"
# Create a Pyautogui instance
pyautogui = Pyautogui(client, sandbox_id)
# Now you can execute pyautogui-style commands
# For example, if an LLM outputs the following string:
llm_output = "pyautogui.moveTo(100, 150)"
# You can execute it like this:
# Warning: Using eval() on untrusted input is a security risk.
# Always sanitize and validate LLM output.
eval(llm_output)
# Or call methods directly
pyautogui.click(x=200, y=200)
pyautogui.write("Hello from Lybic!")
pyautogui.press("enter")
if __name__ == "__main__":
asyncio.run(main())
Special scenario: If your script runs in synchronous mode
import asyncio
from lybic import LybicClient, Pyautogui
sandbox_id = "your_sandbox_id"
llm_output = "pyautogui.moveTo(100, 150)"
client = LybicClient()
pyautogui = Pyautogui(client, sandbox_id)
# Warning: Using eval() on untrusted input is a security risk.
# Always sanitize and validate LLM output.
eval(llm_output)
# Recommendation: You need to manually manage object lifecycles
pyautogui.close()
asyncio.run(client.close())
Supported Functions
The lybic.Pyautogui class supports a subset of the most common pyautogui functions.
| Function | Supported | Notes |
|---|---|---|
| position() | ✅ | |
| moveTo() | ✅ | |
| move() | ✅ | |
| click() | ✅ | |
| rightClick() | ✅ | |
| middleClick() | ✅ | |
| doubleClick() | ✅ | |
| tripleClick() | ✅ | |
| dragTo() | ✅ | Only supports left-button dragging. |
| scroll() | ✅ | |
| write() | ✅ | A wrapper for typewrite(). |
| typewrite() | ✅ | Supports both strings and lists of strings. |
| press() | ✅ | Supports single key and list of keys. |
| hotkey() | ✅ | |
| keyDown() | ✅ | |
| keyUp() | ✅ | |
Organization Stats
Stats is a class for describing the stats of the organization.
Get Organization Stats
- Method: get()
- Arguments: None
- Returns: dto.StatsResponseDto
import asyncio
from lybic import LybicClient, Stats
async def main():
async with LybicClient() as client:
stats = Stats(client)
result = await stats.get()
print(result)
if __name__ == '__main__':
asyncio.run(main())
Example Output:
mcpServers=3 sandboxes=8 projects=4
Lybic Project for sandbox management
Project is a class that describes a project and is used to manage the sandboxes that belong to it.
List All Projects
- Method: list()
- Arguments: None
- Returns: list[dto.ProjectResponseDto]
import asyncio
from lybic import LybicClient, Project
async def main():
async with LybicClient() as client:
project = Project(client)
list_result = await project.list()
for p in list_result:
print(p)
if __name__ == '__main__':
asyncio.run(main())
Example Output:
id='PRJ-xxxx' name='test_project' createdAt='2025-07-10T08:03:36.375Z' defaultProject=False
id='PRJ-xxxx' name='Default Project' createdAt='2025-07-08T16:42:30.226Z' defaultProject=True
Create a Project
- Method: create(data: dto.CreateProjectDto)
- Arguments:
  - name (str): Project name.
- Returns: dto.SingleProjectResponseDto
import asyncio
from lybic import LybicClient, Project
async def main():
async with LybicClient() as client:
project = Project(client)
new_project = await project.create(name="test_project")
print(new_project)
if __name__ == '__main__':
asyncio.run(main())
Delete a Project
- Method: delete(project_id: str)
- Arguments:
  - project_id (str): ID of the project to delete.
- Returns: None
import asyncio
from lybic import LybicClient, Project
async def main():
async with LybicClient() as client:
project = Project(client)
await project.delete(project_id="PRJ-xxxx")
if __name__ == '__main__':
asyncio.run(main())
Sandbox Management
Sandbox provides methods to manage and interact with sandboxes.
List All Sandboxes
- Method: list()
- Arguments: None
- Returns: list[dto.SandboxResponseDto]
import asyncio
from lybic import LybicClient, Sandbox
async def main():
async with LybicClient() as client:
sandbox = Sandbox(client)
sandboxes = await sandbox.list()
for s in sandboxes:
print(s)
if __name__ == '__main__':
asyncio.run(main())
Create a New Sandbox
- Method: create(data: dto.CreateSandboxDto)
- Arguments:
  - shape (str, required): Sandbox shape.
  - name (str, optional): Name for the sandbox.
  - maxLifeSeconds (int, optional): Lifetime in seconds (default: 3600).
  - projectId (str, optional): Project ID.
- Returns: dto.Sandbox
import asyncio
from lybic import LybicClient, Sandbox
async def main():
async with LybicClient() as client:
sandbox = Sandbox(client)
new_sandbox = await sandbox.create(name="my-sandbox",shape="standard-4c8g")
print(new_sandbox)
if __name__ == '__main__':
asyncio.run(main())
Get a Specific Sandbox
- Method: get(sandbox_id: str)
- Arguments:
  - sandbox_id (str): ID of the sandbox.
- Returns: dto.GetSandboxResponseDto
import asyncio
from lybic import LybicClient, Sandbox
async def main():
async with LybicClient() as client:
sandbox = Sandbox(client)
details = await sandbox.get(sandbox_id="SBX-xxxx")
print(details)
if __name__ == '__main__':
asyncio.run(main())
Delete a Sandbox
- Method: delete(sandbox_id: str)
- Arguments:
  - sandbox_id (str): ID of the sandbox to delete.
- Returns: None
import asyncio
from lybic import LybicClient, Sandbox
async def main():
async with LybicClient() as client:
sandbox = Sandbox(client)
await sandbox.delete(sandbox_id="SBX-xxxx")
if __name__ == '__main__':
asyncio.run(main())
Get a Sandbox Screenshot
- Method: get_screenshot(sandbox_id: str)
- Arguments:
  - sandbox_id (str): ID of the sandbox.
- Returns: tuple(screenshot_url, PIL.Image.Image, webp_image_base64_string)
import asyncio
from lybic import LybicClient, Sandbox
async def main():
async with LybicClient() as client:
sandbox = Sandbox(client)
url, image, b64_str = await sandbox.get_screenshot(sandbox_id="SBX-xxxx")
print(f"Screenshot URL: {url}")
image.show()
if __name__ == '__main__':
asyncio.run(main())
Execute sandbox action
This interface enables the Planner to perform actions on the sandbox through RESTful calls. It supports both computer use and mobile use actions.
- Method: execute_sandbox_action(sandbox_id: str, data: dto.ExecuteSandboxActionDto) or execute_sandbox_action(sandbox_id: str, **kwargs)
- Arguments:
- *sandbox_id: str ID of the sandbox
- *data: class dto.ExecuteSandboxActionDto The action to execute
- Returns: class dto.SandboxActionResponseDto
import asyncio
from lybic import dto, Sandbox, ComputerUse, LybicClient, LybicAuth
async def main():
async with LybicClient(
LybicAuth(
org_id="ORG-xxxx",
api_key="lysk-xxxxxxxxxxx",
endpoint="https://api.lybic.cn/",
)
) as client:
computer_use = ComputerUse(client)
parsed_result = await computer_use.parse_llm_output(
model_type="ui-tars",
llm_output="""Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.
Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
)
actions = parsed_result.actions
if actions:
sandbox = Sandbox(client)
# Using DTO
response = await sandbox.execute_sandbox_action(
sandbox_id="SBX-xxxx",
data=dto.ExecuteSandboxActionDto(action=actions[0])
)
print(response)
# Using keyword arguments
response_2 = await sandbox.execute_sandbox_action(
sandbox_id="SBX-xxxx",
action=actions[0]
)
print(response_2)
if __name__ == "__main__":
asyncio.run(main())
Computer-Use
ComputerUse is a client for the Lybic ComputerUse API, used for parsing model outputs and executing actions.
Parse grounding model output into computer action
Supported model types: ui-tars, seed, glm-4.1v, glm-4.5v, qwen-2.5-vl, and pyautogui.
- For relative coordinate systems, such as ui-tars, please use ui-tars.
- For absolute coordinate systems, such as doubao-1.6-seed or openCUA, please use seed.
- For the others, please select the model according to the model type or output format.
If you want to parse the model output, you can use this method.
- Method: parse_llm_output(model_type: ModelType|str, llm_output: str)
- Arguments:
- *model_type: ModelType|str The model to use (e.g., "ui-tars")
- *llm_output: str The text content to parse
- Returns: class dto.ComputerUseActionResponseDto
Examples:
- ui-tars / seed: if the model you use is "ui-tars" or "seed" and its prompt looks like this:
You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.
## Output Format
Thought: ...
Action: ...
## Action Space
click(point='<point>x1 y1</point>')
left_double(point='<point>x1 y1</point>')
right_single(point='<point>x1 y1</point>')
drag(start_point='<point>x1 y1</point>', end_point='<point>x2 y2</point>')
hotkey(key='ctrl c') # Split keys with a space and use lowercase. Also, do not use more than 3 keys in one hotkey action.
type(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \n at the end of content.
scroll(point='<point>x1 y1</point>', direction='down or up or right or left') # Show more information on the `direction` side.
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished(content='xxx') # Use escape characters \', \", and \n in content part to ensure we can parse the content in normal python string format.
## Note
- Use {language} in `Thought` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in `Thought` part.
## User Instruction
{instruction}
The model output looks like this:
Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.
Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')
This API parses this model output format and returns a list of computer use actions.
import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
async with LybicClient() as client:
computer_use = ComputerUse(client)
actions = await computer_use.parse_llm_output(
model_type="ui-tars",
llm_output="""Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.
Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
)
print(actions)
if __name__ == '__main__':
asyncio.run(main())
It will output something like this (an action list object of length 1):
actions=[MouseDoubleClickAction(type='mouse:doubleClick', x=FractionalLength(type='/', numerator=213, denominator=1000), y=FractionalLength(type='/', numerator=257, denominator=1000), button=1)]
- GLM-4.1v: if the model you use is "glm-4.1v" and its prompt looks like this:
You are a GUI operation agent. You will be given a task and your action history, with recent screenshots. You should help me control the computer, output the best action step by step to accomplish the task.
The actions you output must be in the following action space:
left_click(start_box='[x,y]', element_info='')
# left single click at [x,y]
right_click(start_box='[x,y]', element_info='')
# right single click at [x,y]
middle_click(start_box='[x,y]', element_info='')
# middle single click at [x,y]
hover(start_box='[x,y]', element_info='')
# hover the mouse at [x,y]
left_double_click(start_box='[x,y]', element_info='')
# left double click at [x,y]
left_drag(start_box='[x1,y1]', end_box='[x2,y2]', element_info='')
# left drag from [x1,y1] to [x2,y2]
key(keys='')
# press a single key or a key combination/shortcut, if it's a key combination, you should use '+' to connect the keys like key(key='ctrl+c')
type(content='')
# type text into the current active element, it performs a copy&paste operation, so *you must click at the target element first to active it before typing something in*, if you want to overwrite the content, you should clear the content before type something in.
scroll(start_box='[x,y]', direction='down/up', step=k, element_info='')
# scroll the page at [x,y] to the specified direction for k clicks of the mouse wheel
WAIT()
# sleep for 5 seconds
DONE()
# output when the task is fully completed
FAIL()
# output when the task can not be performed at all
The output rules are as follows:
1. The start/end box parameter of the action should be in the format of [x, y] normalized to 0-1000, which usually should be the bounding box of a specific target element.
2. The element_info parameter is optional, it should be a string that describes the element you want to operate with, you should fill this parameter when you're sure about what the target element is.
3. Take actions step by step. *NEVER output multiple actions at once*.
4. If there are previous actions that you have already performed, I'll provide you history actions and at most 4 shrunked(to 50%*50%) screenshots showing the state before your last 4 actions. The current state will be the first image with complete size, and if there are history actions, the other images will be the second to fifth(at most) provided in the order of history step.
5. You should put the key information you *have to remember* in a separated memory part and I'll give it to you in the next round. The content in this part should be a JSON list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty <memory></memory> part.
6. You can choose to give me a brief explanation before you start to take actions.
Output Format:
Plain text explanation with action(param='...')
Memory:
[{{"user_email": "x@gmail.com", ...}}]
Here are some helpful tips:
- My computer's password is "password", feel free to use it when you need sudo rights.
- For the thunderbird account "anonym-x2024@outlook.com", the password is "gTCI";=@y7|QJ0nDa_kN3Sb&>".
- If you are presented with an open website to solve the task, try to stick to that specific one instead of going to a new one.
- You have full authority to execute any action without my permission. I won't be watching so please don't ask for confirmation.
Now Please help me to solve the following task:
#TASK#
#HISTORY_WITH_MEMORY#
You can call the API like this:
import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
async with LybicClient() as client:
computer_use = ComputerUse(client)
actions = await computer_use.parse_llm_output(
model_type="glm-4.1v",
llm_output="""Action: left_double_click(start_box='[213,257]', element_info='the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.')"""
)
print(actions)
if __name__ == '__main__':
asyncio.run(main())
- GLM-4.5v: if the model you use is "glm-4.5-vl" and its prompt looks like this:
You are a GUI Agent, and your primary task is to respond accurately to user requests or questions. In addition to directly answering the user's queries, you can also use tools or perform GUI operations directly until you fulfill the user's request or provide a correct answer. You should carefully read and understand the images and questions provided by the user, and engage in thinking and reflection when appropriate. The coordinates involved are all represented in thousandths (0-999).
# Task:
{task}
# Task Platform
Windows
# Action Space
### {left,right,middle}_click
Call rule: `{left,right,middle}_click(start_box='[x,y]', element_info='')`
{
'name': ['left_click', 'right_click', 'middle_click'],
'description': 'Perform a left/right/middle mouse click at the specified coordinates on the screen.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] where to perform the click, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being clicked.'
}
},
'required': ['start_box']
}
}
### hover
Call rule: `hover(start_box='[x,y]', element_info='')`
{
'name': 'hover',
'description': 'Move the mouse pointer to the specified coordinates without performing any click action.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] where to move the mouse pointer, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being hovered over.'
}
},
'required': ['start_box']
}
}
### left_double_click
Call rule: `left_double_click(start_box='[x,y]', element_info='')`
{
'name': 'left_double_click',
'description': 'Perform a left mouse double-click at the specified coordinates on the screen.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] where to perform the double-click, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being double-clicked.'
}
},
'required': ['start_box']
}
}
### left_drag
Call rule: `left_drag(start_box='[x1,y1]', end_box='[x2,y2]', element_info='')`
{
'name': 'left_drag',
'description': 'Drag the mouse from starting coordinates to ending coordinates while holding the left mouse button.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Starting coordinates [x1,y1] for the drag operation, normalized to 0-999 range.'
},
'end_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Ending coordinates [x2,y2] for the drag operation, normalized to 0-999 range.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being dragged.'
}
},
'required': ['start_box', 'end_box']
}
}
### key
Call rule: `key(keys='')`
{
'name': 'key',
'description': 'Simulate pressing a single key or combination of keys on the keyboard.',
'parameters': {
'type': 'object',
'properties': {
'keys': {
'type': 'string',
'description': 'The key or key combination to press. Use '+' to separate keys in combinations (e.g., 'ctrl+c', 'alt+tab').'
}
},
'required': ['keys']
}
}
### type
Call rule: `type(content='')`
{
'name': 'type',
'description': 'Type text content into the currently focused text input field. This action only performs typing and does not handle field activation or clearing.',
'parameters': {
'type': 'object',
'properties': {
'content': {
'type': 'string',
'description': 'The text content to be typed into the active text field.'
}
},
'required': ['content']
}
}
### scroll
Call rule: `scroll(start_box='[x,y]', direction='', step=5, element_info='')`
{
'name': 'scroll',
'description': 'Scroll an element at the specified coordinates in the specified direction by a given number of wheel steps.',
'parameters': {
'type': 'object',
'properties': {
'start_box': {
'type': 'array',
'items': {
'type': 'integer'
},
'description': 'Coordinates [x,y] of the element or area to scroll, normalized to 0-999 range.'
},
'direction': {
'type': 'string',
'enum': ['down', 'up'],
'description': 'The direction to scroll: 'down' or 'up'.'
},
'step': {
'type': 'integer',
'default': 5,
'description': 'Number of wheel steps to scroll, default is 5.'
},
'element_info': {
'type': 'string',
'description': 'Optional text description of the UI element being scrolled.'
}
},
'required': ['start_box', 'direction']
}
}
### WAIT
Call rule: `WAIT()`
{
'name': 'WAIT',
'description': 'Wait for 5 seconds before proceeding to the next action.',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}
### DONE
Call rule: `DONE()`
{
'name': 'DONE',
'description': 'Indicate that the current task has been completed successfully and no further actions are needed.',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}
### FAIL
Call rule: `FAIL()`
{
'name': 'FAIL',
'description': 'Indicate that the current task cannot be completed or is impossible to accomplish.',
'parameters': {
'type': 'object',
'properties': {},
'required': []
}
}
# Historical Actions and Current Memory
History:
Thought: {bot_thought}
Action: {action}
step {step_k+1}: Screenshot:
Memory:
{memory}
# Output Format
Plain text explanation with action(param='...')
Memory:
[{{"key": "value"}}, ...]
# Some Additional Notes
- I'll give you the most recent 4 history screenshots(shrunked to 50%*50%) along with the historical action steps.
- You should put the key information you *have to remember* in a seperated memory part and I'll give it to you in the next round. The content in this part should be a dict list. If you no longer need some given information, you should remove it from the memory. Even if you don't need to remember anything, you should also output an empty list.
- My computer's password is "password", feel free to use it when you need sudo rights.
- For the thunderbird account "anonym-x2024@outlook.com", the password is "gTCI";=@y7|QJ0nDa_kN3Sb&>".
Current Screenshot:
You can call the API like this:
import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
async with LybicClient() as client:
computer_use = ComputerUse(client)
actions = await computer_use.parse_llm_output(
model_type="glm-4.5-vl",
llm_output="""Action: left_double_click(start_box='[213,257]', element_info
='the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.')"""
)
print(actions)
if __name__ == '__main__':
asyncio.run(main())
- qwen-2.5-vl: if the model you use is "qwen-2.5-vl" and its prompt looks like this:
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name_for_human": "computer_use", "name": "computer_use", "description": "Use a mouse and keyboard to interact with a computer, and take screenshots.\n* This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.\n* Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try wait and taking another screenshot.\n* The screen's resolution is 1280x720.\n* Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.\n* If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.\n* Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.", "parameters": {"properties": {"action": {"description": "The action to perform. 
The available actions are:\n* `key`: Performs key down presses on the arguments passed in order, then performs key releases in reverse order.\n* `type`: Type a string of text on the keyboard.\n* `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.\n* `left_click`: Click the left mouse button.\n* `left_click_drag`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.\n* `right_click`: Click the right mouse button.\n* `middle_click`: Click the middle mouse button.\n* `double_click`: Double-click the left mouse button.\n* `scroll`: Performs a scroll of the mouse scroll wheel.\n* `wait`: Wait specified seconds for the change to happen.\n* `terminate`: Terminate the current task and report its completion status.", "enum": ["key", "type", "mouse_move", "left_click", "left_click_drag", "right_click", "middle_click", "double_click", "scroll", "wait", "terminate"], "type": "string"}, "keys": {"description": "Required only by `action=key`.", "type": "array"}, "text": {"description": "Required only by `action=type`.", "type": "string"}, "coordinate": {"description": "(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to move the mouse to. Required only by `action=mouse_move` and `action=left_click_drag`.", "type": "array"}, "pixels": {"description": "The amount of scrolling to perform. Positive values scroll up, negative values scroll down. Required only by `action=scroll`.", "type": "number"}, "time": {"description": "The seconds to wait. Required only by `action=wait`.", "type": "number"}, "status": {"description": "The status of the task. Required only by `action=terminate`.", "type": "string", "enum": ["success", "failure"]}}, "required": ["action"], "type": "object"}, "args_format": "Format the arguments as a JSON object."}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call>"
You can call the API like this:
import asyncio
from lybic import LybicClient, dto, ComputerUse
async def main():
async with LybicClient() as client:
computer_use = ComputerUse(client)
actions = await computer_use.parse_llm_output(
model_type="qwen-2.5-vl",
llm_output="""<tool_call>
{"name": "computer_use", "arguments": {"action": "double_click", "coordinate": [213, 257]}}
</tool_call>"""
)
print(actions)
if __name__ == '__main__':
asyncio.run(main())
- pyautogui:
If you are using other grounding models, models not listed above, or output formats that conform to the rules of our built-in pyautogui action processing engine, then you can set the model to pyautogui to parse pyautogui actions.
pyautogui action parsing is suitable for the following code blocks:
# Text 1
Text you want to type
` ` `python
pyautogui.typewrite("text")
` ` `
Execute a computer use action
This interface enables the Planner to perform actions on the sandbox through RESTful calls.
- Method: execute_computer_use_action(sandbox_id: str, data: dto.ComputerUseActionDto)
- Arguments:
- *sandbox_id: str ID of the sandbox
- *data: class dto.ComputerUseActionDto The action to execute
- Returns: class dto.SandboxActionResponseDto
import asyncio
from lybic import LybicClient, dto, ComputerUse, Sandbox
async def main():
async with LybicClient() as client:
computer_use = ComputerUse(client)
sandbox = Sandbox(client)
actions = await computer_use.parse_llm_output(
model_type="ui-tars",
llm_output="""Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.
Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
)
response = await sandbox.execute_sandbox_action(
sandbox_id="SBX-xxxx",
data=dto.ComputerUseActionDto(action=actions.actions[0])
)
print(response)
if __name__ == '__main__':
asyncio.run(main())
dto.ExecuteSandboxActionDto (dto.ComputerUseActionDto upgrade)
This class is a data transfer object used to encapsulate a single computer use action that the agent can execute. It specifies the action itself and options for the response.
Attributes:
- action (Union[MouseClickAction, MouseDoubleClickAction, ..., FailedAction]): The specific action to be performed. This is a union of all possible action types.
- includeScreenShot (bool, optional): If True (default), the response will include a URL to a screenshot taken after the action is executed.
- includeCursorPosition (bool, optional): If True (default), the response will include the cursor's position after the action.
- callId (str, optional): A unique identifier for the action call.
Action Types:
The action attribute can be one of the following Pydantic models:
- MouseClickAction: Simulates a single mouse click.
- MouseTripleClickAction: Simulates a triple click.
- MouseDoubleClickAction: Simulates a double-click.
- MouseMoveAction: Moves the mouse cursor to a specified position.
- MouseScrollAction: Scrolls the mouse wheel.
- MouseDragAction: Simulates dragging the mouse from a start to an end point.
- KeyboardTypeAction: Types a string of text.
- KeyDownAction: Simulates pressing a key.
- KeyUpAction: Simulates releasing a key.
- KeyboardHotkeyAction: Simulates a keyboard shortcut (e.g., Ctrl+C).
- ScreenshotAction: Takes a screenshot.
- WaitAction: Pauses execution for a specified duration.
- FinishedAction: Signals that the task is successfully completed.
- FailedAction: Signals that the task has failed.
- ClientUserTakeoverAction: Represents the user taking over control.
Each action has its own specific set of parameters. For example, a MouseClickAction requires x and y coordinates.
Example:
import asyncio
from lybic import LybicClient, dto, Sandbox
async def main():
async with LybicClient() as client:
sandbox = Sandbox(client)
# 1. Define the action: a left-click at position (500, 300)
click_action = dto.MouseClickAction(
type="mouse:click",
x=dto.PixelLength(type="px", value=500),
y=dto.PixelLength(type="px", value=300),
button=1 # 1 for left button
)
# 2. Wrap the action in ExecuteSandboxActionDto
action_dto = dto.ExecuteSandboxActionDto(
action=click_action,
includeScreenShot=True,
includeCursorPosition=True
)
# 3. Execute the action on a specific sandbox
sandbox_id = "SBX-xxxx" # Replace with your sandbox ID
response = await sandbox.execute_sandbox_action(
sandbox_id=sandbox_id,
data=action_dto
)
# The response will contain the screenshot URL and cursor position
print(response)
# Example for typing text
type_action = dto.KeyboardTypeAction(
type="keyboard:type",
content="Hello, Lybic!"
)
action_dto_typing = dto.ExecuteSandboxActionDto(action=type_action)
response_typing = await sandbox.execute_sandbox_action(
sandbox_id=sandbox_id,
data=action_dto_typing
)
print(response_typing)
if __name__ == '__main__':
asyncio.run(main())
Mobile Use:
MobileUse is a client for the Lybic MobileUse API, used for parsing model outputs and executing actions on mobile sandboxes.
Parse grounding model output into mobile action
Similar to ComputerUse, you can use this method to parse model outputs into mobile actions.
import asyncio
from lybic import LybicClient, MobileUse
async def main():
    """Parse one raw model response into mobile actions and print them."""
    async with LybicClient() as client:
        mobile_use = MobileUse(client)
        # The model output is passed verbatim; continuation lines of the
        # triple-quoted string deliberately start at column 0.
        actions = await mobile_use.parse_llm_output(
            model_type="ui-tars",
            llm_output="""Thought: The task requires double-left-clicking the "images" folder. In the File Explorer window, the "images" folder is visible under the Desktop directory. The target element is the folder named "images" with a yellow folder icon. Double-left-clicking this folder will open it.
Next action: Left - double - click on the "images" folder icon located in the File Explorer window, under the Desktop directory, with the name "images" and yellow folder icon.
Action: left_double(point='<point>213 257</point>')"""
        )
        print(actions)
if __name__ == '__main__':
    asyncio.run(main())

Action Space:
from typing import Union
MobileUseAction = Union[
ScreenshotAction, # generalActionScreenshotSchema
WaitAction, # generalActionWaitSchema
FinishedAction, # generalActionFinishedSchema
FailedAction, # generalActionFailedSchema
ClientUserTakeoverAction, # generalActionUserTakeoverSchema
KeyboardTypeAction, # generalActionKeyboardTypeSchema
KeyboardHotkeyAction, # generalActionKeyboardHotkeySchema
TouchTapAction, # mobileUseActionTapSchema
TouchDragAction, # mobileUseActionDragSchema
TouchSwipeAction, # mobileUseActionSwipeSchema
TouchLongPressAction, # mobileUseActionLongPressSchema
AndroidBackAction, # mobileUseActionPressBackSchema
AndroidHomeAction, # mobileUseActionPressHomeSchema
OsStartAppAction, # mobileUseActionStartAppSchema
OsStartAppByNameAction, # mobileUseActionStartAppByNameSchema
OsCloseAppAction, # mobileUseActionCloseAppSchema
OsListAppsAction, # mobileUseActionListAppsSchema
]Action Execution for Mobile Use and ComputerUse
Mobile Use does not have a dedicated action execution interface; it shares the new action execution interface with Computer Use. The old Computer Use interface has been deprecated.
Refer to the "Execute sandbox action" section for more details.
Transferring files between local storage, object storage, and sandbox
The sandbox.copy_files method provides a unified way to transfer files bidirectionally between the sandbox and external locations (HTTP/S3). It supports multiple file location types and batch operations.
method: copy_files(sandbox_id: str, data: dto.SandboxFileCopyRequestDto) or copy_files(sandbox_id: str, **kwargs)
- args:
  - data: dto.SandboxFileCopyRequestDto
    - files: List[dto.FileCopyItem]
      - index: int (unique identifier for tracking each file operation)
      - src: FileLocation (source location)
      - dest: FileLocation (destination location)
- return: dto.SandboxFileCopyResponseDto
  - results: List[dto.FileCopyResult]
    - index: int
    - success: bool
    - error: Optional[str]
Supported File Location Types:
- SandboxFileLocation: File path within the sandbox
- HttpPutLocation: HTTP PUT upload URL (for uploading files)
- HttpGetLocation: HTTP GET download URL (for downloading files)
- HttpPostFormLocation: HTTP POST multipart form upload (for services requiring form uploads)
1. Upload files from local machine to sandbox (MinIO end-to-end example)
Workflow: The overall process is: upload file to object storage → generate presigned GET URL → sandbox downloads from URL.
Prerequisites:
- Install the MinIO SDK: pip install minio
- You have a MinIO instance and a bucket (e.g. agent-data)
Complete workflow:
- Upload the local file to MinIO using the MinIO SDK
- Generate a presigned GET URL for the uploaded object
- Call Lybic sandbox.copy_files() with HttpGetLocation (source) and SandboxFileLocation (destination)
- The sandbox downloads the file from the URL and saves it to the specified path
import asyncio
from datetime import timedelta
from minio import Minio
from lybic import Sandbox, LybicClient, LybicAuth
from lybic.dto import (
SandboxFileCopyRequestDto,
FileCopyItem,
SandboxFileLocation,
HttpGetLocation
)
# MinIO configuration
# NOTE(review): the endpoint and keys below look like MinIO's public playground
# demo values — confirm. Never hard-code real credentials in a script; load
# them from the environment or a secrets store instead.
MINIO_ENDPOINT = 'play.min.io' # Replace with your MinIO endpoint
ACCESS_KEY = 'Q3AM3UQ867SPQQA43P2F'
SECRET_KEY = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG'
USE_SECURE = True
BUCKET = 'agent-data'
# File configuration
LOCAL_FILE_PATH = './local_input.txt' # Local file to upload
OBJECT_NAME = 'uploads/input.txt' # Object key in MinIO
SANDBOX_PATH = '/home/agent/input.txt' # Destination path in sandbox
async def upload_file_to_sandbox():
    """Stage a local file in MinIO and have the sandbox download it.

    The file is uploaded to the bucket, a presigned GET URL is generated for
    it, and that URL is passed to Sandbox.copy_files as the source with a
    sandbox path as the destination. The per-file results are printed.
    """
    # Step 1: Upload local file to MinIO
    minio_client = Minio(MINIO_ENDPOINT, ACCESS_KEY, SECRET_KEY, secure=USE_SECURE)

    # Ensure bucket exists
    if not minio_client.bucket_exists(BUCKET):
        minio_client.make_bucket(BUCKET)
        print(f"Created bucket: {BUCKET}")

    # Upload file to MinIO
    minio_client.fput_object(BUCKET, OBJECT_NAME, LOCAL_FILE_PATH)
    print(f"Uploaded {LOCAL_FILE_PATH} to MinIO as {OBJECT_NAME}")

    # Step 2: Generate presigned GET URL (valid for 1 hour)
    presigned_url = minio_client.presigned_get_object(
        BUCKET, OBJECT_NAME, expires=timedelta(minutes=60)
    )
    print(f"Generated presigned URL: {presigned_url}")

    # Step 3: Use Lybic SDK to copy file from URL to sandbox
    async with LybicClient(
        LybicAuth(
            org_id='ORG-xxxx',
            api_key='lysk-xxxxxxxxxxx',
            endpoint='https://api.lybic.cn/'
        )
    ) as client:
        sandbox = Sandbox(client)
        response = await sandbox.copy_files(
            'BOX-xxxx',  # Your sandbox ID
            SandboxFileCopyRequestDto(files=[
                FileCopyItem(
                    src=HttpGetLocation(url=presigned_url),
                    dest=SandboxFileLocation(path=SANDBOX_PATH)
                )
            ])
        )
        print("Copy result:", response)

        for result in response.results:
            if result.success:
                print(f"✓ File successfully copied to sandbox (index: {result.id})")
            else:
                print(f"✗ Failed to copy file (index: {result.id}): {result.error}")
if __name__ == '__main__':
    asyncio.run(upload_file_to_sandbox())

2. Download files from sandbox to local machine (MinIO end-to-end example)
Workflow: The overall process is: generate presigned PUT URL → sandbox uploads file to URL → download file from object storage.
Complete workflow:
- Generate a presigned PUT URL using the MinIO SDK
- Call Lybic sandbox.copy_files() with SandboxFileLocation (source) and HttpPutLocation (destination)
- The sandbox uploads its local file to the presigned URL
- Download the file from MinIO to your local machine
import asyncio
from datetime import timedelta
from minio import Minio
from lybic import Sandbox, LybicClient, LybicAuth
from lybic.dto import (
SandboxFileCopyRequestDto,
FileCopyItem,
SandboxFileLocation,
HttpPutLocation
)
# MinIO configuration
# NOTE(review): the endpoint and keys below look like MinIO's public playground
# demo values — confirm. Never hard-code real credentials in a script; load
# them from the environment or a secrets store instead.
MINIO_ENDPOINT = 'play.min.io'
ACCESS_KEY = 'Q3AM3UQ867SPQQA43P2F'
SECRET_KEY = 'zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG'
USE_SECURE = True
BUCKET = 'agent-data'
# File configuration
SANDBOX_FILE_PATH = '/home/agent/data/output.txt' # File path in sandbox
OBJECT_NAME = 'downloads/output.txt' # Target object key in MinIO
LOCAL_DOWNLOAD_PATH = './downloaded_output.txt' # Local destination
async def download_file_from_sandbox():
    """Have the sandbox upload a file to MinIO, then fetch it locally.

    A presigned PUT URL is generated and passed to Sandbox.copy_files as the
    destination, with the sandbox path as the source. If every copy result
    reports success, the object is downloaded from MinIO and a preview of its
    content is printed.
    """
    minio_client = Minio(MINIO_ENDPOINT, ACCESS_KEY, SECRET_KEY, secure=USE_SECURE)

    # Ensure bucket exists
    if not minio_client.bucket_exists(BUCKET):
        minio_client.make_bucket(BUCKET)
        print(f"Created bucket: {BUCKET}")

    # Step 1: Generate presigned PUT URL (valid for 1 hour)
    presigned_put_url = minio_client.presigned_put_object(
        BUCKET, OBJECT_NAME, expires=timedelta(minutes=60)
    )
    print(f"Generated presigned PUT URL: {presigned_put_url}")

    # Step 2: Use Lybic SDK to copy file from sandbox to URL
    async with LybicClient(
        LybicAuth(
            org_id='ORG-xxxx',
            api_key='lysk-xxxxxxxxxxx',
            endpoint='https://api.lybic.cn/'
        )
    ) as client:
        sandbox = Sandbox(client)
        response = await sandbox.copy_files(
            'BOX-xxxx',  # Your sandbox ID
            SandboxFileCopyRequestDto(files=[
                FileCopyItem(
                    src=SandboxFileLocation(path=SANDBOX_FILE_PATH),
                    dest=HttpPutLocation(url=presigned_put_url)
                )
            ])
        )
        print("Copy result:", response)

        for result in response.results:
            if result.success:
                print(f"✓ File successfully copied from sandbox (index: {result.id})")
            else:
                print(f"✗ Failed to copy file (index: {result.id}): {result.error}")

    # Skip the download when the sandbox-side copy failed: the object is then
    # either missing (fget_object would raise) or left over from an earlier run.
    if not all(result.success for result in response.results):
        return

    # Step 3: Download the file from MinIO to local machine
    minio_client.fget_object(BUCKET, OBJECT_NAME, LOCAL_DOWNLOAD_PATH)
    print(f"Downloaded file from MinIO to {LOCAL_DOWNLOAD_PATH}")

    # Step 4: Verify the file
    with open(LOCAL_DOWNLOAD_PATH, 'r') as f:
        content = f.read()
    print(f"File content preview: {content[:100]}...")
if __name__ == '__main__':
    asyncio.run(download_file_from_sandbox())

3. Batch copy multiple files
Copy multiple files in a single request (mixed directions):
from lybic.dto import (
SandboxFileCopyRequestDto,
FileCopyItem,
SandboxFileLocation,
HttpGetLocation,
HttpPutLocation
)
# Copy multiple files: some from external to sandbox, some from sandbox to external
response = await sandbox.copy_files(
'SBX-xxxx',
SandboxFileCopyRequestDto(files=[
# Download from URL to sandbox
FileCopyItem(
src=HttpGetLocation(url='https://example.com/file1.txt'),
dest=SandboxFileLocation(path='/home/agent/file1.txt')
),
# Upload from sandbox to URL
FileCopyItem(
src=SandboxFileLocation(path='/home/agent/output.log'),
dest=HttpPutLocation(url='https://s3.example.com/output.log')
),
# Another download
FileCopyItem(
src=HttpGetLocation(url='https://example.com/file2.txt'),
dest=SandboxFileLocation(path='/home/agent/file2.txt')
)
])
)
# Check results by index
for result in response.results:
if result.success:
print(f"✓ File {result.id} copied successfully")
else:
print(f"✗ File {result.id} failed: {result.error}")4. Using HTTP POST multipart form upload
For services that require multipart form uploads (e.g., some AWS S3 presigned POST policies):
from minio import Minio, PostPolicy
from datetime import datetime, timedelta
from lybic.dto import (
SandboxFileCopyRequestDto,
FileCopyItem,
SandboxFileLocation,
HttpPostFormLocation
)
# Generate POST policy with MinIO
minio_client = Minio(MINIO_ENDPOINT, ACCESS_KEY, SECRET_KEY, secure=USE_SECURE)
policy = PostPolicy()
policy.set_bucket(BUCKET)
policy.set_key('uploads/report.pdf')
policy.set_expires(datetime.now(datetime.UTC) + timedelta(hours=1))
form_data = minio_client.presigned_post_policy(policy)
# Use POST form for upload
response = await sandbox.copy_files(
'BOX-xxxx',
SandboxFileCopyRequestDto(files=[
FileCopyItem(
src=SandboxFileLocation(path='/home/agent/report.pdf'),
dest=HttpPostFormLocation(
url=form_data['url'],
form={k: v for k, v in form_data.items() if k != 'url'},
fileField='file' # Form field name for the file
)
)
])
)5. With custom headers
Add custom headers for authentication or other purposes:
from lybic.dto import HttpPutLocation, HttpGetLocation
# GET with custom headers (e.g., authentication)
response = await sandbox.copy_files(
'SBX-xxxx',
SandboxFileCopyRequestDto(files=[
FileCopyItem(
src=HttpGetLocation(
url='https://api.example.com/files/data.json',
headers={
'Authorization': 'Bearer YOUR_TOKEN',
'X-Custom-Header': 'value'
}
),
dest=SandboxFileLocation(path='/home/agent/data.json')
)
])
)
# PUT with custom headers
response = await sandbox.copy_files(
'SBX-xxxx',
SandboxFileCopyRequestDto(files=[
FileCopyItem(
src=SandboxFileLocation(path='/home/agent/result.json'),
dest=HttpPutLocation(
url='https://storage.example.com/uploads/result.json',
headers={
'Content-Type': 'application/json',
'X-Upload-Id': 'unique-id'
}
)
)
])
)Execute a process inside a sandbox
Run an executable with arguments; capture stdout/stderr (base64-encoded) and exit code.
method: sandbox.execute_process(sandbox_id: str, data: dto.SandboxProcessRequestDto) or execute_process(sandbox_id: str, executable=..., ...)
- args:
  - executable: str (absolute or resolvable path in sandbox, e.g. /usr/bin/python3)
  - args: List[str]
  - workingDirectory: Optional[str]
  - stdinBase64: Optional[str] (base64-encoded bytes to feed to stdin)
- return: dto.SandboxProcessResponseDto { stdoutBase64, stderrBase64, exitCode }
import asyncio
import base64
from lybic import dto, Sandbox, LybicClient, LybicAuth
async def run_process_example():
    """Run two processes in a sandbox and print their exit codes and output.

    The first call passes the executable and arguments as keyword arguments;
    the second passes a SandboxProcessRequestDto that also sets the working
    directory and base64-encoded stdin. Output fields are base64-decoded
    before printing.
    """
    async with LybicClient(LybicAuth(org_id='ORG-xxxx', api_key='lysk-xxxxxxxxxxx')) as client:
        sandbox = Sandbox(client)

        # Example 1: Simple command
        result = await sandbox.execute_process(
            'SBX-xxxx',
            executable='/bin/echo',
            args=['Hello', 'World']
        )
        print(f"Exit code: {result.exitCode}")
        # `or ''` covers a missing stdout field before decoding.
        stdout = base64.b64decode(result.stdoutBase64 or '').decode(errors='ignore')
        print(f"Output: {stdout}")

        # Example 2: Python script with stdin
        stdin_data = base64.b64encode(b"print('Hello from stdin')\n").decode()
        proc_req = dto.SandboxProcessRequestDto(
            executable='/usr/bin/python3',
            args=['-c', 'import sys; exec(sys.stdin.read())'],
            workingDirectory='/home/agent',
            stdinBase64=stdin_data
        )
        result = await sandbox.execute_process('SBX-xxxx', data=proc_req)
        print(f"Exit: {result.exitCode}")
        print(f"STDOUT: {base64.b64decode(result.stdoutBase64 or '').decode(errors='ignore')}")
        print(f"STDERR: {base64.b64decode(result.stderrBase64 or '').decode(errors='ignore')}")
if __name__ == '__main__':
    asyncio.run(run_process_example())

For more APIs related to the SDK, please refer to GithubRepo