Refactor high-complexity React components in Dify frontend. Use when `pnpm analyze-component...
npx skills add halay08/fullstack-agent-skills --skill "autonomous-agent-patterns"
Install specific skill from multi-skill repository
# Description
Design patterns for building autonomous coding agents. Covers tool integration, permission systems, browser automation, and human-in-the-loop workflows. Use when building AI agents, designing tool APIs, implementing permission systems, or creating autonomous coding assistants.
# SKILL.md
name: autonomous-agent-patterns
description: "Design patterns for building autonomous coding agents. Covers tool integration, permission systems, browser automation, and human-in-the-loop workflows. Use when building AI agents, designing tool APIs, implementing permission systems, or creating autonomous coding assistants."
πΉοΈ Autonomous Agent Patterns
Design patterns for building autonomous coding agents, inspired by Cline and OpenAI Codex.
When to Use This Skill
Use this skill when:
- Building autonomous AI agents
- Designing tool/function calling APIs
- Implementing permission and approval systems
- Creating browser automation for agents
- Designing human-in-the-loop workflows
1. Core Agent Architecture
1.1 Agent Loop
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β AGENT LOOP β
β β
β ββββββββββββ ββββββββββββ ββββββββββββ β
β β Think βββββΆβ Decide βββββΆβ Act β β
β β (Reason) β β (Plan) β β (Execute)β β
β ββββββββββββ ββββββββββββ ββββββββββββ β
β β² β β
β β ββββββββββββ β β
β βββββββββββ Observe ββββββββββββ β
β β (Result) β β
β ββββββββββββ β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
class AgentLoop:
def __init__(self, llm, tools, max_iterations=50):
self.llm = llm
self.tools = {t.name: t for t in tools}
self.max_iterations = max_iterations
self.history = []
def run(self, task: str) -> str:
self.history.append({"role": "user", "content": task})
for i in range(self.max_iterations):
# Think: Get LLM response with tool options
response = self.llm.chat(
messages=self.history,
tools=self._format_tools(),
tool_choice="auto"
)
# Decide: Check if agent wants to use a tool
if response.tool_calls:
for tool_call in response.tool_calls:
# Act: Execute the tool
result = self._execute_tool(tool_call)
# Observe: Add result to history
self.history.append({
"role": "tool",
"tool_call_id": tool_call.id,
"content": str(result)
})
else:
# No more tool calls = task complete
return response.content
return "Max iterations reached"
def _execute_tool(self, tool_call) -> Any:
tool = self.tools[tool_call.name]
args = json.loads(tool_call.arguments)
return tool.execute(**args)
1.2 Multi-Model Architecture
class MultiModelAgent:
"""
Use different models for different purposes:
- Fast model for planning
- Powerful model for complex reasoning
- Specialized model for code generation
"""
def __init__(self):
self.models = {
"fast": "gpt-3.5-turbo", # Quick decisions
"smart": "gpt-4-turbo", # Complex reasoning
"code": "claude-3-sonnet", # Code generation
}
def select_model(self, task_type: str) -> str:
if task_type == "planning":
return self.models["fast"]
elif task_type == "analysis":
return self.models["smart"]
elif task_type == "code":
return self.models["code"]
return self.models["smart"]
2. Tool Design Patterns
2.1 Tool Schema
class Tool:
"""Base class for agent tools"""
@property
def schema(self) -> dict:
"""JSON Schema for the tool"""
return {
"name": self.name,
"description": self.description,
"parameters": {
"type": "object",
"properties": self._get_parameters(),
"required": self._get_required()
}
}
def execute(self, **kwargs) -> ToolResult:
"""Execute the tool and return result"""
raise NotImplementedError
class ReadFileTool(Tool):
name = "read_file"
description = "Read the contents of a file from the filesystem"
def _get_parameters(self):
return {
"path": {
"type": "string",
"description": "Absolute path to the file"
},
"start_line": {
"type": "integer",
"description": "Line to start reading from (1-indexed)"
},
"end_line": {
"type": "integer",
"description": "Line to stop reading at (inclusive)"
}
}
def _get_required(self):
return ["path"]
def execute(self, path: str, start_line: int = None, end_line: int = None) -> ToolResult:
try:
with open(path, 'r') as f:
lines = f.readlines()
if start_line and end_line:
lines = lines[start_line-1:end_line]
return ToolResult(
success=True,
output="".join(lines)
)
except FileNotFoundError:
return ToolResult(
success=False,
error=f"File not found: {path}"
)
2.2 Essential Agent Tools
CODING_AGENT_TOOLS = {
# File operations
"read_file": "Read file contents",
"write_file": "Create or overwrite a file",
"edit_file": "Make targeted edits to a file",
"list_directory": "List files and folders",
"search_files": "Search for files by pattern",
# Code understanding
"search_code": "Search for code patterns (grep)",
"get_definition": "Find function/class definition",
"get_references": "Find all references to a symbol",
# Terminal
"run_command": "Execute a shell command",
"read_output": "Read command output",
"send_input": "Send input to running command",
# Browser (optional)
"open_browser": "Open URL in browser",
"click_element": "Click on page element",
"type_text": "Type text into input",
"screenshot": "Capture screenshot",
# Context
"ask_user": "Ask the user a question",
"search_web": "Search the web for information"
}
2.3 Edit Tool Design
class EditFileTool(Tool):
"""
Precise file editing with conflict detection.
Uses search/replace pattern for reliable edits.
"""
name = "edit_file"
description = "Edit a file by replacing specific content"
def execute(
self,
path: str,
search: str,
replace: str,
expected_occurrences: int = 1
) -> ToolResult:
"""
Args:
path: File to edit
search: Exact text to find (must match exactly, including whitespace)
replace: Text to replace with
expected_occurrences: How many times search should appear (validation)
"""
with open(path, 'r') as f:
content = f.read()
# Validate
actual_occurrences = content.count(search)
if actual_occurrences != expected_occurrences:
return ToolResult(
success=False,
error=f"Expected {expected_occurrences} occurrences, found {actual_occurrences}"
)
if actual_occurrences == 0:
return ToolResult(
success=False,
error="Search text not found in file"
)
# Apply edit
new_content = content.replace(search, replace)
with open(path, 'w') as f:
f.write(new_content)
return ToolResult(
success=True,
output=f"Replaced {actual_occurrences} occurrence(s)"
)
3. Permission & Safety Patterns
3.1 Permission Levels
class PermissionLevel(Enum):
# Fully automatic - no user approval needed
AUTO = "auto"
# Ask once per session
ASK_ONCE = "ask_once"
# Ask every time
ASK_EACH = "ask_each"
# Never allow
NEVER = "never"
PERMISSION_CONFIG = {
# Low risk - can auto-approve
"read_file": PermissionLevel.AUTO,
"list_directory": PermissionLevel.AUTO,
"search_code": PermissionLevel.AUTO,
# Medium risk - ask once
"write_file": PermissionLevel.ASK_ONCE,
"edit_file": PermissionLevel.ASK_ONCE,
# High risk - ask each time
"run_command": PermissionLevel.ASK_EACH,
"delete_file": PermissionLevel.ASK_EACH,
# Dangerous - never auto-approve
"sudo_command": PermissionLevel.NEVER,
"format_disk": PermissionLevel.NEVER
}
3.2 Approval UI Pattern
class ApprovalManager:
def __init__(self, ui, config):
self.ui = ui
self.config = config
self.session_approvals = {}
def request_approval(self, tool_name: str, args: dict) -> bool:
level = self.config.get(tool_name, PermissionLevel.ASK_EACH)
if level == PermissionLevel.AUTO:
return True
if level == PermissionLevel.NEVER:
self.ui.show_error(f"Tool '{tool_name}' is not allowed")
return False
if level == PermissionLevel.ASK_ONCE:
if tool_name in self.session_approvals:
return self.session_approvals[tool_name]
# Show approval dialog
approved = self.ui.show_approval_dialog(
tool=tool_name,
args=args,
risk_level=self._assess_risk(tool_name, args)
)
if level == PermissionLevel.ASK_ONCE:
self.session_approvals[tool_name] = approved
return approved
def _assess_risk(self, tool_name: str, args: dict) -> str:
"""Analyze specific call for risk level"""
if tool_name == "run_command":
cmd = args.get("command", "")
if any(danger in cmd for danger in ["rm -rf", "sudo", "chmod"]):
return "HIGH"
return "MEDIUM"
3.3 Sandboxing
class SandboxedExecution:
"""
Execute code/commands in isolated environment
"""
def __init__(self, workspace_dir: str):
self.workspace = workspace_dir
self.allowed_commands = ["npm", "python", "node", "git", "ls", "cat"]
self.blocked_paths = ["/etc", "/usr", "/bin", os.path.expanduser("~")]
def validate_path(self, path: str) -> bool:
"""Ensure path is within workspace"""
real_path = os.path.realpath(path)
workspace_real = os.path.realpath(self.workspace)
return real_path.startswith(workspace_real)
def validate_command(self, command: str) -> bool:
"""Check if command is allowed"""
cmd_parts = shlex.split(command)
if not cmd_parts:
return False
base_cmd = cmd_parts[0]
return base_cmd in self.allowed_commands
def execute_sandboxed(self, command: str) -> ToolResult:
if not self.validate_command(command):
return ToolResult(
success=False,
error=f"Command not allowed: {command}"
)
# Execute in isolated environment
result = subprocess.run(
command,
shell=True,
cwd=self.workspace,
capture_output=True,
timeout=30,
env={
**os.environ,
"HOME": self.workspace, # Isolate home directory
}
)
return ToolResult(
success=result.returncode == 0,
output=result.stdout.decode(),
error=result.stderr.decode() if result.returncode != 0 else None
)
4. Browser Automation
4.1 Browser Tool Pattern
class BrowserTool:
"""
Browser automation for agents using Playwright/Puppeteer.
Enables visual debugging and web testing.
"""
def __init__(self, headless: bool = True):
self.browser = None
self.page = None
self.headless = headless
async def open_url(self, url: str) -> ToolResult:
"""Navigate to URL and return page info"""
if not self.browser:
self.browser = await playwright.chromium.launch(headless=self.headless)
self.page = await self.browser.new_page()
await self.page.goto(url)
# Capture state
screenshot = await self.page.screenshot(type='png')
title = await self.page.title()
return ToolResult(
success=True,
output=f"Loaded: {title}",
metadata={
"screenshot": base64.b64encode(screenshot).decode(),
"url": self.page.url
}
)
async def click(self, selector: str) -> ToolResult:
"""Click on an element"""
try:
await self.page.click(selector, timeout=5000)
await self.page.wait_for_load_state("networkidle")
screenshot = await self.page.screenshot()
return ToolResult(
success=True,
output=f"Clicked: {selector}",
metadata={"screenshot": base64.b64encode(screenshot).decode()}
)
except TimeoutError:
return ToolResult(
success=False,
error=f"Element not found: {selector}"
)
async def type_text(self, selector: str, text: str) -> ToolResult:
"""Type text into an input"""
await self.page.fill(selector, text)
return ToolResult(success=True, output=f"Typed into {selector}")
async def get_page_content(self) -> ToolResult:
"""Get accessible text content of the page"""
content = await self.page.evaluate("""
() => {
// Get visible text
const walker = document.createTreeWalker(
document.body,
NodeFilter.SHOW_TEXT,
null,
false
);
let text = '';
while (walker.nextNode()) {
const node = walker.currentNode;
if (node.textContent.trim()) {
text += node.textContent.trim() + '\\n';
}
}
return text;
}
""")
return ToolResult(success=True, output=content)
4.2 Visual Agent Pattern
class VisualAgent:
"""
Agent that uses screenshots to understand web pages.
Can identify elements visually without selectors.
"""
def __init__(self, llm, browser):
self.llm = llm
self.browser = browser
async def describe_page(self) -> str:
"""Use vision model to describe current page"""
screenshot = await self.browser.screenshot()
response = self.llm.chat([
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this webpage. List all interactive elements you see."},
{"type": "image", "data": screenshot}
]
}
])
return response.content
async def find_and_click(self, description: str) -> ToolResult:
"""Find element by visual description and click it"""
screenshot = await self.browser.screenshot()
# Ask vision model to find element
response = self.llm.chat([
{
"role": "user",
"content": [
{
"type": "text",
"text": f"""
Find the element matching: "{description}"
Return the approximate coordinates as JSON: {{"x": number, "y": number}}
"""
},
{"type": "image", "data": screenshot}
]
}
])
coords = json.loads(response.content)
await self.browser.page.mouse.click(coords["x"], coords["y"])
return ToolResult(success=True, output=f"Clicked at ({coords['x']}, {coords['y']})")
5. Context Management
5.1 Context Injection Patterns
class ContextManager:
"""
Manage context provided to the agent.
Inspired by Cline's @-mention patterns.
"""
def __init__(self, workspace: str):
self.workspace = workspace
self.context = []
def add_file(self, path: str) -> None:
"""@file - Add file contents to context"""
with open(path, 'r') as f:
content = f.read()
self.context.append({
"type": "file",
"path": path,
"content": content
})
def add_folder(self, path: str, max_files: int = 20) -> None:
"""@folder - Add all files in folder"""
for root, dirs, files in os.walk(path):
for file in files[:max_files]:
file_path = os.path.join(root, file)
self.add_file(file_path)
def add_url(self, url: str) -> None:
"""@url - Fetch and add URL content"""
response = requests.get(url)
content = html_to_markdown(response.text)
self.context.append({
"type": "url",
"url": url,
"content": content
})
def add_problems(self, diagnostics: list) -> None:
"""@problems - Add IDE diagnostics"""
self.context.append({
"type": "diagnostics",
"problems": diagnostics
})
def format_for_prompt(self) -> str:
"""Format all context for LLM prompt"""
parts = []
for item in self.context:
if item["type"] == "file":
parts.append(f"## File: {item['path']}\n```\n{item['content']}\n```")
elif item["type"] == "url":
parts.append(f"## URL: {item['url']}\n{item['content']}")
elif item["type"] == "diagnostics":
parts.append(f"## Problems:\n{json.dumps(item['problems'], indent=2)}")
return "\n\n".join(parts)
5.2 Checkpoint/Resume
class CheckpointManager:
"""
Save and restore agent state for long-running tasks.
"""
def __init__(self, storage_dir: str):
self.storage_dir = storage_dir
os.makedirs(storage_dir, exist_ok=True)
def save_checkpoint(self, session_id: str, state: dict) -> str:
"""Save current agent state"""
checkpoint = {
"timestamp": datetime.now().isoformat(),
"session_id": session_id,
"history": state["history"],
"context": state["context"],
"workspace_state": self._capture_workspace(state["workspace"]),
"metadata": state.get("metadata", {})
}
path = os.path.join(self.storage_dir, f"{session_id}.json")
with open(path, 'w') as f:
json.dump(checkpoint, f, indent=2)
return path
def restore_checkpoint(self, checkpoint_path: str) -> dict:
"""Restore agent state from checkpoint"""
with open(checkpoint_path, 'r') as f:
checkpoint = json.load(f)
return {
"history": checkpoint["history"],
"context": checkpoint["context"],
"workspace": self._restore_workspace(checkpoint["workspace_state"]),
"metadata": checkpoint["metadata"]
}
def _capture_workspace(self, workspace: str) -> dict:
"""Capture relevant workspace state"""
# Git status, file hashes, etc.
return {
"git_ref": subprocess.getoutput(f"cd {workspace} && git rev-parse HEAD"),
"git_dirty": subprocess.getoutput(f"cd {workspace} && git status --porcelain")
}
6. MCP (Model Context Protocol) Integration
6.1 MCP Server Pattern
from mcp import Server, Tool
class MCPAgent:
"""
Agent that can dynamically discover and use MCP tools.
'Add a tool that...' pattern from Cline.
"""
def __init__(self, llm):
self.llm = llm
self.mcp_servers = {}
self.available_tools = {}
def connect_server(self, name: str, config: dict) -> None:
"""Connect to an MCP server"""
server = Server(config)
self.mcp_servers[name] = server
# Discover tools
tools = server.list_tools()
for tool in tools:
self.available_tools[tool.name] = {
"server": name,
"schema": tool.schema
}
async def create_tool(self, description: str) -> str:
"""
Create a new MCP server based on user description.
'Add a tool that fetches Jira tickets'
"""
# Generate MCP server code
code = self.llm.generate(f"""
Create a Python MCP server with a tool that does:
{description}
Use the FastMCP framework. Include proper error handling.
Return only the Python code.
""")
# Save and install
server_name = self._extract_name(description)
path = f"./mcp_servers/{server_name}/server.py"
with open(path, 'w') as f:
f.write(code)
# Hot-reload
self.connect_server(server_name, {"path": path})
return f"Created tool: {server_name}"
Best Practices Checklist
Agent Design
- [ ] Clear task decomposition
- [ ] Appropriate tool granularity
- [ ] Error handling at each step
- [ ] Progress visibility to user
Safety
- [ ] Permission system implemented
- [ ] Dangerous operations blocked
- [ ] Sandbox for untrusted code
- [ ] Audit logging enabled
UX
- [ ] Approval UI is clear
- [ ] Progress updates provided
- [ ] Undo/rollback available
- [ ] Explanation of actions
Resources
# Supported AI Coding Agents
This skill is compatible with the SKILL.md standard and works with all major AI coding agents:
Learn more about the SKILL.md standard and how to use these skills with your preferred AI coding agent.