-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
305 lines (251 loc) · 10.8 KB
/
Copy pathcli.py
File metadata and controls
305 lines (251 loc) · 10.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
"""
命令行交互界面
支持人机协作:AI可以随时暂停询问用户
"""
import asyncio
import sys
import json
from typing import Optional
from prompt_toolkit import PromptSession
from prompt_toolkit.history import FileHistory
from rich.console import Console
from rich.markdown import Markdown
from rich.panel import Panel
from rich.text import Text
from config import Config, APP_NAME, VERSION, SYSTEM_PROMPT
from llm_client import LLMClient
from browser_agent import SmartBrowserAgent
class BrowserAI_CLI:
    """Interactive command-line front end with human-in-the-loop support.

    Reads user input in a loop, handles slash commands, and routes every
    other message either to the browser agent (when the text looks like a
    browser task) or to a plain LLM chat. The agent can pause and ask the
    user a question through ``ask_user_callback``.
    """

    # Substrings that mark an input as a browser task. Every entry is
    # lowercase (or caseless CJK), so matching against the lowercased input
    # is sufficient.
    _BROWSER_KEYWORDS = (
        "打开", "访问", "搜索", "查找", "浏览", "截图",
        "点击", "输入", "填写", "登录", "注册", "查看", "看看", "瞧瞧",
        "去", "帮我", "查", "找", "搜", "翻",
        "baidu", "google", "bing", "github", "知乎", "bilibili", "淘宝",
        "twitter", "推特", "微博", "facebook", "instagram", "youtube",
        "https:", "http:", "www.", ".com", ".cn", ".org",
        "网页", "网站", "页面", "url", "链接", "浏览器",
        "账号", "社交媒体", "信息", "资料", "主页", "博客",
    )

    def __init__(self):
        """Create the console, the LLM client and the prompt session."""
        self.console = Console()
        self.llm = LLMClient()
        # Created lazily by init_browser() on the first browser task.
        self.agent: Optional[SmartBrowserAgent] = None
        self.session = None
        self.smart_mode = True
        # Whether the agent may pause and ask the user questions.
        self.interactive_mode = True
        self._setup_prompt()

    def _setup_prompt(self):
        """Set up the interactive prompt, preferring one with file history.

        Falls back to a session without history, and finally to ``None`` so
        that the plain ``input()`` path in ``_read_input`` is used instead of
        failing construction of the CLI.
        """
        try:
            history = FileHistory('.browserai_history')
            self.session = PromptSession(history=history)
        except Exception:
            # History is a convenience only; retry without it.
            try:
                self.session = PromptSession()
            except Exception:
                self.session = None

    async def _read_input(self, prompt: str) -> str:
        """Read one line from the user via the prompt session or ``input()``."""
        if self.session:
            return await self.session.prompt_async(prompt, multiline=False)
        return input(prompt)

    async def ask_user_callback(self, question: str, context: str = "") -> str:
        """Show a question from the agent and return the user's answer.

        Args:
            question: Text of the question to display.
            context: Extra detail, rendered dimmed below the question.

        Returns:
            The line typed by the user, or "继续" if input could not be read
            (including Ctrl-C / end of input).
        """
        # Build the body as a Text object so brackets in model-generated text
        # are shown literally instead of being parsed as Rich markup.
        body = Text.assemble(str(question), "\n\n", (str(context), "dim"))
        self.console.print(
            Panel(
                body,
                title="🤖 AI 需要你的帮助",
                border_style="yellow",
                padding=(1, 2),
            )
        )

        try:
            return await self._read_input("💬 你的回答: ")
        except (Exception, KeyboardInterrupt):
            # Best effort: tell the agent to carry on. Task cancellation and
            # SystemExit are deliberately not caught here.
            return "继续"

    def print_banner(self):
        """Print the start-up banner with the current browser/interactive mode."""
        cfg = Config()
        browser_mode = "Edge/Chrome" if cfg.BROWSER.use_local_browser else "内置浏览器"
        # NOTE(review): the spacing inside the box looks collapsed in the
        # reviewed copy of this file; confirm the literal against the
        # checked-in version. `:<20` pads by code points, so CJK text will not
        # line up with the border in any case.
        banner = f"""
╭─────────────────────────────────────────────╮
│ │
│ 🤖 {APP_NAME} v{VERSION} │
│ 智能浏览器助手 · 自动决策 · 人机协作 │
│ │
│ 浏览器模式: {browser_mode:<20} │
│ 人机协作: {'开启' if self.interactive_mode else '关闭':<20} │
│ │
│ 输入 /help 查看命令 │
│ 输入 /quit 退出 │
│ │
╰─────────────────────────────────────────────╯
"""
        self.console.print(banner, style="cyan")

    def print_help(self):
        """Render the help text (commands, features, examples) as Markdown."""
        # NOTE(review): blank lines between Markdown sections may have been
        # lost in the reviewed copy; confirm against the checked-in version.
        help_text = """
## 可用命令
| 命令 | 说明 |
|------|------|
| `/help` | 显示此帮助信息 |
| `/quit` | 退出程序 |
| `/clear` | 清除对话历史 |
| `/interactive` | 切换人机协作模式 |
| `/smart` | 切换智能模式 |
| `/mode` | 查看当前模式 |
| `/browser` | 查看浏览器配置 |
## 核心特性
🧠 **人机协作模式**(智能询问)
AI在以下情况会暂停询问你:
- 需要登录账号("请登录后继续")
- 需要验证码("请输入验证码")
- 需要选择("要点哪个?")
- 需要确认("确认提交订单吗?")
🌐 **本地浏览器支持**
默认使用你电脑上的Edge/Chrome浏览器,可以使用你的:
- 已登录的账号(Cookie)
- 保存的密码
- 浏览器插件
## 使用示例
```
> 打开百度
> 去知乎搜索"AI Agent",找到相关内容
> 访问GitHub trending
> 截图保存当前页面
> 帮我查看某宝购物车的价格
```
## 配置选项 (.env 文件)
```
USE_LOCAL_BROWSER=true # 使用本地浏览器
LOCAL_BROWSER_PATH= # 手动指定路径
HEADLESS=false # 是否无头模式
```
"""
        self.console.print(Markdown(help_text))

    async def init_browser(self):
        """Create and start the browser agent if it does not exist yet."""
        if self.agent:
            return
        agent = SmartBrowserAgent()
        agent.set_ask_callback(self.ask_user_callback)
        await agent.start()
        # Apply a /interactive toggle made before the browser existed; this is
        # the same attribute handle_command() sets on a live agent.
        agent.interactive_mode = self.interactive_mode
        # Publish the agent only after start() succeeded, so a failed start is
        # retried on the next browser task instead of reusing a dead agent.
        self.agent = agent

    async def close_browser(self):
        """Close the browser agent, if any, and forget it."""
        # Clear the reference first so it is dropped even if close() raises.
        agent, self.agent = self.agent, None
        if agent:
            await agent.close()

    async def handle_command(self, user_input: str) -> Optional[bool]:
        """Handle a slash command.

        Args:
            user_input: Raw line typed by the user.

        Returns:
            ``False`` when the program should exit, ``True`` when the input
            was a recognised command and has been handled, ``None`` when the
            input is not a command.
        """
        cmd = user_input.lower().strip()

        if cmd in ["/quit", "/exit", "/q"]:
            return False
        elif cmd == "/help":
            self.print_help()
        elif cmd == "/clear":
            self.llm.clear_history()
            self.console.print("✅ 对话历史已清除", style="green")
        elif cmd == "/interactive":
            self.interactive_mode = not self.interactive_mode
            if self.agent:
                self.agent.interactive_mode = self.interactive_mode
            status = "开启" if self.interactive_mode else "关闭"
            self.console.print(f"人机协作模式已{status}", style="green")
        elif cmd == "/smart":
            self.smart_mode = not self.smart_mode
            status = "开启" if self.smart_mode else "关闭"
            self.console.print(f"智能模式已{status}", style="green")
        elif cmd == "/mode":
            cfg = Config()
            self.console.print(f"智能决策: {'开启' if self.smart_mode else '关闭'}", style="blue")
            self.console.print(f"人机协作: {'开启' if self.interactive_mode else '关闭'}", style="blue")
            self.console.print(f"本地浏览器: {'Edge/Chrome' if cfg.BROWSER.use_local_browser else '内置'}", style="blue")
        elif cmd == "/browser":
            cfg = Config()
            self.console.print(f"使用本地浏览器: {cfg.BROWSER.use_local_browser}", style="blue")
            if cfg.BROWSER.local_browser_path:
                self.console.print(f"指定路径: {cfg.BROWSER.local_browser_path}", style="blue")
            self.console.print(f"无头模式: {cfg.BROWSER.headless}", style="blue")
        else:
            return None
        return True

    def is_browser_task(self, user_input: str) -> bool:
        """Return True if the input contains any browser-task keyword."""
        text_lower = user_input.lower()
        return any(kw in text_lower for kw in self._BROWSER_KEYWORDS)

    async def process_message(self, user_input: str):
        """Route a non-command message to the browser agent or the LLM chat."""
        if not self.is_browser_task(user_input):
            response = self.llm.chat(user_input)
            # Model output is free text; print it literally so stray brackets
            # are not interpreted as Rich markup.
            self.console.print(f"\n🤖 {response}", markup=False)
            return

        await self.init_browser()
        if self.smart_mode:
            print("\n🤖 正在执行任务,AI会自动决策...")
            if self.interactive_mode:
                print("(如有疑问,AI会暂停询问你)\n")
            result = await self.agent.run_task(user_input, self.llm)
        else:
            result = await self._execute_simple_task(user_input)
        # NOTE(review): the result is still parsed as Rich markup; confirm
        # whether the agent relies on that before switching to markup=False.
        self.console.print(f"\n{result}")

    async def _execute_simple_task(self, user_input: str) -> str:
        """Execute a task in simple (non-smart) mode by keyword matching.

        Recognises search, open/visit and screenshot requests. Anything else,
        or a search/open request with no target, reports the current page
        title instead.
        """
        if "搜索" in user_input:
            query = user_input.split("搜索")[-1].strip()
            if query:
                engine = "bing"
                if "百度" in user_input:
                    engine = "baidu"
                elif "google" in user_input.lower():
                    engine = "google"
                await self.agent.execute_action({
                    "action": "search",
                    "query": query,
                    "engine": engine
                })
                return f"✅ 已在{engine}搜索: {query}"
        elif "打开" in user_input or "访问" in user_input:
            url = user_input.replace("打开", "").replace("访问", "").strip()
            # Same guard as the search branch: never navigate to an empty URL.
            if url:
                await self.agent.execute_action({"action": "navigate", "url": url})
                return f"✅ 已打开: {url}"
        elif "截图" in user_input:
            await self.agent.execute_action({"action": "screenshot", "path": "screenshot.png"})
            return "✅ 已保存截图到 screenshot.png"

        info = await self.agent.get_page_info()
        return f"当前页面: {info.get('title', 'Unknown')}"

    async def run(self):
        """Run the main read/dispatch loop until the user quits.

        Validates the configuration first. Ctrl-C abandons the current input
        or task, end of input exits, and any other error from a single
        command or task is reported without ending the session. The browser
        is always closed on the way out.
        """
        if not Config.validate():
            self.console.print("❌ 配置验证失败", style="red")
            return

        self.llm.add_system_message()
        self.print_banner()

        try:
            while True:
                try:
                    user_input = await self._read_input("> ")
                    if not user_input.strip():
                        continue

                    cmd_result = await self.handle_command(user_input)
                    if cmd_result is False:
                        break
                    if cmd_result is True:
                        continue

                    await self.process_message(user_input)
                except KeyboardInterrupt:
                    print("\n")
                    continue
                except EOFError:
                    break
                except Exception as exc:
                    # One failed task must not end the whole session.
                    self.console.print(f"❌ 出错了: {exc}", style="red", markup=False)
        finally:
            await self.close_browser()
            self.console.print(f"\n👋 感谢使用 {APP_NAME}!", style="cyan")
def main():
    """Entry point: build the CLI and run its async loop to completion."""
    asyncio.run(BrowserAI_CLI().run())


if __name__ == "__main__":
    main()