Remote-Controller-Assistant-for-Computers/main.py at main · emircansoftware/Remote-Controller-Assistant-for-Computers · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Okey Google - Main Launcher
Capture voice input and send to the agent
"""

import time
import sys
import os
from agent import agent
from speech_to_text import listen_and_recognize
from text_to_speech import speak
from langchain_core.messages import HumanMessage
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process
# environment so that main() can find GOOGLE_API_KEY via os.getenv().
load_dotenv()


# Marker used to detect a failed recognition. Presumably
# listen_and_recognize() reports failures as text containing this Turkish
# word for "error" -- TODO confirm against speech_to_text.
_RECOGNITION_ERROR_MARKER = "hatası"

# How long (seconds) follow-up commands are accepted after the last command.
_FOLLOW_UP_WINDOW_SECONDS = 120

# Timeout (seconds) passed to each individual follow-up listening attempt.
_FOLLOW_UP_LISTEN_SECONDS = 10


def _is_usable_command(text):
    """Return True if *text* is non-blank and has no recognition-error marker."""
    if not text or not text.strip():
        return False
    return _RECOGNITION_ERROR_MARKER not in text.lower()


def _run_agent(command):
    """Invoke the agent with a fresh state built from *command*; return its result."""
    initial_state = {
        "messages": [HumanMessage(content=command)],
        "request": command,
        "installed_programs": [],
        "desktop_files": [],
        "screen_texts": [],
        "is_action_successful": False,
        "actions": [],
        "current_action_idx": 0,
        "action_result": "",
    }
    # Every invocation uses the same thread id.
    return agent.invoke(initial_state, {"configurable": {"thread_id": "1"}})


def _announce_response(result):
    """Print and speak the last message of an agent *result*, if there is one."""
    messages = result.get("messages", [])
    if not messages:
        return

    last_message = messages[-1]
    # Message objects expose their text as `.content`; fall back to str()
    # for anything that does not.
    if hasattr(last_message, 'content'):
        response_text = last_message.content
    else:
        response_text = str(last_message)

    print(f"\n🤖 Response: {response_text}\n")
    print("🔊 Playing spoken response...")
    speak(response_text)


def _listen_for_follow_ups():
    """Accept follow-up commands until the window passes without a new one.

    Each successfully handled command restarts the window, as announced to
    the user ("refreshes after each"). Ctrl+C ends the wait early.
    """
    window_start = time.time()

    while time.time() - window_start < _FOLLOW_UP_WINDOW_SECONDS:
        try:
            print(f"🎤 Please say an additional command (listening for {_FOLLOW_UP_LISTEN_SECONDS} seconds)...\n")

            extra_command = listen_and_recognize(
                timeout=_FOLLOW_UP_LISTEN_SECONDS,
                phrase_time_limit=None,
                require_activation=False,
            )

            if _is_usable_command(extra_command):
                print(f"\n📝 Additional command: {extra_command}\n")

                print("🤖 Agent running...\n")
                _announce_response(_run_agent(extra_command))

                # Restart the window so the user gets the full time again
                # after every handled command.
                window_start = time.time()

                print("\n" + "="*60)
                print("Ready for the next command...\n")
            else:
                # Nothing usable was heard; keep waiting while time remains.
                remaining = _FOLLOW_UP_WINDOW_SECONDS - (time.time() - window_start)
                if remaining > 0:
                    print(f"⏱️  Still listening ({remaining:.0f}s left)...\n")

        except KeyboardInterrupt:
            print("\n🛑 Additional command wait cancelled...\n")
            break
        except Exception as exc:
            # Report the failure instead of hiding it, then keep listening
            # for as long as the window allows.
            print(f"⚠️  Follow-up command failed: {exc}")
            remaining = _FOLLOW_UP_WINDOW_SECONDS - (time.time() - window_start)
            if remaining <= 0:
                break
            print(f"⚠️  Still listening ({remaining:.0f}s left)...\n")
            # Short pause so an immediately recurring error cannot spin the loop.
            time.sleep(1)


def main():
    """Run the voice assistant loop: listen, run the agent, speak the reply.

    After each handled command, follow-up commands are accepted for a
    limited window before the loop returns to its normal listening step.

    A Ctrl+C raised while a command is being handled only abandons that
    iteration; the program shuts down when Ctrl+C arrives outside the
    per-command handler (for example during the one-second pause that
    follows an interruption).

    Raises:
        SystemExit: with status 1 when GOOGLE_API_KEY is not set, and with
            status 0 on shutdown via Ctrl+C.
    """
    # The API key is required; stop early if it is missing.
    if not os.getenv("GOOGLE_API_KEY"):
        print("\n❌ ERROR: GOOGLE_API_KEY environment variable is not set!")
        print("   Please create a .env file and add your API key")
        sys.exit(1)

    print("\n" + "="*60)
    print("✨ Okey Google - Voice-Controlled Assistant")
    print("="*60)
    print("\n🛑 Press Ctrl+C to exit\n")

    try:
        while True:
            try:
                print("🎤 I'm ready... Please speak")

                # Get the voice command.
                listened_text = listen_and_recognize()
                print(f"📝 Recognized: {listened_text}\n")

                if not _is_usable_command(listened_text):
                    print("⚠️ Speech not recognized, please try again...\n")
                    speak("Ses tanınamadı, lütfen tekrar deneyin")
                    time.sleep(1)
                    continue

                # Run the agent on the recognized text and speak its reply.
                print("🤖 Agent running...\n")
                _announce_response(_run_agent(listened_text))

                print("✅ Completed!\n")

                print("⏳ Waiting for additional commands (refreshes after each)...\n")
                _listen_for_follow_ups()

                print("\n" + "="*60)
                print("Additional command wait time expired\n")

                print("\n" + "="*60)
                print("Waiting for Ok Google...\n")

            except KeyboardInterrupt:
                # Abandon only the current iteration; a second Ctrl+C during
                # this pause propagates to the outer handler and exits.
                print("\n🛑 Loop interrupted...\n")
                time.sleep(1)
            except Exception as e:
                print(f"\n❌ Error: {e}\n")
                import traceback
                traceback.print_exc()
                time.sleep(2)

    except KeyboardInterrupt:
        print("\n👋 Shutting down...")
        sys.exit(0)


# Start the assistant only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()