| 1 | import requests |
| 2 | import time |
| 3 | import os |
| 4 | import json |
| 5 | import threading |
| 6 | |
| 7 | from deepgram import DeepgramClient |
| 8 | from deepgram.core.events import EventType |
| 9 | from deepgram.agent.v1.types import ( |
| 10 | AgentV1Settings, |
| 11 | AgentV1SettingsAgent, |
| 12 | AgentV1SettingsAudio, |
| 13 | AgentV1SettingsAudioInput, |
| 14 | AgentV1SettingsAudioOutput, |
| 15 | AgentV1SettingsAgentListen, |
| 16 | AgentV1SettingsAgentListenProvider_V1, |
| 17 | ) |
| 18 | from deepgram.types.think_settings_v1 import ThinkSettingsV1 |
| 19 | from deepgram.types.think_settings_v1provider import ThinkSettingsV1Provider_OpenAi |
| 20 | from deepgram.types.speak_settings_v1 import SpeakSettingsV1 |
| 21 | from deepgram.types.speak_settings_v1provider import SpeakSettingsV1Provider_Deepgram |
| 22 | |
def main():
    """Stream a sample WAV file to a Deepgram voice agent and save each of the
    agent's spoken replies as a numbered WAV file (output-0.wav, output-1.wav, ...).

    Requires the DEEPGRAM_API_KEY environment variable. Conversation text
    events are appended to chatlog.txt, one JSON object per line. Any failure
    is caught at this top-level boundary and printed.
    """
    try:
        api_key = os.getenv("DEEPGRAM_API_KEY")
        if not api_key:
            raise ValueError("DEEPGRAM_API_KEY environment variable is not set")

        client = DeepgramClient(api_key=api_key)

        with client.agent.v1.connect() as connection:
            print("Created WebSocket connection...")

            # Agent configuration: 24 kHz linear16 PCM in and out, Deepgram
            # nova-3 for listening, OpenAI gpt-4o-mini for thinking, and a
            # Deepgram Aura voice for speaking.
            settings = AgentV1Settings(
                audio=AgentV1SettingsAudio(
                    input=AgentV1SettingsAudioInput(
                        encoding="linear16",
                        sample_rate=24000,
                    ),
                    output=AgentV1SettingsAudioOutput(
                        encoding="linear16",
                        sample_rate=24000,
                        container="wav",
                    ),
                ),
                agent=AgentV1SettingsAgent(
                    language="en",
                    listen=AgentV1SettingsAgentListen(
                        provider=AgentV1SettingsAgentListenProvider_V1(
                            type="deepgram",
                            model="nova-3",
                        )
                    ),
                    think=ThinkSettingsV1(
                        provider=ThinkSettingsV1Provider_OpenAi(
                            type="open_ai",
                            model="gpt-4o-mini",
                        ),
                        prompt="You are a friendly AI assistant.",
                    ),
                    speak=SpeakSettingsV1(
                        provider=SpeakSettingsV1Provider_Deepgram(
                            type="deepgram",
                            model="aura-2-thalia-en",
                        )
                    ),
                    greeting="Hello! How can I help you today?",
                ),
            )

            # State shared with the message callback below.
            audio_buffer = bytearray()   # raw PCM accumulated for the current reply
            file_counter = 0             # suffix for the next output-N.wav
            processing_complete = False  # set once a full reply has been written

            def on_message(message):
                """Handle one WebSocket event: buffer binary audio frames, log
                conversation text, and flush a WAV file when a reply ends."""
                nonlocal audio_buffer, file_counter, processing_complete

                # Binary frames are raw agent audio; accumulate until AgentAudioDone.
                if isinstance(message, bytes):
                    audio_buffer.extend(message)
                    return

                msg_type = getattr(message, "type", "Unknown")

                if msg_type == "ConversationText":
                    print(f"Conversation: {message}")
                    with open("chatlog.txt", 'a') as chatlog:
                        # NOTE(review): json.dumps raises if message.__dict__
                        # holds non-JSON-serializable values — assumes the SDK
                        # event objects are plain data; confirm.
                        chatlog.write(f"{json.dumps(message.__dict__)}\n")

                elif msg_type == "AgentAudioDone":
                    print("Agent audio done")
                    if len(audio_buffer) > 0:
                        with open(f"output-{file_counter}.wav", 'wb') as f:
                            f.write(create_wav_header())
                            f.write(audio_buffer)
                        print(f"Created output-{file_counter}.wav")
                        audio_buffer = bytearray()
                        file_counter += 1
                    processing_complete = True

            connection.on(EventType.MESSAGE, on_message)
            connection.send_settings(settings)

            # Receive on a daemon thread so the main thread can stream audio.
            listener_thread = threading.Thread(target=connection.start_listening, daemon=True)
            listener_thread.start()

            time.sleep(1)  # give the server a moment to apply the settings

            print("Streaming audio...")
            # Fix: the original request had no timeout (could hang forever),
            # never checked the HTTP status, and never closed the response.
            with requests.get("https://dpgr.am/spacewalk.wav", stream=True, timeout=30) as response:
                response.raise_for_status()
                # Consume the 44-byte WAV header so only raw PCM is streamed.
                response.raw.read(44)

                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        connection.send_media(chunk)
                        time.sleep(0.1)  # pace the upload roughly like real-time audio

            # Wait for the agent's reply to be fully written, up to 30 s.
            print("Waiting for agent response...")
            start_time = time.time()
            while not processing_complete and (time.time() - start_time) < 30:
                time.sleep(1)

            print("Finished")

    except Exception as e:
        print(f"Error: {str(e)}")
| 126 | |
def create_wav_header(sample_rate=24000, bits_per_sample=16, channels=1, data_size=0):
    """Build a 44-byte RIFF/WAVE header for uncompressed PCM audio.

    Args:
        sample_rate: Samples per second.
        bits_per_sample: Bit depth of each sample.
        channels: Number of audio channels.
        data_size: Size in bytes of the PCM payload that will follow the
            header. The previous version left both size fields as zero
            placeholders, which yields files some players reject; pass the
            real payload length to emit a fully valid header. The default
            of 0 keeps existing no-argument calls working.

    Returns:
        bytearray: The 44-byte header.
    """
    bytes_per_sample = bits_per_sample // 8
    byte_rate = sample_rate * channels * bytes_per_sample
    block_align = channels * bytes_per_sample

    header = bytearray(44)
    header[0:4] = b'RIFF'
    # RIFF chunk size = 4 ("WAVE") + 24 (fmt chunk) + 8 (data chunk header) + payload.
    header[4:8] = (36 + data_size).to_bytes(4, 'little')
    header[8:12] = b'WAVE'
    header[12:16] = b'fmt '
    header[16:20] = (16).to_bytes(4, 'little')   # fmt chunk size for PCM
    header[20:22] = (1).to_bytes(2, 'little')    # audio format 1 = linear PCM
    header[22:24] = channels.to_bytes(2, 'little')
    header[24:28] = sample_rate.to_bytes(4, 'little')
    header[28:32] = byte_rate.to_bytes(4, 'little')
    header[32:34] = block_align.to_bytes(2, 'little')
    header[34:36] = bits_per_sample.to_bytes(2, 'little')
    header[36:40] = b'data'
    header[40:44] = data_size.to_bytes(4, 'little')  # data chunk size = payload bytes
    return header
| 145 | |
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()