1 | package main |
2 | |
3 | import ( |
4 | "context" |
5 | "fmt" |
6 | "os" |
7 | "strings" |
8 | "sync" |
9 | "time" |
10 | |
11 | msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces" |
12 | interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1" |
13 | speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak" |
14 | ) |
15 | |
// TTS_TEXT is the sentence sent to the text-to-speech service.
const TTS_TEXT = "Hello, this is a text to speech example using Deepgram."

// AUDIO_FILE is the path of the file that receives the WAV header and the
// audio chunks.
const AUDIO_FILE = "output.wav"
20 | |
21 | type MyHandler struct { |
22 | binaryChan chan *[]byte |
23 | openChan chan *msginterfaces.OpenResponse |
24 | metadataChan chan *msginterfaces.MetadataResponse |
25 | flushChan chan *msginterfaces.FlushedResponse |
26 | clearChan chan *msginterfaces.ClearedResponse |
27 | closeChan chan *msginterfaces.CloseResponse |
28 | warningChan chan *msginterfaces.WarningResponse |
29 | errorChan chan *msginterfaces.ErrorResponse |
30 | unhandledChan chan *[]byte |
31 | } |
32 | |
33 | func NewMyHandler() MyHandler { |
34 | handler := MyHandler{ |
35 | binaryChan: make(chan *[]byte), |
36 | openChan: make(chan *msginterfaces.OpenResponse), |
37 | metadataChan: make(chan *msginterfaces.MetadataResponse), |
38 | flushChan: make(chan *msginterfaces.FlushedResponse), |
39 | clearChan: make(chan *msginterfaces.ClearedResponse), |
40 | closeChan: make(chan *msginterfaces.CloseResponse), |
41 | warningChan: make(chan *msginterfaces.WarningResponse), |
42 | errorChan: make(chan *msginterfaces.ErrorResponse), |
43 | unhandledChan: make(chan *[]byte), |
44 | } |
45 | |
46 | go func() { |
47 | handler.Run() |
48 | }() |
49 | |
50 | return handler |
51 | } |
52 | |
53 | // GetUnhandled returns the binary event channels |
54 | func (dch MyHandler) GetBinary() []*chan *[]byte { |
55 | return []*chan *[]byte{&dch.binaryChan} |
56 | } |
57 | |
58 | // GetOpen returns the open channels |
59 | func (dch MyHandler) GetOpen() []*chan *msginterfaces.OpenResponse { |
60 | return []*chan *msginterfaces.OpenResponse{&dch.openChan} |
61 | } |
62 | |
63 | // GetMetadata returns the metadata channels |
64 | func (dch MyHandler) GetMetadata() []*chan *msginterfaces.MetadataResponse { |
65 | return []*chan *msginterfaces.MetadataResponse{&dch.metadataChan} |
66 | } |
67 | |
68 | // GetFlushed returns the flush channels |
69 | func (dch MyHandler) GetFlush() []*chan *msginterfaces.FlushedResponse { |
70 | return []*chan *msginterfaces.FlushedResponse{&dch.flushChan} |
71 | } |
72 | |
73 | // GetCleared returns the clear channels |
74 | func (dch MyHandler) GetClear() []*chan *msginterfaces.ClearedResponse { |
75 | return []*chan *msginterfaces.ClearedResponse{&dch.clearChan} |
76 | } |
77 | |
78 | // GetClose returns the close channels |
79 | func (dch MyHandler) GetClose() []*chan *msginterfaces.CloseResponse { |
80 | return []*chan *msginterfaces.CloseResponse{&dch.closeChan} |
81 | } |
82 | |
83 | // GetWarning returns the warning channels |
84 | func (dch MyHandler) GetWarning() []*chan *msginterfaces.WarningResponse { |
85 | return []*chan *msginterfaces.WarningResponse{&dch.warningChan} |
86 | } |
87 | |
88 | // GetError returns the error channels |
89 | func (dch MyHandler) GetError() []*chan *msginterfaces.ErrorResponse { |
90 | return []*chan *msginterfaces.ErrorResponse{&dch.errorChan} |
91 | } |
92 | |
93 | // GetUnhandled returns the unhandled event channels |
94 | func (dch MyHandler) GetUnhandled() []*chan *[]byte { |
95 | return []*chan *[]byte{&dch.unhandledChan} |
96 | } |
97 | |
98 | // Open is the callback for when the connection opens |
99 | // golintci: funlen |
100 | func (dch MyHandler) Run() error { |
101 | wgReceivers := sync.WaitGroup{} |
102 | |
103 | // open channel |
104 | wgReceivers.Add(1) |
105 | go func() { |
106 | defer wgReceivers.Done() |
107 | |
108 | for _ = range dch.openChan { |
109 | fmt.Printf("\n\n[OpenResponse]\n\n") |
110 | } |
111 | }() |
112 | |
113 | // binary channel |
114 | wgReceivers.Add(1) |
115 | go func() { |
116 | defer wgReceivers.Done() |
117 | |
118 | for br := range dch.binaryChan { |
119 | fmt.Printf("\n\n[Binary Data]\n") |
120 | |
121 | file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666) |
122 | if err != nil { |
123 | fmt.Printf("Failed to open file. Err: %v\n", err) |
124 | continue |
125 | } |
126 | |
127 | _, err = file.Write(*br) |
128 | file.Close() |
129 | |
130 | if err != nil { |
131 | fmt.Printf("Failed to write to file. Err: %v\n", err) |
132 | continue |
133 | } |
134 | } |
135 | }() |
136 | |
137 | // metadata channel |
138 | wgReceivers.Add(1) |
139 | go func() { |
140 | defer wgReceivers.Done() |
141 | |
142 | for mr := range dch.metadataChan { |
143 | fmt.Printf("\n[FlushedResponse]\n") |
144 | fmt.Printf("RequestID: %s\n", strings.TrimSpace(mr.RequestID)) |
145 | } |
146 | }() |
147 | |
148 | // flushed channel |
149 | wgReceivers.Add(1) |
150 | go func() { |
151 | defer wgReceivers.Done() |
152 | |
153 | for _ = range dch.flushChan { |
154 | fmt.Printf("\n[FlushedResponse]\n") |
155 | } |
156 | }() |
157 | |
158 | // cleared channel |
159 | wgReceivers.Add(1) |
160 | go func() { |
161 | defer wgReceivers.Done() |
162 | |
163 | for _ = range dch.clearChan { |
164 | fmt.Printf("\n[ClearedResponse]\n") |
165 | } |
166 | }() |
167 | |
168 | // close channel |
169 | wgReceivers.Add(1) |
170 | go func() { |
171 | defer wgReceivers.Done() |
172 | |
173 | for _ = range dch.closeChan { |
174 | fmt.Printf("\n\n[CloseResponse]\n\n") |
175 | } |
176 | }() |
177 | |
178 | // warning channel |
179 | wgReceivers.Add(1) |
180 | go func() { |
181 | defer wgReceivers.Done() |
182 | |
183 | for er := range dch.warningChan { |
184 | fmt.Printf("\n[WarningResponse]\n") |
185 | fmt.Printf("\nWarning.Type: %s\n", er.WarnCode) |
186 | fmt.Printf("Warning.Message: %s\n", er.WarnMsg) |
187 | fmt.Printf("Warning.Description: %s\n\n", er.Description) |
188 | fmt.Printf("Warning.Variant: %s\n\n", er.Variant) |
189 | } |
190 | }() |
191 | |
192 | // error channel |
193 | wgReceivers.Add(1) |
194 | go func() { |
195 | defer wgReceivers.Done() |
196 | |
197 | for er := range dch.errorChan { |
198 | fmt.Printf("\n[ErrorResponse]\n") |
199 | fmt.Printf("\nError.Type: %s\n", er.ErrCode) |
200 | fmt.Printf("Error.Message: %s\n", er.ErrMsg) |
201 | fmt.Printf("Error.Description: %s\n\n", er.Description) |
202 | fmt.Printf("Error.Variant: %s\n\n", er.Variant) |
203 | } |
204 | }() |
205 | |
206 | // unhandled event channel |
207 | wgReceivers.Add(1) |
208 | go func() { |
209 | defer wgReceivers.Done() |
210 | |
211 | for byData := range dch.unhandledChan { |
212 | fmt.Printf("\n[UnhandledEvent]") |
213 | fmt.Printf("Dump:\n%s\n\n", string(*byData)) |
214 | } |
215 | }() |
216 | |
217 | // wait for all receivers to finish |
218 | wgReceivers.Wait() |
219 | |
220 | return nil |
221 | } |
222 | |
223 | func main() { |
224 | // init library |
225 | speak.Init(speak.InitLib{ |
226 | LogLevel: speak.LogLevelDefault, // LogLevelDefault, LogLevelFull, LogLevelDebug, LogLevelTrace |
227 | }) |
228 | |
229 | // Go context |
230 | ctx := context.Background() |
231 | |
232 | // set the Client options |
233 | cOptions := &interfaces.ClientOptions{ |
234 | // AutoFlushSpeakDelta: 1000, |
235 | } |
236 | |
237 | // set the TTS options |
238 | ttsOptions := &interfaces.WSSpeakOptions{ |
239 | Model: "aura-asteria-en", |
240 | Encoding: "linear16", |
241 | SampleRate: 48000, |
242 | } |
243 | |
244 | // create the callback |
245 | callback := NewMyHandler() |
246 | |
247 | // create a new stream using the NewStream function |
248 | dgClient, err := speak.NewWSUsingChan(ctx, "", cOptions, ttsOptions, callback) |
249 | if err != nil { |
250 | fmt.Println("ERROR creating TTS connection:", err) |
251 | return |
252 | } |
253 | |
254 | // connect the websocket to Deepgram |
255 | bConnected := dgClient.Connect() |
256 | if !bConnected { |
257 | fmt.Println("Client.Connect failed") |
258 | os.Exit(1) |
259 | } |
260 | |
261 | file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666) |
262 | if err != nil { |
263 | fmt.Printf("Failed to open file. Err: %v\n", err) |
264 | return |
265 | } |
266 | // Add a wav audio container header to the file if you want to play the audio |
267 | // using a media player like VLC, Media Player, or Apple Music |
268 | header := []byte{ |
269 | 0x52, 0x49, 0x46, 0x46, // "RIFF" |
270 | 0x00, 0x00, 0x00, 0x00, // Placeholder for file size |
271 | 0x57, 0x41, 0x56, 0x45, // "WAVE" |
272 | 0x66, 0x6d, 0x74, 0x20, // "fmt " |
273 | 0x10, 0x00, 0x00, 0x00, // Chunk size (16) |
274 | 0x01, 0x00, // Audio format (1 for PCM) |
275 | 0x01, 0x00, // Number of channels (1) |
276 | 0x80, 0xbb, 0x00, 0x00, // Sample rate (48000) |
277 | 0x00, 0xee, 0x02, 0x00, // Byte rate (48000 * 2) |
278 | 0x02, 0x00, // Block align (2) |
279 | 0x10, 0x00, // Bits per sample (16) |
280 | 0x64, 0x61, 0x74, 0x61, // "data" |
281 | 0x00, 0x00, 0x00, 0x00, // Placeholder for data size |
282 | } |
283 | |
284 | _, err = file.Write(header) |
285 | if err != nil { |
286 | fmt.Printf("Failed to write header to file. Err: %v\n", err) |
287 | return |
288 | } |
289 | file.Close() |
290 | |
291 | // Send the text input |
292 | err = dgClient.SpeakWithText(TTS_TEXT) |
293 | if err != nil { |
294 | fmt.Printf("Error sending text input: %v\n", err) |
295 | return |
296 | } |
297 | |
298 | // If AutoFlushSpeakDelta is not set, you Flush the text input manually |
299 | err = dgClient.Flush() |
300 | if err != nil { |
301 | fmt.Printf("Error sending text input: %v\n", err) |
302 | return |
303 | } |
304 | |
305 | // wait for user input to exit |
306 | time.Sleep(5 * time.Second) |
307 | |
308 | // close the connection |
309 | dgClient.Stop() |
310 | |
311 | fmt.Printf("Program exiting...\n") |
312 | } |