Text to Speech REST — Deepgram

The Deepgram JavaScript SDK now works in both server and browser environments. A proxy configuration is required for browser environments (see the section below).

Installing the SDK

TypeScript

1 # Install the Deepgram JS SDK
2 # https://github.com/deepgram/deepgram-js-sdk
3 
4 npm install @deepgram/sdk

Initializing the SDK

TypeScript

1 import { createClient } from "@deepgram/sdk";
2 import fs from "fs";
3 
4 const deepgram = createClient("DEEPGRAM_API_KEY");

Make a Deepgram Text-to-Speech Request

Once the SDK is initialized, you can make a request to convert text into speech.

TypeScript

const text = "Hello, how can I help you today?";

/**
 * Converts `text` to speech with Deepgram and saves the result to output.wav.
 *
 * Uses `deepgram`, `fs` and `getAudioBuffer` from the surrounding script.
 * Failures are logged instead of thrown, because the script calls this
 * function without a rejection handler.
 *
 * @returns {Promise<void>} Settles after the file has been written or the
 *   failure has been logged.
 */
const getAudio = async () => {
  try {
    // STEP 1: Make a request and configure the request with options (such as model choice, audio configuration, etc.)
    const response = await deepgram.speak.request(
      { text },
      {
        model: "aura-2-thalia-en",
        encoding: "linear16",
        container: "wav",
      }
    );

    // STEP 2: Get the audio stream and headers from the response
    const stream = await response.getStream();
    const headers = await response.getHeaders();

    if (stream) {
      // STEP 3: Convert the stream to an audio buffer
      const buffer = await getAudioBuffer(stream);

      // STEP 4: Write the audio buffer to a file. Awaiting the write keeps
      // this function pending until the file is actually on disk.
      try {
        await fs.promises.writeFile("output.wav", buffer);
        console.log("Audio file written to output.wav");
      } catch (err) {
        console.error("Error writing audio to file:", err);
      }
    } else {
      console.error("Error generating audio:", stream);
    }

    if (headers) {
      console.log("Headers:", headers);
    }
  } catch (err) {
    // A failed request or stream read would otherwise surface as an
    // unhandled promise rejection at the bare getAudio() call.
    console.error("Error generating audio:", err);
  }
};
36 
/**
 * Helper function to convert the stream to an audio buffer.
 *
 * Reads the stream to the end and joins every chunk into one Node.js Buffer.
 *
 * @param {{ getReader: Function }} response - Readable stream exposing
 *   getReader(); chunks are assumed to be Uint8Array instances (TODO confirm
 *   for non-fetch stream sources).
 * @returns {Promise<Buffer>} All bytes of the stream, in arrival order.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Release the lock even when a read fails, so the stream is not left locked.
    reader.releaseLock();
  }

  // Buffer.concat copies each chunk once; rebuilding the array per chunk
  // with spread re-copies every byte received so far.
  return Buffer.concat(chunks);
};
56 
57 getAudio();

Audio Output Streaming

Deepgram’s TTS API allows you to start playing the audio as soon as the first byte is received. This section provides examples to help you stream the audio output efficiently.

Single Text Source Payload

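The following example requests audio for a single text source and reads the response stream chunk by chunk as the bytes arrive. For simplicity, it collects the chunks into a buffer and writes them to a file once the stream ends.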

TypeScript

const DEEPGRAM_API_KEY = 'YOUR_DEEPGRAM_API_KEY';
const deepgram = createClient(DEEPGRAM_API_KEY);

const text = "Hello, how can I help you today? My name is Emily and I'm very glad to meet you. What do you think of this new text-to-speech API?";
const audioFilePath = 'output.wav'; // Path to save the audio file

/**
 * Converts `text` to speech with Deepgram and saves the audio at `audioFilePath`.
 *
 * Uses `fs`, `createClient` and `getAudioBuffer` from the surrounding script.
 * Failures are logged instead of thrown, because the script calls this
 * function without a rejection handler.
 *
 * @returns {Promise<void>} Settles after the file has been written or the
 *   failure has been logged.
 */
const getAudio = async () => {
  try {
    const response = await deepgram.speak.request(
      { text },
      {
        model: 'aura-2-thalia-en',
        encoding: 'linear16',
        container: 'wav',
      }
    );

    const stream = await response.getStream();
    if (!stream) {
      console.error('Error generating audio:', stream);
      return;
    }

    const buffer = await getAudioBuffer(stream);

    // Awaiting the write keeps this function pending until the file is on disk.
    try {
      await fs.promises.writeFile(audioFilePath, buffer);
      console.log('Audio file written to', audioFilePath);
    } catch (err) {
      console.error('Error writing audio to file:', err);
    }
  } catch (err) {
    // A failed request or stream read would otherwise surface as an
    // unhandled promise rejection at the bare getAudio() call.
    console.error('Error generating audio:', err);
  }
};
31 
/**
 * Reads a stream to the end and joins every chunk into one Node.js Buffer.
 *
 * @param {{ getReader: Function }} response - Readable stream exposing
 *   getReader(); chunks are assumed to be Uint8Array instances (TODO confirm
 *   for non-fetch stream sources).
 * @returns {Promise<Buffer>} All bytes of the stream, in arrival order.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Release the lock even when a read fails, so the stream is not left locked.
    reader.releaseLock();
  }

  // Buffer.concat copies each chunk once; rebuilding the array per chunk
  // with spread re-copies every byte received so far.
  return Buffer.concat(chunks);
};
50 
51 getAudio();

Chunked Text Source Payload

This example shows how to chunk the text source by sentence boundaries and stream the audio for each chunk consecutively.

TypeScript

1 import fs from 'fs';
2 import { createClient } from '@deepgram/sdk';
3 
4 const DEEPGRAM_API_KEY = 'YOUR_DEEPGRAM_API_KEY';
5 const deepgram = createClient(DEEPGRAM_API_KEY);
6 
const inputText = "Your long text goes here...";

/**
 * Splits text into sentences at '.', '!' and '?' boundaries.
 *
 * A run of terminators (such as "...") stays attached to its sentence, and
 * trailing text without a terminator is returned as a final segment instead
 * of being dropped.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Trimmed, non-empty sentences in their original order;
 *   an empty array when the text contains no sentence content.
 */
function segmentTextBySentence(text) {
  // match() returns null when nothing matches, so fall back to an empty list.
  const sentences = text.match(/[^.!?]+(?:[.!?]+|$)/g) ?? [];

  return sentences
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);
}
12 
/**
 * Requests speech for one piece of text and returns the complete audio.
 *
 * MP3 is requested because the surrounding script appends every segment to
 * a single output.mp3 file. A WAV response per segment would each carry its
 * own header, so the concatenated file would neither be MP3 nor a valid WAV.
 * NOTE(review): `container` is omitted on the assumption that it does not
 * apply to MP3 — confirm against Deepgram's TTS encoding options.
 *
 * @param {string} text - Text to synthesize.
 * @returns {Promise<Buffer>} The audio bytes for this text.
 * @throws {Error} When the response provides no audio stream.
 */
async function synthesizeAudio(text) {
  const response = await deepgram.speak.request(
    { text },
    {
      model: 'aura-helios-en',
      encoding: 'mp3',
    }
  );

  const stream = await response.getStream();
  if (!stream) {
    throw new Error('Error generating audio');
  }

  return getAudioBuffer(stream);
}
31 
/**
 * Reads a stream to the end and joins every chunk into one Node.js Buffer.
 *
 * @param {{ getReader: Function }} response - Readable stream exposing
 *   getReader(); chunks are assumed to be Uint8Array instances (TODO confirm
 *   for non-fetch stream sources).
 * @returns {Promise<Buffer>} All bytes of the stream, in arrival order.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Release the lock even when a read fails, so the stream is not left locked.
    reader.releaseLock();
  }

  // Buffer.concat copies each chunk once; rebuilding the array per chunk
  // with spread re-copies every byte received so far.
  return Buffer.concat(chunks);
};
50 
/**
 * Synthesizes each sentence of `inputText` in order and appends the audio to
 * output.mp3.
 *
 * A segment that fails to synthesize is logged and skipped, so the remaining
 * segments are still written.
 *
 * @returns {Promise<void>} Resolves once the output file has been flushed and
 *   closed; rejects if the write stream reports an error while closing.
 */
async function main() {
  const segments = segmentTextBySentence(inputText);

  // Create or truncate the output file
  const outputFile = fs.createWriteStream("output.mp3");

  for (const segment of segments) {
    try {
      const audioData = await synthesizeAudio(segment);
      outputFile.write(audioData);
      console.log("Audio stream finished for segment:", segment);
    } catch (error) {
      console.error("Error synthesizing audio:", error);
    }
  }

  // End the stream and wait for 'finish' so the completion message is only
  // printed after every queued write has reached the file.
  await new Promise((resolve, reject) => {
    outputFile.once("finish", resolve);
    outputFile.once("error", reject);
    outputFile.end();
  });

  console.log("Audio file creation completed.");
}
69 
70 main();

Where to Find Additional Examples

The SDK repository has a good collection of text-to-speech examples, and its README contains links to them. Each example below demonstrates a different option for converting text to speech.

Some Example(s):

Hello World - examples/node-speak

1	# Install the Deepgram JS SDK
2	# https://github.com/deepgram/deepgram-js-sdk
3
4	npm install @deepgram/sdk

1	import { createClient } from "@deepgram/sdk";
2	import fs from "fs";
3
4	const deepgram = createClient("DEEPGRAM_API_KEY");

const text = "Hello, how can I help you today?";

/**
 * Converts `text` to speech with Deepgram and saves the result to output.wav.
 *
 * Uses `deepgram`, `fs` and `getAudioBuffer` from the surrounding script.
 * Failures are logged instead of thrown, because the script calls this
 * function without a rejection handler.
 *
 * @returns {Promise<void>} Settles after the file has been written or the
 *   failure has been logged.
 */
const getAudio = async () => {
  try {
    // STEP 1: Make a request and configure the request with options (such as model choice, audio configuration, etc.)
    const response = await deepgram.speak.request(
      { text },
      {
        model: "aura-2-thalia-en",
        encoding: "linear16",
        container: "wav",
      }
    );

    // STEP 2: Get the audio stream and headers from the response
    const stream = await response.getStream();
    const headers = await response.getHeaders();

    if (stream) {
      // STEP 3: Convert the stream to an audio buffer
      const buffer = await getAudioBuffer(stream);

      // STEP 4: Write the audio buffer to a file. Awaiting the write keeps
      // this function pending until the file is actually on disk.
      try {
        await fs.promises.writeFile("output.wav", buffer);
        console.log("Audio file written to output.wav");
      } catch (err) {
        console.error("Error writing audio to file:", err);
      }
    } else {
      console.error("Error generating audio:", stream);
    }

    if (headers) {
      console.log("Headers:", headers);
    }
  } catch (err) {
    // A failed request or stream read would otherwise surface as an
    // unhandled promise rejection at the bare getAudio() call.
    console.error("Error generating audio:", err);
  }
};
36
/**
 * Helper function to convert the stream to an audio buffer.
 *
 * Reads the stream to the end and joins every chunk into one Node.js Buffer.
 *
 * @param {{ getReader: Function }} response - Readable stream exposing
 *   getReader(); chunks are assumed to be Uint8Array instances (TODO confirm
 *   for non-fetch stream sources).
 * @returns {Promise<Buffer>} All bytes of the stream, in arrival order.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Release the lock even when a read fails, so the stream is not left locked.
    reader.releaseLock();
  }

  // Buffer.concat copies each chunk once; rebuilding the array per chunk
  // with spread re-copies every byte received so far.
  return Buffer.concat(chunks);
};
56
57	getAudio();

const DEEPGRAM_API_KEY = 'YOUR_DEEPGRAM_API_KEY';
const deepgram = createClient(DEEPGRAM_API_KEY);

const text = "Hello, how can I help you today? My name is Emily and I'm very glad to meet you. What do you think of this new text-to-speech API?";
const audioFilePath = 'output.wav'; // Path to save the audio file

/**
 * Converts `text` to speech with Deepgram and saves the audio at `audioFilePath`.
 *
 * Uses `fs`, `createClient` and `getAudioBuffer` from the surrounding script.
 * Failures are logged instead of thrown, because the script calls this
 * function without a rejection handler.
 *
 * @returns {Promise<void>} Settles after the file has been written or the
 *   failure has been logged.
 */
const getAudio = async () => {
  try {
    const response = await deepgram.speak.request(
      { text },
      {
        model: 'aura-2-thalia-en',
        encoding: 'linear16',
        container: 'wav',
      }
    );

    const stream = await response.getStream();
    if (!stream) {
      console.error('Error generating audio:', stream);
      return;
    }

    const buffer = await getAudioBuffer(stream);

    // Awaiting the write keeps this function pending until the file is on disk.
    try {
      await fs.promises.writeFile(audioFilePath, buffer);
      console.log('Audio file written to', audioFilePath);
    } catch (err) {
      console.error('Error writing audio to file:', err);
    }
  } catch (err) {
    // A failed request or stream read would otherwise surface as an
    // unhandled promise rejection at the bare getAudio() call.
    console.error('Error generating audio:', err);
  }
};
31
/**
 * Reads a stream to the end and joins every chunk into one Node.js Buffer.
 *
 * @param {{ getReader: Function }} response - Readable stream exposing
 *   getReader(); chunks are assumed to be Uint8Array instances (TODO confirm
 *   for non-fetch stream sources).
 * @returns {Promise<Buffer>} All bytes of the stream, in arrival order.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Release the lock even when a read fails, so the stream is not left locked.
    reader.releaseLock();
  }

  // Buffer.concat copies each chunk once; rebuilding the array per chunk
  // with spread re-copies every byte received so far.
  return Buffer.concat(chunks);
};
50
51	getAudio();

1	import fs from 'fs';
2	import { createClient } from '@deepgram/sdk';
3
4	const DEEPGRAM_API_KEY = 'YOUR_DEEPGRAM_API_KEY';
5	const deepgram = createClient(DEEPGRAM_API_KEY);
6
const inputText = "Your long text goes here...";

/**
 * Splits text into sentences at '.', '!' and '?' boundaries.
 *
 * A run of terminators (such as "...") stays attached to its sentence, and
 * trailing text without a terminator is returned as a final segment instead
 * of being dropped.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Trimmed, non-empty sentences in their original order;
 *   an empty array when the text contains no sentence content.
 */
function segmentTextBySentence(text) {
  // match() returns null when nothing matches, so fall back to an empty list.
  const sentences = text.match(/[^.!?]+(?:[.!?]+|$)/g) ?? [];

  return sentences
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);
}
12
/**
 * Requests speech for one piece of text and returns the complete audio.
 *
 * MP3 is requested because the surrounding script appends every segment to
 * a single output.mp3 file. A WAV response per segment would each carry its
 * own header, so the concatenated file would neither be MP3 nor a valid WAV.
 * NOTE(review): `container` is omitted on the assumption that it does not
 * apply to MP3 — confirm against Deepgram's TTS encoding options.
 *
 * @param {string} text - Text to synthesize.
 * @returns {Promise<Buffer>} The audio bytes for this text.
 * @throws {Error} When the response provides no audio stream.
 */
async function synthesizeAudio(text) {
  const response = await deepgram.speak.request(
    { text },
    {
      model: 'aura-helios-en',
      encoding: 'mp3',
    }
  );

  const stream = await response.getStream();
  if (!stream) {
    throw new Error('Error generating audio');
  }

  return getAudioBuffer(stream);
}
31
/**
 * Reads a stream to the end and joins every chunk into one Node.js Buffer.
 *
 * @param {{ getReader: Function }} response - Readable stream exposing
 *   getReader(); chunks are assumed to be Uint8Array instances (TODO confirm
 *   for non-fetch stream sources).
 * @returns {Promise<Buffer>} All bytes of the stream, in arrival order.
 */
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
    }
  } finally {
    // Release the lock even when a read fails, so the stream is not left locked.
    reader.releaseLock();
  }

  // Buffer.concat copies each chunk once; rebuilding the array per chunk
  // with spread re-copies every byte received so far.
  return Buffer.concat(chunks);
};
50
/**
 * Synthesizes each sentence of `inputText` in order and appends the audio to
 * output.mp3.
 *
 * A segment that fails to synthesize is logged and skipped, so the remaining
 * segments are still written.
 *
 * @returns {Promise<void>} Resolves once the output file has been flushed and
 *   closed; rejects if the write stream reports an error while closing.
 */
async function main() {
  const segments = segmentTextBySentence(inputText);

  // Create or truncate the output file
  const outputFile = fs.createWriteStream("output.mp3");

  for (const segment of segments) {
    try {
      const audioData = await synthesizeAudio(segment);
      outputFile.write(audioData);
      console.log("Audio stream finished for segment:", segment);
    } catch (error) {
      console.error("Error synthesizing audio:", error);
    }
  }

  // End the stream and wait for 'finish' so the completion message is only
  // printed after every queued write has reached the file.
  await new Promise((resolve, reject) => {
    outputFile.once("finish", resolve);
    outputFile.once("error", reject);
    outputFile.end();
  });

  console.log("Audio file creation completed.");
}
69
70	main();