Text to Speech REST

An overview of the Deepgram JavaScript SDK and Deepgram text-to-speech.

The Deepgram JavaScript SDK now works in both server and browser environments. A proxy configuration is required for browser environments (see the section below).

Installing the SDK

TypeScript
# Install the Deepgram JS SDK
# https://github.com/deepgram/deepgram-js-sdk

npm install @deepgram/sdk

Initializing the SDK

TypeScript
import { createClient } from "@deepgram/sdk";
import fs from "fs";

// Create a Deepgram client using your API key.
const deepgram = createClient("DEEPGRAM_API_KEY");

Make a Deepgram Text-to-Speech Request

Once the SDK is initialized, you can make a request to convert text into speech.

TypeScript
// The text to convert into speech.
const text = "Hello, how can I help you today?";
2
const getAudio = async () => {
  // STEP 1: Make a request and configure it with options
  // (model choice, audio encoding, container format).
  const response = await deepgram.speak.request(
    { text },
    {
      model: "aura-asteria-en",
      encoding: "linear16",
      container: "wav",
    }
  );

  // STEP 2: Stream and headers are independent — fetch them in parallel
  // instead of awaiting them one after the other.
  const [stream, headers] = await Promise.all([
    response.getStream(),
    response.getHeaders(),
  ]);

  if (!stream) {
    // The stream is null/undefined here, so logging its value tells the
    // reader nothing — say what actually went wrong.
    console.error("Error generating audio: no stream returned by the API");
  } else {
    // STEP 3: Convert the stream to an audio buffer.
    const buffer = await getAudioBuffer(stream);
    // STEP 4: Write the audio buffer to a file.
    fs.writeFile("output.wav", buffer, (err) => {
      if (err) {
        console.error("Error writing audio to file:", err);
      } else {
        console.log("Audio file written to output.wav");
      }
    });
  }

  if (headers) {
    console.log("Headers:", headers);
  }
};
36
// Helper: drain a web ReadableStream of byte chunks into a single Node Buffer.
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    chunks.push(value);
  }

  // Buffer.concat copies each chunk exactly once (O(total bytes)).
  // The previous reduce rebuilt the whole accumulator with spread on every
  // chunk — O(n^2) — and could exhaust memory on long audio responses.
  return Buffer.concat(chunks);
};
56
// Kick off the request; attach a catch so a failure doesn't become an
// unhandled promise rejection that kills the process.
getAudio().catch((err) => console.error("Text-to-speech request failed:", err));

Audio Output Streaming

Deepgram’s TTS API allows you to start playing the audio as soon as the first byte is received. This section provides examples to help you stream the audio output efficiently.

Single Text Source Payload

The following example demonstrates how to stream the audio as soon as the first byte arrives for a single text source.

TypeScript
const DEEPGRAM_API_KEY = 'YOUR_DEEPGRAM_API_KEY';
const deepgram = createClient(DEEPGRAM_API_KEY);

// The text to synthesize, and where to save the resulting audio.
const text = "Hello, how can I help you today? My name is Emily and I'm very glad to meet you. What do you think of this new text-to-speech API?";
const audioFilePath = 'output.wav'; // Path to save the audio file
6
// Request TTS audio for `text` and write it to `audioFilePath`.
const getAudio = async () => {
  const response = await deepgram.speak.request(
    { text },
    {
      model: 'aura-asteria-en',
      encoding: 'linear16',
      container: 'wav',
    }
  );

  const stream = await response.getStream();
  if (!stream) {
    // The stream is null here, so logging its value adds nothing —
    // report what failed instead.
    console.error('Error generating audio: no stream returned by the API');
    return;
  }

  const buffer = await getAudioBuffer(stream);
  fs.writeFile(audioFilePath, buffer, (err) => {
    if (err) {
      console.error('Error writing audio to file:', err);
    } else {
      console.log('Audio file written to', audioFilePath);
    }
  });
};
31
// Helper: drain a web ReadableStream of byte chunks into a single Node Buffer.
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    chunks.push(value);
  }

  // Buffer.concat copies each chunk exactly once (O(total bytes)).
  // The previous reduce rebuilt the whole accumulator with spread on every
  // chunk — O(n^2) — and could exhaust memory on long audio responses.
  return Buffer.concat(chunks);
};
50
// Kick off the request; attach a catch so a failure doesn't become an
// unhandled promise rejection that kills the process.
getAudio().catch((err) => console.error("Text-to-speech request failed:", err));

Chunked Text Source Payload

This example shows how to chunk the text source by sentence boundaries and stream the audio for each chunk consecutively.

TypeScript
import fs from 'fs';
import { createClient } from '@deepgram/sdk';

const DEEPGRAM_API_KEY = 'YOUR_DEEPGRAM_API_KEY';
const deepgram = createClient(DEEPGRAM_API_KEY);

// The full text to be chunked into sentences and synthesized piece by piece.
const inputText = "Your long text goes here...";
8
// Split text into trimmed sentence-sized chunks at ., !, ? boundaries.
// `[.!?]*` (not `+`) keeps a trailing fragment that has no end punctuation,
// and `?? []` guards against match() returning null (empty/blank input),
// which previously threw a TypeError on .map.
function segmentTextBySentence(text) {
  return (text.match(/[^.!?]+[.!?]*/g) ?? [])
    .map((sentence) => sentence.trim())
    .filter((sentence) => sentence.length > 0);
}
12
// Request TTS audio for a single text chunk and return it as a Buffer.
// Throws if the API yields no stream.
async function synthesizeAudio(text) {
  const response = await deepgram.speak.request(
    { text },
    {
      model: 'aura-helios-en',
      encoding: 'linear16',
      container: 'wav',
    }
  );

  const stream = await response.getStream();
  if (!stream) {
    throw new Error('Error generating audio');
  }
  return getAudioBuffer(stream);
}
31
// Helper: drain a web ReadableStream of byte chunks into a single Node Buffer.
const getAudioBuffer = async (response) => {
  const reader = response.getReader();
  const chunks = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    chunks.push(value);
  }

  // Buffer.concat copies each chunk exactly once (O(total bytes)).
  // The previous reduce rebuilt the whole accumulator with spread on every
  // chunk — O(n^2) — and could exhaust memory on long audio responses.
  return Buffer.concat(chunks);
};
50
// Synthesize each sentence in order and append the audio to one output file.
async function main() {
  const segments = segmentTextBySentence(inputText);

  // Create or truncate the output file. The request uses container 'wav',
  // so name the file accordingly (the original wrote WAV data to "output.mp3").
  const outputFile = fs.createWriteStream("output.wav");

  for (const segment of segments) {
    try {
      const audioData = await synthesizeAudio(segment);
      outputFile.write(audioData);
      console.log("Audio stream finished for segment:", segment);
    } catch (error) {
      console.error("Error synthesizing audio:", error);
    }
  }

  // NOTE(review): each chunk is a complete WAV payload with its own header,
  // so naive concatenation may not play in strict players — consider a raw
  // (container-less) encoding for chunked output; confirm against the API docs.

  // Close the stream and log only after all buffered data has been flushed.
  outputFile.end(() => {
    console.log("Audio file creation completed.");
  });
}

// Surface fatal errors instead of leaving a floating promise.
main().catch((err) => console.error("Fatal error:", err));

Where to Find Additional Examples

The SDK repository has a good collection of text-to-speech examples. The README contains links to them. Each example demonstrates different options for converting a text source into speech.

Examples:

Built with