Build a Voice Agent with JavaScript

Create a real-time voice agent using the Deepgram JavaScript SDK.

This tutorial walks you through building a basic voice agent using JavaScript and the Deepgram SDK. You will learn how to connect to the Agent API, configure its behavior, and stream audio for processing.

Prerequisites

Before you begin, ensure you have the following:

  • A Deepgram API key. You can get one in the Deepgram Console.
  • Node.js installed on your machine.

1. Set up your environment

Create a new directory for your project and initialize it.

$ mkdir deepgram-agent-demo
$ cd deepgram-agent-demo
$ npm init -y
$ touch index.js

Export your Deepgram API key as an environment variable.

$ export DEEPGRAM_API_KEY="your_api_key"
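Before running the agent, it can help to fail fast when the key is missing. A minimal sketch (the `hasApiKey` helper name is ours, not part of the SDK) that checks the variable exported above:

```javascript
// Returns true when the given environment object contains a non-empty key.
function hasApiKey(env) {
  return typeof env.DEEPGRAM_API_KEY === "string" && env.DEEPGRAM_API_KEY.length > 0;
}

if (!hasApiKey(process.env)) {
  console.error("DEEPGRAM_API_KEY is not set. Export it before running the agent.");
}
```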

2. Install the Deepgram SDK

Install the Deepgram JavaScript SDK and cross-fetch, which is used to fetch and stream the sample audio file.

$ npm install @deepgram/sdk cross-fetch

3. Create the Voice Agent

Open index.js and add the following code. This script connects to Deepgram, configures the agent, and streams a sample audio file.

const { writeFile, appendFile } = require("fs/promises");
const { DeepgramClient } = require("@deepgram/sdk");
const fetch = require("cross-fetch");
const { join } = require("path");

const deepgram = new DeepgramClient({ apiKey: process.env.DEEPGRAM_API_KEY });

const agent = async () => {
  let audioBuffer = Buffer.alloc(0);
  let i = 0;
  const url = "https://dpgr.am/spacewalk.wav";
  const connection = await deepgram.agent.v1.connect();

  connection.on("message", async (data) => {
    if (data.type === "Welcome") {
      console.log("Welcome to the Deepgram Voice Agent!");

      // Configure audio formats and the listen/think/speak providers.
      connection.sendSettings({
        type: "Settings",
        audio: {
          input: {
            encoding: "linear16",
            sample_rate: 24000,
          },
          output: {
            encoding: "linear16",
            sample_rate: 16000,
            container: "wav",
          },
        },
        agent: {
          language: "en",
          listen: {
            provider: {
              type: "deepgram",
              model: "nova-3",
            },
          },
          think: {
            provider: {
              type: "open_ai",
              model: "gpt-4o-mini",
            },
            prompt: "You are a friendly AI assistant.",
          },
          speak: {
            provider: {
              type: "deepgram",
              model: "aura-2-thalia-en",
            },
          },
          greeting: "Hello! How can I help you today?",
        },
      });

      console.log("Deepgram agent configured!");

      // Send a keep-alive every 5 seconds so the connection stays open.
      setInterval(() => {
        console.log("Keep alive!");
        connection.sendKeepAlive({ type: "KeepAlive" });
      }, 5000);

      // Fetch the sample audio and stream it to the agent chunk by chunk.
      fetch(url)
        .then((r) => r.body)
        .then((res) => {
          res.on("readable", () => {
            const chunk = res.read();
            if (chunk) {
              console.log("Sending audio chunk");
              connection.sendMedia(chunk);
            }
          });
        });
    } else if (data.type === "ConversationText") {
      // Append each transcript message to the chat log as one JSON line.
      await appendFile(join(__dirname, `chatlog.txt`), JSON.stringify(data) + "\n");
    } else if (data.type === "UserStartedSpeaking") {
      // Discard any buffered agent audio when the user interrupts.
      if (audioBuffer.length) {
        console.log("Interrupting agent.");
        audioBuffer = Buffer.alloc(0);
      }
    } else if (typeof Blob !== "undefined" && data instanceof Blob) {
      // Binary frames carry the agent's audio output; accumulate them.
      console.log("Audio chunk received");
      const chunk = Buffer.from(await data.arrayBuffer());
      audioBuffer = Buffer.concat([audioBuffer, chunk]);
    } else if (data.type === "AgentAudioDone") {
      // A full response has arrived; write it out as a numbered WAV file.
      console.log("Agent audio done");
      await writeFile(join(__dirname, `output-${i}.wav`), audioBuffer);
      audioBuffer = Buffer.alloc(0);
      i++;
    }
  });

  connection.on("open", () => {
    console.log("Connection opened");
  });

  connection.on("close", () => {
    console.log("Connection closed");
    process.exit(0);
  });

  connection.on("error", (err) => {
    console.error("Error:", err.message);
  });

  connection.connect();
  await connection.waitForOpen();
};

void agent();
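The Settings message above declares linear16 (16-bit PCM) input at 24000 Hz, which works out to 48,000 bytes of audio per second (2 bytes per sample). If you later stream a local file instead of the hosted sample, a helper along these lines (the `bytesPerSecond` and `chunkAudio` names are ours, for illustration) slices a PCM buffer into fixed-size chunks and shows how much real time each chunk covers:

```javascript
// Bytes per second for 16-bit PCM: sampleRate samples/s * 2 bytes/sample.
function bytesPerSecond(sampleRate) {
  return sampleRate * 2;
}

// Slice a PCM buffer into fixed-size chunks, e.g. for connection.sendMedia().
function chunkAudio(buffer, chunkBytes) {
  const chunks = [];
  for (let offset = 0; offset < buffer.length; offset += chunkBytes) {
    chunks.push(buffer.subarray(offset, offset + chunkBytes));
  }
  return chunks;
}

// At 24000 Hz, a 4800-byte chunk covers 100 ms of audio.
const rate = bytesPerSecond(24000); // 48000 bytes/s
const chunkMs = (4800 / rate) * 1000; // 100 ms
```

Pacing chunks at roughly real time (for example, one 4800-byte chunk every 100 ms) avoids flooding the socket faster than the agent consumes audio.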

4. Run the Voice Agent

Run your script using Node.js.

$node index.js

The agent will process the audio and generate responses. You can find the conversation transcript in chatlog.txt and the agent’s audio responses in output-*.wav files.

Next steps

Now that you have built a basic agent, you can customize its behavior: