Text to Speech

An overview of the Deepgram .NET SDK and Deepgram text-to-speech.

Installing the SDK

dotnet add package Deepgram

Make a Deepgram Text-to-Speech Request

using System.Text.Json;
using Deepgram;
using Deepgram.Models.Speak.v1;
using Deepgram.Logger;

namespace SampleApp
{
    /// <summary>
    /// Console sample that sends a short piece of text to Deepgram through
    /// <c>SpeakClient.ToFile</c> and prints the response.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: initializes the library, issues one text-to-speech
        /// request, prints the response, waits for a key press, then tears
        /// the library down.
        /// </summary>
        /// <param name="args">Command-line arguments (not used by this sample).</param>
        static async Task Main(string[] args)
        {
            // STEP 1: Bring the library up with its default logging.
            Library.Initialize();

            // STEP 2: Build the client used for the request.
            // Set the "DEEPGRAM_API_KEY" environment variable to your Deepgram API Key.
            var speakClient = new SpeakClient();

            // STEP 3: Describe the request (the text plus the voice model),
            // then hand it to Deepgram; "test.mp3" is passed as the file name.
            var textSource = new TextSource("Hello World!");
            var speakOptions = new SpeakSchema { Model = "aura-asteria-en" };
            var result = await speakClient.ToFile(textSource, "test.mp3", speakOptions);

            // Print the response and keep the console open until a key is pressed.
            Console.WriteLine(result);
            Console.ReadKey();

            // Tear the library down before exiting.
            Library.Terminate();
        }
    }
}

Audio Output Streaming

Deepgram's TTS API allows you to start playing the audio as soon as the first byte is received. This section provides examples to help you stream the audio output efficiently.

Single Text Source Payload

using System.Text.Json;
using Deepgram;
using Deepgram.Models.Speak.v1;
using Deepgram.Logger;

namespace SampleApp
{
    /// <summary>
    /// Console sample that requests speech for a single piece of text through
    /// <c>SpeakClient.Stream</c> and iterates over the returned audio stream.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: initializes the library, issues one streaming
        /// text-to-speech request, walks the audio segments, then tears the
        /// library down.
        /// </summary>
        /// <param name="args">Command-line arguments (not used by this sample).</param>
        static async Task Main(string[] args)
        {
            // STEP 1: Bring the library up with its default logging.
            Library.Initialize();

            // STEP 2: Build the client used for the request.
            // Set the "DEEPGRAM_API_KEY" environment variable to your Deepgram API Key.
            var speakClient = new SpeakClient();

            // STEP 3: Describe the request (the text plus the voice model)
            // and send it to Deepgram to convert to speech.
            var textSource = new TextSource("Hello World!");
            var speakOptions = new SpeakSchema { Model = "aura-asteria-en" };
            var result = await speakClient.Stream(textSource, speakOptions);

            // Consume the audio as it is produced by the response's AudioStream.
            await foreach (var chunk in result.AudioStream)
            {
                // Process the audio segment received
            }

            // Tear the library down before exiting.
            Library.Terminate();
        }
    }
}

Chunk Text Source Payload

using System.Text.Json;
using System.Text.RegularExpressions;
using Deepgram;
using Deepgram.Models.Speak.v1;
using Deepgram.Logger;

namespace SampleApp
{
    /// <summary>
    /// Console sample that splits a longer text into sentence-sized segments
    /// and requests speech for each segment through <c>SpeakClient.Stream</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: initializes the library, segments the input text,
        /// issues one streaming request per segment, then tears the library down.
        /// </summary>
        /// <param name="args">Command-line arguments (not used by this sample).</param>
        static async Task Main(string[] args)
        {
            // STEP 1: Initialize Library with default logging
            Library.Initialize();

            // STEP 2: Create a Deepgram client.
            // Set "DEEPGRAM_API_KEY" environment variable to your Deepgram API Key
            var deepgramClient = new SpeakClient();

            var inputText = "Your long text goes here...";

            // STEP 3: process the desired text in segments
            // Send each segment to Deepgram to convert to Speech
            var segments = SegmentTextBySentence(inputText);

            foreach (var segment in segments)
            {
                var response = await deepgramClient.Stream(
                    new TextSource(segment),
                    new SpeakSchema()
                    {
                        Model = "aura-asteria-en",
                    });

                await foreach (var audioSegment in response.AudioStream)
                {
                    // Process the audio segment received
                }
            }

            // Teardown Library
            Library.Terminate();
        }

        /// <summary>
        /// Splits <paramref name="text"/> into sentence-like segments. A segment
        /// is a run of characters other than '.', '!' or '?', followed either by
        /// one or more of those terminators or by the end of the input.
        /// </summary>
        /// <param name="text">The text to split; may be null or empty.</param>
        /// <returns>
        /// The trimmed, non-empty segments in their original order, or an empty
        /// array when <paramref name="text"/> is null, empty or whitespace.
        /// </returns>
        static string[] SegmentTextBySentence(string text)
        {
            // Nothing to split: avoid passing null to Regex.Matches.
            if (string.IsNullOrWhiteSpace(text))
            {
                return new string[0];
            }

            // "(?:[.!?]+|$)" keeps runs such as "..." or "?!" attached to their
            // sentence and also captures trailing text that has no terminator,
            // so no part of the input is silently dropped.
            return Regex.Matches(text, @"[^.!?]+(?:[.!?]+|$)")
                        .Cast<Match>() // MatchCollection is non-generic on older frameworks
                        .Select(m => m.Value.Trim())
                        .Where(s => s.Length > 0)
                        .ToArray();
        }
    }
}

Where To Find Additional Examples

The SDK repository has a good collection of text-to-speech examples. You can find links to them in the README.