Speech Recognition API (1.0.0)

Deepgram's Speech Recognition API gives you streamlined access to automatic transcription from Deepgram's off-the-shelf and trained speech recognition models. The API is fast, understands nearly every audio format available, and is customizable: you can tailor your transcript using various query parameters and apply general-purpose or custom-trained AI models.

Server

URL
https://brain.deepgram.com/v2

Authentication

Basic

All requests to the API should include a Basic Authorization header containing the Base64-encoded username (or email address you used to sign up) and password of your Deepgram account.

For example, for user gandalf with password mellon, the base64-encoded value of gandalf:mellon is Z2FuZGFsZjptZWxsb24=. So Gandalf's requests to the Deepgram API should all include the following header: Authorization: Basic Z2FuZGFsZjptZWxsb24=.

Security Scheme Type: HTTP
HTTP Authorization Scheme: basic
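
For reference, a minimal sketch (Node.js) of constructing this header; gandalf/mellon are the example credentials above, not real ones:

// Build a Basic Authorization header from a username and password.
const credentials = Buffer.from('gandalf:mellon').toString('base64');
const headers = { Authorization: `Basic ${credentials}` };
console.log(headers); // { Authorization: 'Basic Z2FuZGFsZjptZWxsb24=' }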

Transcription

High-speed transcription of audio.

Transcribe pre-recorded audio

Transcribes the specified audio file.

Query Parameters

Name | Description
model
string

AI model used to process submitted audio.

Off-the-shelf Deepgram models include:

  • general: Optimized for everyday audio processing; if you aren't sure which model to select, start here.
  • meeting: Optimized for conference room settings, which include multiple speakers with a single microphone.
  • phonecall: Optimized for low-bandwidth audio phone calls.
  • conversationalai (Labs): Optimized to allow artificial intelligence technologies, such as chatbots, to interact with people in a human-like way.

You may also use a custom model associated with your account by including its custom_id.

Default: general
Possible Values: general, phonecall, meeting, conversationalai OR <custom-id>
language
string

BCP-47 language tag that hints at the primary spoken language. Language support is optimized for the following language/model combinations:

Language | Region | Model(s)
English | en-GB | general, phonecall
English | en-IN | general, phonecall
English | en-NZ | general
English | en-US | general, meeting, phonecall
French (Labs) | fr | general
Hindi (Labs) | hi | general
Korean | ko | general
Portuguese (Labs) | pt | general
Portuguese (Labs) | pt-BR | general
Russian (Labs) | ru | general
Spanish | es | general
Turkish | tr | general
Default: en-US
Possible Values: en-GB, en-IN, en-NZ, en-US, es, fr, hi, ko, pt, pt-BR, ru, tr OR null
punctuate
boolean

Indicates whether to add punctuation and capitalization to the transcript.

profanity_filter
boolean

Indicates whether to remove profanity from the transcript.

redact
any

Indicates whether to redact sensitive information, replacing redacted content with asterisks (*). Options include:

  • pci: Redacts sensitive credit card information, including credit card number, expiration date, and CVV
  • numbers (or true): Aggressively redacts strings of numerals
  • ssn (beta): Redacts social security numbers
Possible Values: pci, numbers, ssn, true OR null
diarize
boolean

Indicates whether to recognize speaker changes. When set to true, each word in the transcript will be assigned a speaker number starting at 0.

multichannel
boolean

Indicates whether to transcribe each audio channel independently. When set to true, you will receive one transcript for each channel, which means you can apply a different model to each channel using the model parameter (e.g., set model to general:phonecall, which applies the general model to channel 0 and the phonecall model to channel 1).

alternatives
integer

Maximum number of transcript alternatives to return. Just like a human listener, Deepgram can provide multiple possible interpretations of what it hears.

Default: 1
Example:

alternatives=1

numerals
boolean

Indicates whether to convert numbers from written format (e.g., one) to numerical format (e.g., 1). Deepgram can format numbers up to 999,999.

search
any

Terms or phrases to search for in the submitted audio. Deepgram searches for acoustic patterns in audio rather than text patterns in transcripts because we have noticed that acoustic pattern matching is more performant.

callback
string

Callback URL to provide if you would like your submitted audio to be processed asynchronously. When passed, Deepgram will immediately respond with a request_id. When it has finished analyzing the audio, it will send a POST request to the provided URL with an appropriate HTTP status code.

For streaming audio, callback can be used to redirect streaming responses to a different server:
  • If the callback URL begins with http:// or https://, then POST requests are sent to the callback server for each streaming response.
  • If the callback URL begins with ws:// or wss://, then a WebSocket connection is established with the callback server and WebSocket text messages are sent containing the streaming responses.
  • If a WebSocket callback connection is disconnected at any point, the entire real-time transcription stream is killed; this maintains the strong guarantee of a one-to-one relationship between incoming real-time connections and outgoing WebSocket callback connections.

keywords
any

Keywords to which the model should pay particular attention, boosting or suppressing them to help it understand context. Just like a human listener, Deepgram can better understand mumbled, distorted, or otherwise hard-to-decipher speech when it knows the context of the conversation.

To learn more about the most effective way to use keywords and recognize context in your transcript, see our Keyword Boosting guide.

utterances
boolean

(beta) Indicates whether Deepgram will segment speech into meaningful semantic units, which allows the model to interact more naturally and effectively with speakers' spontaneous speech patterns. For example, when humans speak to each other conversationally, they often pause mid-sentence to reformulate their thoughts, or stop and restart a badly-worded sentence. When utterances is set to true, these utterances are identified and returned in the transcript results.

By default, when utterances is enabled, it starts a new utterance after 0.8 s of silence. You can customize the length of time used to determine where to split utterances by submitting the utt_split parameter.

utt_split
number

(beta) Length of time in seconds of silence between words that Deepgram will use when determining where to split utterances. Used when utterances is enabled. Defaults to 0.8 s.

Default: 0.8
Example:

utt_split=1.5
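
The following is a minimal sketch (Node.js 18+, using the global fetch API) that combines several of the query parameters above in a single pre-recorded request; the parameter values and the audio URL are illustrative, not required:

// Sketch: transcribe a remote file with several query parameters set.
// Assumes DEEPGRAM_AUTH holds your Base64-encoded username:password.
async function transcribe() {
  const params = new URLSearchParams({
    model: 'general:phonecall', // per-channel models; pairs with multichannel
    multichannel: 'true',
    punctuate: 'true',
    diarize: 'true',
  });
  const response = await fetch(`https://brain.deepgram.com/v2/listen?${params}`, {
    method: 'POST',
    headers: {
      Authorization: `Basic ${process.env.DEEPGRAM_AUTH}`,
      'content-type': 'application/json',
    },
    body: JSON.stringify({ url: 'https://example.com/audio.wav' }),
  });
  return response.json();
}

transcribe().then(({ metadata, results }) => console.log(metadata, results));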

Request body schema

Request body when submitting pre-recorded audio. Accepts either:

  • raw binary audio data. In this case, include a Content-Type header set to the audio MIME type.
  • JSON object with a single field from which the audio can be retrieved. In this case, include a Content-Type header set to application/json.
Name | Description
url
string (required)

Location of audio file to transcribe.
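
For the raw-binary case, a hedged sketch (Node.js 18+) that reads a local file and submits it with the matching Content-Type header; the file path and MIME type are illustrative:

// Sketch: submit raw binary audio instead of a JSON body with a URL.
const fs = require('fs');

async function transcribeLocalFile(path) {
  const response = await fetch('https://brain.deepgram.com/v2/listen?punctuate=true', {
    method: 'POST',
    headers: {
      Authorization: `Basic ${process.env.DEEPGRAM_AUTH}`,
      'Content-Type': 'audio/wav', // match the MIME type of your audio
    },
    body: fs.readFileSync(path),
  });
  return response.json();
}

transcribeLocalFile('/path/to/audio/file.wav').then(console.log);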

Responses

Status | Description
200 success: Audio submitted for transcription.
Response Schema
Name | Description
metadata
object

JSON-formatted ListenMetadata object.

results
object

JSON-formatted ListenResults object.

cURL
curl --request POST \
  --url 'https://brain.deepgram.com/v2/listen' \
  --header 'Authorization: Basic REPLACE_BASIC_AUTH' \
  --header 'content-type: application/json' \
  --data '{"url":"string"}'
Response
{
  "metadata": {},
  "results": {}
}

Transcribe streaming audio

Deepgram provides its customers with real-time, streaming transcription via its streaming endpoints. These endpoints are high-performance, full-duplex services running over the tried-and-true WebSocket protocol, which makes integration with customer pipelines simple due to the wide array of client libraries available.

To use this endpoint, connect to wss://brain.deepgram.com/v2/listen/stream. TLS encryption will protect your connection and data.

All data is sent to the streaming endpoint as binary-type WebSocket messages containing payloads that are the raw audio data. Because the protocol is full-duplex, you can stream in real-time and still receive transcription responses while uploading data.

When you are finished, send an empty (length zero) binary message to the server. The server will interpret it as a shutdown command, which means it will finish processing whatever data it still has cached, send the response to the client, send a summary metadata object, and then terminate the WebSocket connection.

To learn more about working with real-time streaming data and results, see Streaming Audio in Real-Time.
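
A minimal sketch of this flow, assuming the ws package (npm i ws) and passing the Authorization header directly; a fuller example appears at the end of this endpoint's reference:

// Sketch: stream a local file as binary messages, then send the empty
// message that tells the server to finish up and close the connection.
const fs = require('fs');
const WebSocket = require('ws');

const socket = new WebSocket('wss://brain.deepgram.com/v2/listen/stream', {
  headers: { Authorization: `Basic ${process.env.DEEPGRAM_AUTH}` },
});

socket.on('open', () => {
  socket.send(fs.readFileSync('/path/to/audio/file.wav')); // raw audio data
  socket.send(new Uint8Array(0)); // empty binary message = shutdown command
});

socket.on('message', (data) => console.log(JSON.parse(data.toString())));
socket.on('close', () => console.log('Connection closed by server.'));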

Query Parameters

Name | Description
model
string

AI model used to process submitted audio.

Off-the-shelf Deepgram models include:

  • general: Optimized for everyday audio processing; if you aren't sure which model to select, start here.
  • meeting: Optimized for conference room settings, which include multiple speakers with a single microphone.
  • phonecall: Optimized for low-bandwidth audio phone calls.
  • conversationalai (Labs): Optimized to allow artificial intelligence technologies, such as chatbots, to interact with people in a human-like way.

You may also use a custom model associated with your account by including its custom_id.

Default: general
Possible Values: general, phonecall, meeting, conversationalai OR <custom-id>
language
string

BCP-47 language tag that hints at the primary spoken language. Language support is optimized for the following language/model combinations:

Language | Region | Model(s)
English | en-GB | general, phonecall
English | en-IN | general, phonecall
English | en-NZ | general
English | en-US | general, meeting, phonecall
French (Labs) | fr | general
Hindi (Labs) | hi | general
Korean | ko | general
Portuguese (Labs) | pt | general
Portuguese (Labs) | pt-BR | general
Russian (Labs) | ru | general
Spanish | es | general
Turkish | tr | general
Default: en-US
Possible Values: en-GB, en-IN, en-NZ, en-US, es, fr, hi, ko, pt, pt-BR, ru, tr OR null
punctuate
boolean

Indicates whether to add punctuation and capitalization to the transcript.

profanity_filter
boolean

Indicates whether to remove profanity from the transcript.

redact
any

Indicates whether to redact sensitive information, replacing redacted content with asterisks (*). Options include:

  • pci: Redacts sensitive credit card information, including credit card number, expiration date, and CVV
  • numbers (or true): Aggressively redacts strings of numerals
  • ssn (beta): Redacts social security numbers
Possible Values: pci, numbers, ssn, true OR null
diarize
boolean

Indicates whether to recognize speaker changes. When set to true, each word in the transcript will be assigned a speaker number starting at 0.

multichannel
boolean

Indicates whether to transcribe each audio channel independently. When set to true, you will receive one transcript for each channel, which means you can apply a different model to each channel using the model parameter (e.g., set model to general:phonecall, which applies the general model to channel 0 and the phonecall model to channel 1).

alternatives
integer

Maximum number of transcript alternatives to return. Just like a human listener, Deepgram can provide multiple possible interpretations of what it hears.

Default: 1
Example:

alternatives=1

numerals
boolean

Indicates whether to convert numbers from written format (e.g., one) to numerical format (e.g., 1). Deepgram can format numbers up to 999,999.

search
any

Terms or phrases to search for in the submitted audio. Deepgram searches for acoustic patterns in audio rather than text patterns in transcripts because we have noticed that acoustic pattern matching is more performant.

callback
string

Callback URL to provide if you would like your submitted audio to be processed asynchronously. When passed, Deepgram will immediately respond with a request_id. When it has finished analyzing the audio, it will send a POST request to the provided URL with an appropriate HTTP status code.

For streaming audio, callback can be used to redirect streaming responses to a different server:
  • If the callback URL begins with http:// or https://, then POST requests are sent to the callback server for each streaming response.
  • If the callback URL begins with ws:// or wss://, then a WebSocket connection is established with the callback server and WebSocket text messages are sent containing the streaming responses.
  • If a WebSocket callback connection is disconnected at any point, the entire real-time transcription stream is killed; this maintains the strong guarantee of a one-to-one relationship between incoming real-time connections and outgoing WebSocket callback connections.

keywords
any

Keywords to which the model should pay particular attention, boosting or suppressing them to help it understand context. Just like a human listener, Deepgram can better understand mumbled, distorted, or otherwise hard-to-decipher speech when it knows the context of the conversation.

To learn more about the most effective way to use keywords and recognize context in your transcript, see our Keyword Boosting guide.

interim_results
boolean

Indicates whether the streaming endpoint should send you updates to its transcription as more audio becomes available. By default, the streaming endpoint returns regular updates, which means transcription results will likely change for a period of time. You can avoid receiving these updates by setting this flag to false.

endpointing
boolean

Indicates whether Deepgram should detect when a speaker has finished speaking (or paused for a significant period of time, indicating the completion of an idea). When Deepgram detects an endpoint, it assumes that no additional data will improve its prediction, so it immediately finalizes the result for the processed time range and returns the transcript with a speech_final parameter set to true.

For example, if you are working with a 15-second audio clip, but someone is speaking for only the first 3 seconds, endpointing allows you to get a finalized result after the first 3 seconds.

By default, endpointing is enabled and finalizes a transcript after 10 ms of silence. You can customize the length of time used to detect whether a speaker has finished speaking by submitting the vad_turnoff parameter.

Default: true
vad_turnoff
integer

Length of time in milliseconds of silence that voice activation detection (VAD) will use to detect that a speaker has finished speaking. Used when endpointing is enabled. Defaults to 10 ms. Deepgram customers may configure a value between 10 ms and 500 ms; on-premise customers may remove this restriction.

Default: 10
Example:

vad_turnoff=30

encoding
string

Expected encoding of the submitted streaming audio.

Options include:

  • linear16: 16-bit, little endian, signed PCM WAV data
  • flac: FLAC-encoded data
  • mulaw: mu-law encoded WAV data
  • amr-nb: adaptive multi-rate narrowband codec (sample rate must be 8000)
  • amr-wb: adaptive multi-rate wideband codec (sample rate must be 16000)
  • opus: Ogg Opus
  • speex: Ogg Speex
Possible Values: amr-nb, amr-wb, flac, linear16, mulaw, opus OR speex
Example:

encoding=flac

channels
integer

Number of independent audio channels contained in submitted streaming audio. Only read when a value is provided for encoding.

Default: 1
Example:

channels=2

sample_rate
integer

Sample rate of submitted streaming audio. Required (and only read) when a value is provided for encoding.

Example:

sample_rate=16000
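
A hedged sketch of building a streaming URL for raw PCM audio, combining the encoding, sample_rate, and channels parameters above; the values shown are illustrative:

// Sketch: query string for raw 16-bit PCM at 16 kHz, mono.
// channels and sample_rate are only read when encoding is provided.
const params = new URLSearchParams({
  encoding: 'linear16',
  sample_rate: '16000',
  channels: '1',
  interim_results: 'true',
});
const url = `wss://brain.deepgram.com/v2/listen/stream?${params}`;
console.log(url);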

Responses

Status | Description
201 success: Audio submitted for transcription.
Response Schema
Name | Description
channel_index
array
duration
number

Duration in seconds.

start
string

Offset in seconds.

is_final
boolean

Indicates whether the transcript for the processed time range is final.

channel
object
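
Based on the schema above, a minimal sketch of separating interim updates from finalized results using is_final; the message shape is assumed from the response fields listed here:

// Sketch: handle a streaming response message.
function handleMessage(raw) {
  const message = JSON.parse(raw);
  if (!message.channel) return; // e.g. the summary metadata object
  const transcript = message.channel.alternatives[0].transcript;
  console.log(message.is_final ? 'final:' : 'interim:', transcript);
}

// Usage with a ws socket: socket.on('message', (data) => handleMessage(data));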
JavaScript
// Connect to the streaming endpoint.
var establishConnection = function() {
  console.log("Establishing connection.");
  // Configure the websocket connection.
  // This requires ws installed using 'npm i ws'.
  const WebSocket = require('ws');
  socket = new WebSocket(
    'wss://brain.deepgram.com/v2/listen/stream',
    // if your base64 encoded username:password has padding ('=' signs at the end), you must strip them
    ['Basic', 'MY_BASE64_ENCODED_USERNAME:PASSWORD']
  );
  socket.onopen = (m) => {
    console.log("Socket opened!");
    // Grab an audio file.
    var fs = require('fs');
    var contents = fs.readFileSync('/path/to/audio/file.wav');
    // Send the audio to the brain api all at once (works if audio is relatively short).
    // socket.send(contents);
    // Send the audio to the brain api in chunks of 1000 bytes.
    chunk_size = 1000;
    for (i = 0; i < contents.length; i += chunk_size) {
      slice = contents.slice(i, i + chunk_size);
      socket.send(slice);
    }
    // Send the message to close the connection.
    socket.send(new Uint8Array(0));
  };
  socket.onclose = (m) => {
    console.log("Socket closed.");
  };
  socket.onmessage = (m) => {
    m = JSON.parse(m.data);
    // Log the received message.
    console.log(m);
    // Log just the words from the received message.
    if (m.hasOwnProperty('channel')) {
      let words = m.channel.alternatives[0].words;
      console.log(words);
    }
  };
};

var socket = null;
establishConnection();
Response
{
  "channel_index": [],
  "duration": 0,
  "start": "string",
  "is_final": true,
  "channel": {}
}

API Keys

Generate API keys.

Create a new key

Don't want to reuse your username and password in your requests? Don't want to share credentials within your team? Want to have separate credentials for your staging and production systems? No problem: generate all the API keys you need and use them just as you would your username and password.

Request body schema

Request body when creating an API key.

Name | Description
label
string

User-friendly name of the API Key.

Example:

My API Key

Responses

Status | Description
201 success: API Key generated
Response Schema
Name | Description
key
string

Your new API key. This should replace your username in authentication requests.

secret
string

Your new secret. This should replace your password in authentication requests.

label
string

The user-friendly name of the API key that you submitted in the body of the request.

cURL
curl --request POST \
  --url https://brain.deepgram.com/v2/keys \
  --header 'Authorization: Basic REPLACE_BASIC_AUTH' \
  --header 'content-type: application/json' \
  --data '{"label":"My API Key"}'
Response
{
  "key": "string",
  "secret": "string",
  "label": "string"
}

Get all keys

Returns the list of keys associated with your account.

Responses

Status | Description
200 success: API keys found
Response Schema
Name | Description
keys
array
cURL
curl --request GET \
  --url https://brain.deepgram.com/v2/keys \
  --header 'Authorization: Basic REPLACE_BASIC_AUTH' \
  --header 'content-type: application/json'
Response
{
  "keys": []
}

Delete a key

Deletes the specified key.

Request body schema

Request body when deleting an API key.

Name | Description
key
string

The API key you wish to delete.

Example:

x020gx00g0s0

Responses

Status | Description
204 success: The API key was deleted.
cURL
curl --request DELETE \
  --url https://brain.deepgram.com/v2/keys \
  --header 'Authorization: Basic REPLACE_BASIC_AUTH' \
  --header 'content-type: application/json' \
  --data '{"key":"x020gx00g0s0"}'