UniMRCP  1.7.0
mrcp_recog_header.h
Go to the documentation of this file.
1 /*
2  * Copyright 2008-2015 Arsen Chaloyan
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MRCP_RECOG_HEADER_H
18 #define MRCP_RECOG_HEADER_H
19 
20 /**
21  * @file mrcp_recog_header.h
22  * @brief MRCP Recognizer Header
23  */
24 
25 #include "mrcp_types.h"
26 #include "mrcp_header_accessor.h"
27 
29 
30 /** MRCP recognizer header fields */
31 typedef enum {
32  RECOGNIZER_HEADER_CONFIDENCE_THRESHOLD,
33  RECOGNIZER_HEADER_SENSITIVITY_LEVEL,
34  RECOGNIZER_HEADER_SPEED_VS_ACCURACY,
35  RECOGNIZER_HEADER_N_BEST_LIST_LENGTH,
36  RECOGNIZER_HEADER_NO_INPUT_TIMEOUT,
37  RECOGNIZER_HEADER_RECOGNITION_TIMEOUT,
38  RECOGNIZER_HEADER_WAVEFORM_URI,
39  RECOGNIZER_HEADER_COMPLETION_CAUSE,
40  RECOGNIZER_HEADER_RECOGNIZER_CONTEXT_BLOCK,
41  RECOGNIZER_HEADER_START_INPUT_TIMERS,
42  RECOGNIZER_HEADER_SPEECH_COMPLETE_TIMEOUT,
43  RECOGNIZER_HEADER_SPEECH_INCOMPLETE_TIMEOUT,
44  RECOGNIZER_HEADER_DTMF_INTERDIGIT_TIMEOUT,
45  RECOGNIZER_HEADER_DTMF_TERM_TIMEOUT,
46  RECOGNIZER_HEADER_DTMF_TERM_CHAR,
47  RECOGNIZER_HEADER_FAILED_URI,
48  RECOGNIZER_HEADER_FAILED_URI_CAUSE,
49  RECOGNIZER_HEADER_SAVE_WAVEFORM,
50  RECOGNIZER_HEADER_NEW_AUDIO_CHANNEL,
51  RECOGNIZER_HEADER_SPEECH_LANGUAGE,
52 
53  /** Additional header fields for MRCP v2 */
55  RECOGNIZER_HEADER_INPUT_WAVEFORM_URI,
56  RECOGNIZER_HEADER_COMPLETION_REASON,
57  RECOGNIZER_HEADER_MEDIA_TYPE,
58  RECOGNIZER_HEADER_VER_BUFFER_UTTERANCE,
59  RECOGNIZER_HEADER_RECOGNITION_MODE,
60  RECOGNIZER_HEADER_CANCEL_IF_QUEUE,
61  RECOGNIZER_HEADER_HOTWORD_MAX_DURATION,
62  RECOGNIZER_HEADER_HOTWORD_MIN_DURATION,
63  RECOGNIZER_HEADER_INTERPRET_TEXT,
64  RECOGNIZER_HEADER_DTMF_BUFFER_TIME,
65  RECOGNIZER_HEADER_CLEAR_DTMF_BUFFER,
66  RECOGNIZER_HEADER_EARLY_NO_MATCH,
67  RECOGNIZER_HEADER_NUM_MIN_CONSISTENT_PRONUNCIATIONS,
68  RECOGNIZER_HEADER_CONSISTENCY_THRESHOLD,
69  RECOGNIZER_HEADER_CLASH_THRESHOLD,
70  RECOGNIZER_HEADER_PERSONAL_GRAMMAR_URI,
71  RECOGNIZER_HEADER_ENROLL_UTTERANCE,
72  RECOGNIZER_HEADER_PHRASE_ID,
73  RECOGNIZER_HEADER_PHRASE_NL,
74  RECOGNIZER_HEADER_WEIGHT,
75  RECOGNIZER_HEADER_SAVE_BEST_WAVEFORM,
76  RECOGNIZER_HEADER_NEW_PHRASE_ID,
77  RECOGNIZER_HEADER_CONFUSABLE_PHRASES_URI,
78  RECOGNIZER_HEADER_ABORT_PHRASE_ENROLLMENT,
79 
80  RECOGNIZER_HEADER_COUNT
82 
83 
84 /** MRCP recognizer completion-cause */
85 typedef enum {
86  RECOGNIZER_COMPLETION_CAUSE_SUCCESS = 0,
87  RECOGNIZER_COMPLETION_CAUSE_NO_MATCH = 1,
88  RECOGNIZER_COMPLETION_CAUSE_NO_INPUT_TIMEOUT = 2,
89  RECOGNIZER_COMPLETION_CAUSE_RECOGNITION_TIMEOUT = 3,
90  RECOGNIZER_COMPLETION_CAUSE_GRAM_LOAD_FAILURE = 4,
91  RECOGNIZER_COMPLETION_CAUSE_GRAM_COMP_FAILURE = 5,
92  RECOGNIZER_COMPLETION_CAUSE_ERROR = 6,
93  RECOGNIZER_COMPLETION_CAUSE_SPEECH_TOO_EARLY = 7,
94  RECOGNIZER_COMPLETION_CAUSE_TOO_MUCH_SPEECH_TIMEOUT = 8,
95  RECOGNIZER_COMPLETION_CAUSE_URI_FAILURE = 9,
96  RECOGNIZER_COMPLETION_CAUSE_LANGUAGE_UNSUPPORTED = 10,
97 
98  /** Additional completion-cause for MRCP v2 */
100  RECOGNIZER_COMPLETION_CAUSE_SEMANTICS_FAILURE = 12,
101  RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH = 13,
102  RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH_MAXTIME = 14,
103  RECOGNIZER_COMPLETION_CAUSE_NO_MATCH_MAXTIME = 15,
104  RECOGNIZER_COMPLETION_CAUSE_GRAM_DEFINITION_FAILURE = 16,
105 
106  RECOGNIZER_COMPLETION_CAUSE_COUNT = 17,
107  RECOGNIZER_COMPLETION_CAUSE_UNKNOWN = RECOGNIZER_COMPLETION_CAUSE_COUNT
109 
110 
111 
112 /** MRCP recognizer-header declaration */
114 
115 /** MRCP recognizer-header */
117  /** Tells the recognizer resource what confidence level the client considers a
118  successful match */
120  /** To filter out background noise and not mistake it for speech */
122  /** Tunable towards Performance or Accuracy */
124  /** The client, by setting this header, can ask the recognition resource
125  to send it more than 1 alternative */
126  apr_size_t n_best_list_length;
127  /** The client can use the no-input-timeout header to set this timeout */
128  apr_size_t no_input_timeout;
129  /** The client can use the recognition-timeout header to set this timeout */
131  /** MUST be present in the RECOGNITION-COMPLETE event if the Save-Waveform
132  header was set to true */
134  /** MUST be part of a RECOGNITION-COMPLETE event coming from
135  the recognizer resource to the client */
137  /** MAY be sent as part of the SET-PARAMS or GET-PARAMS request */
139  /** MAY be sent as part of the RECOGNIZE request. A value of false tells
140  the recognizer to start recognition, but not to start the no-input timer yet */
142  /** Specifies the length of silence required following user speech before the speech
143  recognizer finalizes a result; applies when the speech is a complete match of an active grammar */
145  /** Specifies the required length of silence following user speech after which a recognizer
146  finalizes a result; applies when the speech is an incomplete match of the active grammars */
148  /** Specifies the inter-digit timeout value to use when
149  recognizing DTMF input */
151  /** Specifies the terminating timeout to use when
152  recognizing DTMF input */
153  apr_size_t dtmf_term_timeout;
154  /** Specifies the terminating DTMF character for DTMF input
155  recognition */
157  /** When a recognizer needs to fetch or access a URI and the access fails,
158  the server SHOULD provide the failed URI in this header in the method response */
160  /** When a recognizer method needs a recognizer to fetch or access a URI
161  and the access fails, the server MUST provide the URI-specific or
162  protocol-specific response code for the URI in the Failed-URI header */
164  /** Allows the client to request the recognizer resource to
165  save the audio input to the recognizer */
167  /** MAY be specified in a RECOGNIZE request and allows the
168  client to tell the server that, from this point on, further input
169  audio comes from a different audio source */
171  /** Specifies the language of recognition grammar data within
172  a session or request, if it is not specified within the data */
174 
175  /** Additional header fields for MRCP v2 */
176  /** Specifies if the input that caused a barge-in was DTMF or speech */
178  /** Optional header specifies a URI pointing to audio content to be
179  processed by the RECOGNIZE operation */
181  /** MAY be specified in a RECOGNITION-COMPLETE event coming from
182  the recognizer resource to the client */
184  /** Tells the server resource the Media Type in which to store captured
185  audio such as the one captured and returned by the Waveform-URI header */
187  /** Lets the client request the server to buffer the
188  utterance associated with this recognition request into a buffer
189  available to a co-resident verification resource */
191  /** Specifies what mode the RECOGNIZE method will operate in */
193  /** Specifies what will happen if the client attempts to
194  invoke another RECOGNIZE method when this RECOGNIZE request is
195  already in progress for the resource */
197  /** Specifies the maximum length of an utterance (in seconds) that will
198  be considered for Hotword recognition */
200  /** Specifies the minimum length of an utterance (in seconds) that will
201  be considered for Hotword recognition */
203  /** Provides a pointer to the text for which a natural language interpretation is desired */
205  /** MAY be specified in a GET-PARAMS or SET-PARAMS method and
206  is used to specify the size in time, in milliseconds, of the
207  typeahead buffer for the recognizer */
208  apr_size_t dtmf_buffer_time;
209  /** MAY be specified in a RECOGNIZE method and is used to
210  tell the recognizer to clear the DTMF type-ahead buffer before
211  starting the recognize */
213  /** MAY be specified in a RECOGNIZE method and is used to
214  tell the recognizer that it MUST NOT wait for the end of speech
215  before processing the collected speech to match active grammars */
217  /** MAY be specified in a START-PHRASE-ENROLLMENT, SET-PARAMS, or
218  GET-PARAMS method and is used to specify the minimum number of
219  consistent pronunciations that must be obtained to voice enroll a new phrase */
221  /** MAY be sent as part of the START-PHRASE-ENROLLMENT, SET-PARAMS, or
222  GET-PARAMS method and is used during voice-enrollment to specify how similar
223  to a previously enrolled pronunciation of the same phrase an utterance needs
224  to be in order to be considered "consistent" */
226  /** MAY be sent as part of the START-PHRASE-ENROLLMENT, SET-PARAMS, or
227  GET-PARAMS method and is used during voice-enrollment to specify
228  how similar the pronunciations of two different phrases can be
229  before they are considered to be clashing */
231  /** Specifies the speaker-trained grammar to be used or
232  referenced during enrollment operations */
234  /** MAY be specified in the RECOGNIZE method. If this header
235  is set to "true" and an Enrollment is active, the RECOGNIZE command
236  MUST add the collected utterance to the personal grammar that is
237  being enrolled */
239  /** Identifies a phrase in an existing personal grammar for which
240  enrollment is desired. It is also returned to the client in the
241  RECOGNIZE complete event */
243  /** Specifies the interpreted text to be returned when the
244  phrase is recognized */
246  /** Represents the occurrence likelihood of a phrase in an enrolled grammar */
247  float weight;
248  /** Allows the client to request the recognizer resource to
249  save the audio stream for the best repetition of the phrase that was
250  used during the enrollment session */
252  /** Replaces the id used to identify the phrase in a personal grammar */
254  /** Specifies a grammar that defines invalid phrases for enrollment */
256  /** Can optionally be specified in the END-PHRASE-ENROLLMENT
257  method to abort the phrase enrollment, rather than committing the
258  phrase to the personal grammar */
260 };
261 
262 
263 /** Get recognizer header vtable */
265 
266 /** Get recognizer completion cause string */
268 
270 
271 #endif /* MRCP_RECOG_HEADER_H */
#define MRCP_DECLARE(type)
Definition: mrcp.h:40
apt_str_t completion_reason
Definition: mrcp_recog_header.h:183
apt_bool_t save_waveform
Definition: mrcp_recog_header.h:166
float confidence_threshold
Definition: mrcp_recog_header.h:119
apr_size_t hotword_max_duration
Definition: mrcp_recog_header.h:199
apt_bool_t early_no_match
Definition: mrcp_recog_header.h:216
apt_bool_t cancel_if_queue
Definition: mrcp_recog_header.h:196
Definition: mrcp_recog_header.h:54
apt_str_t recognition_mode
Definition: mrcp_recog_header.h:192
apr_size_t n_best_list_length
Definition: mrcp_recog_header.h:126
const mrcp_header_vtable_t * mrcp_recog_header_vtable_get(mrcp_version_e version)
apt_bool_t ver_buffer_utterance
Definition: mrcp_recog_header.h:190
float speed_vs_accuracy
Definition: mrcp_recog_header.h:123
apt_str_t interpret_text
Definition: mrcp_recog_header.h:204
apt_str_t failed_uri_cause
Definition: mrcp_recog_header.h:163
#define APT_END_EXTERN_C
Definition: apt.h:38
apr_size_t num_min_consistent_pronunciations
Definition: mrcp_recog_header.h:220
float sensitivity_level
Definition: mrcp_recog_header.h:121
int apt_bool_t
Definition: apt.h:57
apt_bool_t save_best_waveform
Definition: mrcp_recog_header.h:251
mrcp_recog_completion_cause_e
Definition: mrcp_recog_header.h:85
Definition: mrcp_header_accessor.h:37
apt_bool_t new_audio_channel
Definition: mrcp_recog_header.h:170
apr_size_t recognition_timeout
Definition: mrcp_recog_header.h:130
mrcp_version_e
Definition: mrcp_types.h:30
char dtmf_term_char
Definition: mrcp_recog_header.h:156
apr_size_t dtmf_buffer_time
Definition: mrcp_recog_header.h:208
apt_str_t recognizer_context_block
Definition: mrcp_recog_header.h:138
apt_str_t waveform_uri
Definition: mrcp_recog_header.h:133
float clash_threshold
Definition: mrcp_recog_header.h:230
Definition: mrcp_recog_header.h:116
#define APT_BEGIN_EXTERN_C
Definition: apt.h:36
Basic MRCP Types.
apt_str_t phrase_nl
Definition: mrcp_recog_header.h:245
apt_str_t new_phrase_id
Definition: mrcp_recog_header.h:253
apt_str_t input_type
Definition: mrcp_recog_header.h:177
apt_bool_t abort_phrase_enrollment
Definition: mrcp_recog_header.h:259
apt_bool_t enroll_utterance
Definition: mrcp_recog_header.h:238
apt_str_t media_type
Definition: mrcp_recog_header.h:186
apr_size_t dtmf_term_timeout
Definition: mrcp_recog_header.h:153
apt_str_t speech_language
Definition: mrcp_recog_header.h:173
apr_size_t speech_complete_timeout
Definition: mrcp_recog_header.h:144
Definition: apt_string.h:36
apt_bool_t clear_dtmf_buffer
Definition: mrcp_recog_header.h:212
apt_str_t input_waveform_uri
Definition: mrcp_recog_header.h:180
mrcp_recognizer_header_id
Definition: mrcp_recog_header.h:31
apt_str_t confusable_phrases_uri
Definition: mrcp_recog_header.h:255
mrcp_recog_completion_cause_e completion_cause
Definition: mrcp_recog_header.h:136
Abstract MRCP Header Accessor.
apt_str_t personal_grammar_uri
Definition: mrcp_recog_header.h:233
float weight
Definition: mrcp_recog_header.h:247
apr_size_t hotword_min_duration
Definition: mrcp_recog_header.h:202
float consistency_threshold
Definition: mrcp_recog_header.h:225
apt_str_t failed_uri
Definition: mrcp_recog_header.h:159
apt_bool_t start_input_timers
Definition: mrcp_recog_header.h:141
Definition: mrcp_recog_header.h:99
apr_size_t dtmf_interdigit_timeout
Definition: mrcp_recog_header.h:150
apr_size_t no_input_timeout
Definition: mrcp_recog_header.h:128
const apt_str_t * mrcp_recog_completion_cause_get(mrcp_recog_completion_cause_e completion_cause, mrcp_version_e version)
apt_str_t phrase_id
Definition: mrcp_recog_header.h:242
apr_size_t speech_incomplete_timeout
Definition: mrcp_recog_header.h:147