00001 /* 00002 * Copyright 2008-2010 Arsen Chaloyan 00003 * 00004 * Licensed under the Apache License, Version 2.0 (the "License"); 00005 * you may not use this file except in compliance with the License. 00006 * You may obtain a copy of the License at 00007 * 00008 * http://www.apache.org/licenses/LICENSE-2.0 00009 * 00010 * Unless required by applicable law or agreed to in writing, software 00011 * distributed under the License is distributed on an "AS IS" BASIS, 00012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00013 * See the License for the specific language governing permissions and 00014 * limitations under the License. 00015 * 00016 * $Id: mrcp_recog_header.h 1736 2010-06-14 20:16:22Z achaloyan $ 00017 */ 00018 00019 #ifndef MRCP_RECOG_HEADER_H 00020 #define MRCP_RECOG_HEADER_H 00021 00022 /** 00023 * @file mrcp_recog_header.h 00024 * @brief MRCP Recognizer Header 00025 */ 00026 00027 #include "mrcp_types.h" 00028 #include "mrcp_header_accessor.h" 00029 00030 APT_BEGIN_EXTERN_C 00031 00032 /** MRCP recognizer header fields */ 00033 typedef enum { 00034 RECOGNIZER_HEADER_CONFIDENCE_THRESHOLD, 00035 RECOGNIZER_HEADER_SENSITIVITY_LEVEL, 00036 RECOGNIZER_HEADER_SPEED_VS_ACCURACY, 00037 RECOGNIZER_HEADER_N_BEST_LIST_LENGTH, 00038 RECOGNIZER_HEADER_NO_INPUT_TIMEOUT, 00039 RECOGNIZER_HEADER_RECOGNITION_TIMEOUT, 00040 RECOGNIZER_HEADER_WAVEFORM_URI, 00041 RECOGNIZER_HEADER_COMPLETION_CAUSE, 00042 RECOGNIZER_HEADER_RECOGNIZER_CONTEXT_BLOCK, 00043 RECOGNIZER_HEADER_START_INPUT_TIMERS, 00044 RECOGNIZER_HEADER_SPEECH_COMPLETE_TIMEOUT, 00045 RECOGNIZER_HEADER_SPEECH_INCOMPLETE_TIMEOUT, 00046 RECOGNIZER_HEADER_DTMF_INTERDIGIT_TIMEOUT, 00047 RECOGNIZER_HEADER_DTMF_TERM_TIMEOUT, 00048 RECOGNIZER_HEADER_DTMF_TERM_CHAR, 00049 RECOGNIZER_HEADER_FAILED_URI, 00050 RECOGNIZER_HEADER_FAILED_URI_CAUSE, 00051 RECOGNIZER_HEADER_SAVE_WAVEFORM, 00052 RECOGNIZER_HEADER_NEW_AUDIO_CHANNEL, 00053 RECOGNIZER_HEADER_SPEECH_LANGUAGE, 00054 00055 /** Additional header fields for MRCP v2 */ 00056 RECOGNIZER_HEADER_INPUT_TYPE, 00057 RECOGNIZER_HEADER_INPUT_WAVEFORM_URI, 00058 RECOGNIZER_HEADER_COMPLETION_REASON, 00059 RECOGNIZER_HEADER_MEDIA_TYPE, 00060 RECOGNIZER_HEADER_VER_BUFFER_UTTERANCE, 00061 RECOGNIZER_HEADER_RECOGNITION_MODE, 00062 RECOGNIZER_HEADER_CANCEL_IF_QUEUE, 00063 RECOGNIZER_HEADER_HOTWORD_MAX_DURATION, 00064 RECOGNIZER_HEADER_HOTWORD_MIN_DURATION, 00065 RECOGNIZER_HEADER_INTERPRET_TEXT, 00066 RECOGNIZER_HEADER_DTMF_BUFFER_TIME, 00067 RECOGNIZER_HEADER_CLEAR_DTMF_BUFFER, 00068 RECOGNIZER_HEADER_EARLY_NO_MATCH, 00069 RECOGNIZER_HEADER_NUM_MIN_CONSISTENT_PRONUNCIATIONS, 00070 RECOGNIZER_HEADER_CONSISTENCY_THRESHOLD, 00071 RECOGNIZER_HEADER_CLASH_THRESHOLD, 00072 RECOGNIZER_HEADER_PERSONAL_GRAMMAR_URI, 00073 RECOGNIZER_HEADER_ENROLL_UTTERANCE, 00074 RECOGNIZER_HEADER_PHRASE_ID, 00075 RECOGNIZER_HEADER_PHRASE_NL, 00076 RECOGNIZER_HEADER_WEIGHT, 00077 RECOGNIZER_HEADER_SAVE_BEST_WAVEFORM, 00078 RECOGNIZER_HEADER_NEW_PHRASE_ID, 00079 RECOGNIZER_HEADER_CONFUSABLE_PHRASES_URI, 00080 RECOGNIZER_HEADER_ABORT_PHRASE_ENROLLMENT, 00081 00082 RECOGNIZER_HEADER_COUNT 00083 } mrcp_recognizer_header_id; 00084 00085 00086 /** MRCP recognizer completion-cause */ 00087 typedef enum { 00088 RECOGNIZER_COMPLETION_CAUSE_SUCCESS = 0, 00089 RECOGNIZER_COMPLETION_CAUSE_NO_MATCH = 1, 00090 RECOGNIZER_COMPLETION_CAUSE_NO_INPUT_TIMEOUT = 2, 00091 RECOGNIZER_COMPLETION_CAUSE_RECOGNITION_TIMEOUT = 3, 00092 RECOGNIZER_COMPLETION_CAUSE_GRAM_LOAD_FAILURE = 4, 00093 RECOGNIZER_COMPLETION_CAUSE_GRAM_COMP_FAILURE = 5, 00094 RECOGNIZER_COMPLETION_CAUSE_ERROR = 6, 00095 RECOGNIZER_COMPLETION_CAUSE_SPEECH_TOO_EARLY = 7, 00096 RECOGNIZER_COMPLETION_CAUSE_TOO_MUCH_SPEECH_TIMEOUT = 8, 00097 RECOGNIZER_COMPLETION_CAUSE_URI_FAILURE = 9, 00098 RECOGNIZER_COMPLETION_CAUSE_LANGUAGE_UNSUPPORTED = 10, 00099 00100 /** Additional completion-cause for MRCP v2 */ 00101 RECOGNIZER_COMPLETION_CAUSE_CANCELLED = 11, 00102 RECOGNIZER_COMPLETION_CAUSE_SEMANTICS_FAILURE = 12, 00103 RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH = 13, 00104 RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH_MAXTIME = 14, 00105 RECOGNIZER_COMPLETION_CAUSE_NO_MATCH_MAXTIME = 15, 00106 RECOGNIZER_COMPLETION_CAUSE_GRAM_DEFINITION_FAILURE = 16, 00107 00108 RECOGNIZER_COMPLETION_CAUSE_COUNT = 17, 00109 RECOGNIZER_COMPLETION_CAUSE_UNKNOWN = RECOGNIZER_COMPLETION_CAUSE_COUNT 00110 } mrcp_recog_completion_cause_e; 00111 00112 00113 00114 /** MRCP recognizer-header declaration */ 00115 typedef struct mrcp_recog_header_t mrcp_recog_header_t; 00116 00117 /** MRCP recognizer-header */ 00118 struct mrcp_recog_header_t { 00119 /** Tells the recognizer resource what confidence level the client considers a 00120 successful match */ 00121 float confidence_threshold; 00122 /** To filter out background noise and not mistake it for speech */ 00123 float sensitivity_level; 00124 /** Tunable towards Performance or Accuracy */ 00125 float speed_vs_accuracy; 00126 /** The client, by setting this header, can ask the recognition resource 00127 to send it more than 1 alternative */ 00128 apr_size_t n_best_list_length; 00129 /** The client can use the no-input-timeout header to set this timeout */ 00130 apr_size_t no_input_timeout; 00131 /** The client can use the recognition-timeout header to set this timeout */ 00132 apr_size_t recognition_timeout; 00133 /** MUST be present in the RECOGNITION-COMPLETE event if the Save-Waveform 00134 header was set to true */ 00135 apt_str_t waveform_uri; 00136 /** MUST be part of a RECOGNITION-COMPLETE, event coming from 00137 the recognizer resource to the client */ 00138 mrcp_recog_completion_cause_e completion_cause; 00139 /** MAY be sent as part of the SET-PARAMS or GET-PARAMS request */ 00140 apt_str_t recognizer_context_block; 00141 /** MAY be sent as part of the RECOGNIZE request. A value of false tells 00142 the recognizer to start recognition, but not to start the no-input timer yet */ 00143 apt_bool_t start_input_timers; 00144 /** Specifies the length of silence required following user 00145 speech before the speech recognizer finalizes a result */ 00146 apr_size_t speech_complete_timeout; 00147 /** Specifies the required length of silence following user 00148 speech after which a recognizer finalizes a result */ 00149 apr_size_t speech_incomplete_timeout; 00150 /** Specifies the inter-digit timeout value to use when 00151 recognizing DTMF input */ 00152 apr_size_t dtmf_interdigit_timeout; 00153 /** Specifies the terminating timeout to use when 00154 recognizing DTMF input*/ 00155 apr_size_t dtmf_term_timeout; 00156 /** Specifies the terminating DTMF character for DTMF input 00157 recognition */ 00158 char dtmf_term_char; 00159 /** When a recognizer needs to fetch or access a URI and the access fails 00160 the server SHOULD provide the failed URI in this header in the method response*/ 00161 apt_str_t failed_uri; 00162 /** When a recognizer method needs a recognizer to fetch or access a URI 00163 and the access fails the server MUST provide the URI specific or 00164 protocol specific response code for the URI in the Failed-URI header */ 00165 apt_str_t failed_uri_cause; 00166 /** Allows the client to request the recognizer resource to 00167 save the audio input to the recognizer */ 00168 apt_bool_t save_waveform; 00169 /** MAY be specified in a RECOGNIZE request and allows the 00170 client to tell the server that, from this point on, further input 00171 audio comes from a different audio source */ 00172 apt_bool_t new_audio_channel; 00173 /** Specifies the language of recognition grammar data within 00174 a session or request, if it is not specified within the data */ 00175 apt_str_t speech_language; 00176 00177 /** Additional header fields for MRCP v2 */ 00178 /** Specifies if the input that caused a barge-in was DTMF or speech */ 00179 apt_str_t input_type; 00180 /** Optional header specifies a URI pointing to audio content to be 00181 processed by the RECOGNIZE operation */ 00182 apt_str_t input_waveform_uri; 00183 /** MAY be specified in a RECOGNITION-COMPLETE event coming from 00184 the recognizer resource to the client */ 00185 apt_str_t completion_reason; 00186 /** Tells the server resource the Media Type in which to store captured 00187 audio such as the one captured and returned by the Waveform-URI header */ 00188 apt_str_t media_type; 00189 /** Lets the client request the server to buffer the 00190 utterance associated with this recognition request into a buffer 00191 available to a co-resident verification resource */ 00192 apt_bool_t ver_buffer_utterance; 00193 /** Specifies what mode the RECOGNIZE method will operate in */ 00194 apt_str_t recognition_mode; 00195 /** Specifies what will happen if the client attempts to 00196 invoke another RECOGNIZE method when this RECOGNIZE request is 00197 already in progress for the resource*/ 00198 apt_bool_t cancel_if_queue; 00199 /** Specifies the maximum length of an utterance (in seconds) that will 00200 be considered for Hotword recognition */ 00201 apr_size_t hotword_max_duration; 00202 /** Specifies the minimum length of an utterance (in seconds) that will 00203 be considered for Hotword recognition */ 00204 apr_size_t hotword_min_duration; 00205 /** Provides a pointer to the text for which a natural language interpretation is desired */ 00206 apt_str_t interpret_text; 00207 /** MAY be specified in a GET-PARAMS or SET-PARAMS method and 00208 is used to specify the size in time, in milliseconds, of the 00209 typeahead buffer for the recognizer */ 00210 apr_size_t dtmf_buffer_time; 00211 /** MAY be specified in a RECOGNIZE method and is used to 00212 tell the recognizer to clear the DTMF type-ahead buffer before 00213 starting the recognize */ 00214 apt_bool_t clear_dtmf_buffer; 00215 /** MAY be specified in a RECOGNIZE method and is used to 00216 tell the recognizer that it MUST not wait for the end of speech 00217 before processing the collected speech to match active grammars */ 00218 apt_bool_t early_no_match; 00219 /** MAY be specified in a START-PHRASE-ENROLLMENT, "SET-PARAMS", or 00220 "GET-PARAMS" method and is used to specify the minimum number of 00221 consistent pronunciations that must be obtained to voice enroll a new phrase */ 00222 apr_size_t num_min_consistent_pronunciations; 00223 /** MAY be sent as part of the START-PHRASE-ENROLLMENT,"SET-PARAMS", or 00224 "GET-PARAMS" method and is used during voice-enrollment to specify how similar 00225 to a previously enrolled pronunciation of the same phrase an utterance needs 00226 to be in order to be considered "consistent" */ 00227 float consistency_threshold; 00228 /** MAY be sent as part of the START-PHRASE-ENROLLMENT, SET-PARAMS, or 00229 "GET-PARAMS" method and is used during voice-enrollment to specify 00230 how similar the pronunciations of two different phrases can be 00231 before they are considered to be clashing */ 00232 float clash_threshold; 00233 /** Specifies the speaker-trained grammar to be used or 00234 referenced during enrollment operations */ 00235 apt_str_t personal_grammar_uri; 00236 /** MAY be specified in the RECOGNIZE method. If this header 00237 is set to "true" and an Enrollment is active, the RECOGNIZE command 00238 MUST add the collected utterance to the personal grammar that is 00239 being enrolled */ 00240 apt_bool_t enroll_utterance; 00241 /** Identifies a phrase in an existing personal grammar for which 00242 enrollment is desired. It is also returned to the client in the 00243 RECOGNIZE complete event */ 00244 apt_str_t phrase_id; 00245 /** Specifies the interpreted text to be returned when the 00246 phrase is recognized */ 00247 apt_str_t phrase_nl; 00248 /** Represents the occurrence likelihood of a phrase in an enrolled grammar */ 00249 float weight; 00250 /** Allows the client to request the recognizer resource to 00251 save the audio stream for the best repetition of the phrase that was 00252 used during the enrollment session */ 00253 apt_bool_t save_best_waveform; 00254 /** Replaces the id used to identify the phrase in a personal grammar */ 00255 apt_str_t new_phrase_id; 00256 /** Specifies a grammar that defines invalid phrases for enrollment */ 00257 apt_str_t confusable_phrases_uri; 00258 /** Can optionally be specified in the END-PHRASE-ENROLLMENT 00259 method to abort the phrase enrollment, rather than committing the 00260 phrase to the personal grammar */ 00261 apt_bool_t abort_phrase_enrollment; 00262 }; 00263 00264 00265 /** Get recognizer header vtable */ 00266 const mrcp_header_vtable_t* mrcp_recog_header_vtable_get(mrcp_version_e version); 00267 00268 /** Get recognizer completion cause string */ 00269 MRCP_DECLARE(const apt_str_t*) mrcp_recog_completion_cause_get(mrcp_recog_completion_cause_e completion_cause, mrcp_version_e version); 00270 00271 APT_END_EXTERN_C 00272 00273 #endif /* MRCP_RECOG_HEADER_H */