UniMRCP: mrcp_recog_header.h Source File

00001 /*
00002  * Copyright 2008-2010 Arsen Chaloyan
00003  *
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  *
00008  *     http://www.apache.org/licenses/LICENSE-2.0
00009  *
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  * 
00016  * $Id: mrcp_recog_header.h 1736 2010-06-14 20:16:22Z achaloyan $
00017  */
00018 
00019 #ifndef MRCP_RECOG_HEADER_H
00020 #define MRCP_RECOG_HEADER_H
00021 
00022 /**
00023  * @file mrcp_recog_header.h
00024  * @brief MRCP Recognizer Header
00025  */ 
00026 
00027 #include "mrcp_types.h"
00028 #include "mrcp_header_accessor.h"
00029 
00030 APT_BEGIN_EXTERN_C
00031 
00032 /** Identifiers of the MRCP recognizer header fields */
00033 typedef enum {
00034         RECOGNIZER_HEADER_CONFIDENCE_THRESHOLD,
00035         RECOGNIZER_HEADER_SENSITIVITY_LEVEL,
00036         RECOGNIZER_HEADER_SPEED_VS_ACCURACY,
00037         RECOGNIZER_HEADER_N_BEST_LIST_LENGTH,
00038         RECOGNIZER_HEADER_NO_INPUT_TIMEOUT,
00039         RECOGNIZER_HEADER_RECOGNITION_TIMEOUT,
00040         RECOGNIZER_HEADER_WAVEFORM_URI,
00041         RECOGNIZER_HEADER_COMPLETION_CAUSE,
00042         RECOGNIZER_HEADER_RECOGNIZER_CONTEXT_BLOCK,
00043         RECOGNIZER_HEADER_START_INPUT_TIMERS,
00044         RECOGNIZER_HEADER_SPEECH_COMPLETE_TIMEOUT,
00045         RECOGNIZER_HEADER_SPEECH_INCOMPLETE_TIMEOUT,
00046         RECOGNIZER_HEADER_DTMF_INTERDIGIT_TIMEOUT,
00047         RECOGNIZER_HEADER_DTMF_TERM_TIMEOUT,
00048         RECOGNIZER_HEADER_DTMF_TERM_CHAR,
00049         RECOGNIZER_HEADER_FAILED_URI,
00050         RECOGNIZER_HEADER_FAILED_URI_CAUSE,
00051         RECOGNIZER_HEADER_SAVE_WAVEFORM,
00052         RECOGNIZER_HEADER_NEW_AUDIO_CHANNEL,
00053         RECOGNIZER_HEADER_SPEECH_LANGUAGE,
00054 
00055         /** Additional header fields for MRCP v2 */
00056         RECOGNIZER_HEADER_INPUT_TYPE,
00057         RECOGNIZER_HEADER_INPUT_WAVEFORM_URI,
00058         RECOGNIZER_HEADER_COMPLETION_REASON,
00059         RECOGNIZER_HEADER_MEDIA_TYPE,
00060         RECOGNIZER_HEADER_VER_BUFFER_UTTERANCE,
00061         RECOGNIZER_HEADER_RECOGNITION_MODE,
00062         RECOGNIZER_HEADER_CANCEL_IF_QUEUE,
00063         RECOGNIZER_HEADER_HOTWORD_MAX_DURATION,
00064         RECOGNIZER_HEADER_HOTWORD_MIN_DURATION,
00065         RECOGNIZER_HEADER_INTERPRET_TEXT,
00066         RECOGNIZER_HEADER_DTMF_BUFFER_TIME,
00067         RECOGNIZER_HEADER_CLEAR_DTMF_BUFFER,
00068         RECOGNIZER_HEADER_EARLY_NO_MATCH,
00069         RECOGNIZER_HEADER_NUM_MIN_CONSISTENT_PRONUNCIATIONS,
00070         RECOGNIZER_HEADER_CONSISTENCY_THRESHOLD,
00071         RECOGNIZER_HEADER_CLASH_THRESHOLD,
00072         RECOGNIZER_HEADER_PERSONAL_GRAMMAR_URI,
00073         RECOGNIZER_HEADER_ENROLL_UTTERANCE,
00074         RECOGNIZER_HEADER_PHRASE_ID,
00075         RECOGNIZER_HEADER_PHRASE_NL,
00076         RECOGNIZER_HEADER_WEIGHT,
00077         RECOGNIZER_HEADER_SAVE_BEST_WAVEFORM,
00078         RECOGNIZER_HEADER_NEW_PHRASE_ID,
00079         RECOGNIZER_HEADER_CONFUSABLE_PHRASES_URI,
00080         RECOGNIZER_HEADER_ABORT_PHRASE_ENROLLMENT,
00081 
00082         RECOGNIZER_HEADER_COUNT /**< Number of header field identifiers declared above (last enumerator) */
00083 } mrcp_recognizer_header_id;
00084 
00085 
00086 /** MRCP recognizer completion-cause codes */
00087 typedef enum {
00088         RECOGNIZER_COMPLETION_CAUSE_SUCCESS                 = 0,
00089         RECOGNIZER_COMPLETION_CAUSE_NO_MATCH                = 1,
00090         RECOGNIZER_COMPLETION_CAUSE_NO_INPUT_TIMEOUT        = 2,
00091         RECOGNIZER_COMPLETION_CAUSE_RECOGNITION_TIMEOUT     = 3,
00092         RECOGNIZER_COMPLETION_CAUSE_GRAM_LOAD_FAILURE       = 4,
00093         RECOGNIZER_COMPLETION_CAUSE_GRAM_COMP_FAILURE       = 5,
00094         RECOGNIZER_COMPLETION_CAUSE_ERROR                   = 6,
00095         RECOGNIZER_COMPLETION_CAUSE_SPEECH_TOO_EARLY        = 7,
00096         RECOGNIZER_COMPLETION_CAUSE_TOO_MUCH_SPEECH_TIMEOUT = 8,
00097         RECOGNIZER_COMPLETION_CAUSE_URI_FAILURE             = 9,
00098         RECOGNIZER_COMPLETION_CAUSE_LANGUAGE_UNSUPPORTED    = 10,
00099 
00100         /** Additional completion-cause codes for MRCP v2 */
00101         RECOGNIZER_COMPLETION_CAUSE_CANCELLED               = 11,
00102         RECOGNIZER_COMPLETION_CAUSE_SEMANTICS_FAILURE       = 12,
00103         RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH           = 13,
00104         RECOGNIZER_COMPLETION_CAUSE_PARTIAL_MATCH_MAXTIME   = 14,
00105         RECOGNIZER_COMPLETION_CAUSE_NO_MATCH_MAXTIME        = 15,
00106         RECOGNIZER_COMPLETION_CAUSE_GRAM_DEFINITION_FAILURE = 16,
00107 
00108         RECOGNIZER_COMPLETION_CAUSE_COUNT                   = 17, /**< Number of cause codes declared above (0..16) */
00109         RECOGNIZER_COMPLETION_CAUSE_UNKNOWN                 = RECOGNIZER_COMPLETION_CAUSE_COUNT /**< Alias of COUNT, i.e. the first value past the declared codes */
00110 } mrcp_recog_completion_cause_e;
00111 
00112 
00113 
00114 /** Forward declaration of the MRCP recognizer-header structure */
00115 typedef struct mrcp_recog_header_t mrcp_recog_header_t;
00116 
00117 /** MRCP recognizer-header: one member per identifier in mrcp_recognizer_header_id, declared in the same order */
00118 struct mrcp_recog_header_t {
00119         /** Tells the recognizer resource what confidence level the client considers a
00120     successful match */
00121         float                         confidence_threshold;
00122         /** To filter out background noise and not mistake it for speech */
00123         float                         sensitivity_level;
00124         /** Tunable towards Performance or Accuracy */
00125         float                         speed_vs_accuracy;
00126         /** The client, by setting this header, can ask the recognition resource
00127         to send it more than one alternative */
00128         apr_size_t                    n_best_list_length;
00129         /** The client can use the no-input-timeout header to set this timeout */
00130         apr_size_t                    no_input_timeout;
00131         /** The client can use the recognition-timeout header to set this timeout */
00132         apr_size_t                    recognition_timeout;
00133         /** MUST be present in the RECOGNITION-COMPLETE event if the Save-Waveform
00134         header was set to true */
00135         apt_str_t                     waveform_uri;
00136         /** MUST be part of a RECOGNITION-COMPLETE event coming from
00137     the recognizer resource to the client */
00138         mrcp_recog_completion_cause_e completion_cause;
00139         /** MAY be sent as part of the SET-PARAMS or GET-PARAMS request */
00140         apt_str_t                     recognizer_context_block;
00141         /** MAY be sent as part of the RECOGNIZE request. A value of false tells
00142         the recognizer to start recognition, but not to start the no-input timer yet */
00143         apt_bool_t                    start_input_timers;
00144         /** Specifies the length of silence required following user
00145     speech before the speech recognizer finalizes a result */
00146         apr_size_t                    speech_complete_timeout;
00147         /** Specifies the required length of silence following user
00148     speech after which a recognizer finalizes a result */
00149         apr_size_t                    speech_incomplete_timeout;
00150         /** Specifies the inter-digit timeout value to use when
00151     recognizing DTMF input */
00152         apr_size_t                    dtmf_interdigit_timeout;
00153         /** Specifies the terminating timeout to use when
00154         recognizing DTMF input */
00155         apr_size_t                    dtmf_term_timeout;
00156         /** Specifies the terminating DTMF character for DTMF input
00157     recognition */
00158         char                          dtmf_term_char;
00159         /** When a recognizer needs to fetch or access a URI and the access fails,
00160     the server SHOULD provide the failed URI in this header in the method response */
00161         apt_str_t                     failed_uri;
00162         /** When a recognizer method needs a recognizer to fetch or access a URI
00163     and the access fails, the server MUST provide the URI-specific or
00164     protocol-specific response code for the URI in the Failed-URI header */
00165         apt_str_t                     failed_uri_cause;
00166         /** Allows the client to request the recognizer resource to
00167     save the audio input to the recognizer */
00168         apt_bool_t                    save_waveform;
00169         /** MAY be specified in a RECOGNIZE request and allows the
00170     client to tell the server that, from this point on, further input
00171     audio comes from a different audio source */
00172         apt_bool_t                    new_audio_channel;
00173         /** Specifies the language of recognition grammar data within
00174     a session or request, if it is not specified within the data */
00175         apt_str_t                     speech_language;
00176 
00177         /** Additional header fields for MRCP v2 */
00178         /** Specifies if the input that caused a barge-in was DTMF or speech */
00179         apt_str_t                     input_type;
00180         /** Optional header specifies a URI pointing to audio content to be
00181     processed by the RECOGNIZE operation */
00182         apt_str_t                     input_waveform_uri;
00183         /** MAY be specified in a RECOGNITION-COMPLETE event coming from
00184     the recognizer resource to the client */
00185         apt_str_t                     completion_reason;
00186         /** Tells the server resource the Media Type in which to store captured
00187         audio such as the one captured and returned by the Waveform-URI header */
00188         apt_str_t                     media_type;
00189         /** Lets the client request the server to buffer the
00190     utterance associated with this recognition request into a buffer
00191     available to a co-resident verification resource */
00192         apt_bool_t                    ver_buffer_utterance;
00193         /** Specifies what mode the RECOGNIZE method will operate in */
00194         apt_str_t                     recognition_mode;
00195         /** Specifies what will happen if the client attempts to
00196     invoke another RECOGNIZE method when this RECOGNIZE request is
00197     already in progress for the resource */
00198         apt_bool_t                    cancel_if_queue;
00199         /** Specifies the maximum length of an utterance (in seconds) that will
00200     be considered for Hotword recognition. NOTE(review): confirm the unit (seconds vs. milliseconds) against the MRCPv2 spec */
00201         apr_size_t                    hotword_max_duration;
00202         /** Specifies the minimum length of an utterance (in seconds) that will
00203     be considered for Hotword recognition. NOTE(review): confirm the unit (seconds vs. milliseconds) against the MRCPv2 spec */
00204         apr_size_t                    hotword_min_duration;
00205         /** Provides a pointer to the text for which a natural language interpretation is desired */
00206         apt_str_t                     interpret_text;
00207         /** MAY be specified in a GET-PARAMS or SET-PARAMS method and
00208     is used to specify the size in time, in milliseconds, of the
00209     type-ahead buffer for the recognizer */
00210         apr_size_t                    dtmf_buffer_time;
00211         /** MAY be specified in a RECOGNIZE method and is used to
00212     tell the recognizer to clear the DTMF type-ahead buffer before
00213     starting the recognize */
00214         apt_bool_t                    clear_dtmf_buffer;
00215         /** MAY be specified in a RECOGNIZE method and is used to
00216     tell the recognizer that it MUST NOT wait for the end of speech
00217     before processing the collected speech to match active grammars */
00218         apt_bool_t                    early_no_match;
00219         /** MAY be specified in a START-PHRASE-ENROLLMENT, SET-PARAMS, or
00220         GET-PARAMS method and is used to specify the minimum number of
00221         consistent pronunciations that must be obtained to voice enroll a new phrase */
00222         apr_size_t                    num_min_consistent_pronunciations;
00223         /** MAY be sent as part of the START-PHRASE-ENROLLMENT, SET-PARAMS, or
00224         GET-PARAMS method and is used during voice-enrollment to specify how similar
00225         to a previously enrolled pronunciation of the same phrase an utterance needs
00226         to be in order to be considered "consistent" */
00227         float                         consistency_threshold;
00228         /** MAY be sent as part of the START-PHRASE-ENROLLMENT, SET-PARAMS, or
00229         GET-PARAMS method and is used during voice-enrollment to specify
00230         how similar the pronunciations of two different phrases can be
00231         before they are considered to be clashing */
00232         float                         clash_threshold;
00233         /** Specifies the speaker-trained grammar to be used or
00234         referenced during enrollment operations */
00235         apt_str_t                     personal_grammar_uri;
00236         /** MAY be specified in the RECOGNIZE method. If this header
00237         is set to "true" and an Enrollment is active, the RECOGNIZE command
00238         MUST add the collected utterance to the personal grammar that is
00239         being enrolled */
00240         apt_bool_t                    enroll_utterance;
00241         /** Identifies a phrase in an existing personal grammar for which
00242         enrollment is desired. It is also returned to the client in the
00243         RECOGNIZE complete event */
00244         apt_str_t                     phrase_id;
00245         /** Specifies the interpreted text to be returned when the
00246         phrase is recognized */
00247         apt_str_t                     phrase_nl;
00248         /** Represents the occurrence likelihood of a phrase in an enrolled grammar */
00249         float                         weight;
00250         /** Allows the client to request the recognizer resource to
00251         save the audio stream for the best repetition of the phrase that was
00252         used during the enrollment session */
00253         apt_bool_t                    save_best_waveform;
00254         /** Replaces the id used to identify the phrase in a personal grammar */
00255         apt_str_t                     new_phrase_id;
00256         /** Specifies a grammar that defines invalid phrases for enrollment */
00257         apt_str_t                     confusable_phrases_uri;
00258         /** Can optionally be specified in the END-PHRASE-ENROLLMENT
00259         method to abort the phrase enrollment, rather than committing the
00260         phrase to the personal grammar */
00261         apt_bool_t                    abort_phrase_enrollment;
00262 };
00263 
00264 
00265 /** Get the recognizer header vtable for the given MRCP version. NOTE(review): unlike mrcp_recog_completion_cause_get(), this prototype is not wrapped in MRCP_DECLARE - confirm that is intentional */
00266 const mrcp_header_vtable_t* mrcp_recog_header_vtable_get(mrcp_version_e version);
00267 
00268 /** Get the string for the given recognizer completion cause and MRCP version */
00269 MRCP_DECLARE(const apt_str_t*) mrcp_recog_completion_cause_get(mrcp_recog_completion_cause_e completion_cause, mrcp_version_e version);
00270 
00271 APT_END_EXTERN_C
00272 
00273 #endif /* MRCP_RECOG_HEADER_H */