Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ root = true
indent_style = tab
indent_size = 2
tab_width = 8
trim_trailing_whitespace = true
trim_trailing_whitespace = false
insert_final_newline = true
56 changes: 29 additions & 27 deletions Sample.jconf
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,16 @@
#-rejectlong -1 # reject longer input (msec) -1 to disable

####
#### Speech detection by libfvad
#### Speech detection by WebRTC VAD (libfvad)
####
#-fvad -1 # disable libfvad
#-fvad 0 # enable on mode 0 (least aggressive to filtering out non-speech)
#-fvad 1 # enable on mode 1 (moderately aggressive to filtering out non-speech)
#-fvad 2 # enable on mode 2 (aggressive to filtering out non-speech)
#-fvad 3 # enable on mode 3 (very aggressive to filtering out non-speech)
#-fvad_param 5 0.5 # optinal parameter: smoothing frames, trigger threshold
#-fvad -1 # disable WebRTC VAD
#-fvad 0 # enable WebRTC VAD on mode 0 (least aggressive in filtering out non-speech)
#-fvad 1 # enable WebRTC VAD on mode 1 (moderately aggressive in filtering out non-speech)
#-fvad 2 # enable WebRTC VAD on mode 2 (aggressive in filtering out non-speech)
#-fvad 3 # enable WebRTC VAD on mode 3 (very aggressive in filtering out non-speech)
#-fvad_param 5 0.5 # optional parameter: smoothing frames, trigger threshold
#-agc # enable auto gain control. Should be specified with -fvad.
#-noagc # disable auto gain control.

####
#### Input rejection by average power (EXPERIMENTAL)
Expand All @@ -117,7 +119,7 @@
####
#### Gaussian Mixture Model
####
#### GMM will be used for input rejection by accumurated score, or
#### GMM will be used for input rejection by accumulated score, or
#### for GMM-based frontend VAD when "--enable-gmm-vad" specified.
####
#### NOTE: If you use MFCC for the GMM which is different from AM, you
Expand Down Expand Up @@ -188,13 +190,13 @@

## Create a new AM configuration set, and switch current to it.
## You should give a unique name.
#-AM name
#-AM name

## Create a new LM configuration set, and switch current to it.
## You should give a unique name.
#-LM name
#-LM name

## Create a new Search configuration set with AM and LM, and switch
## Create a new Search configuration set with AM and LM, and switch
## current to it. AM and LM name can be either name or ID number.
#-SR name am_name_or_id lm_name_or_id

Expand All @@ -208,7 +210,7 @@
## This option is only a switcher and can be used anywhere anytime.
# -GLOBAL

## This option disables the strict section checkings and back to 4.0
## This option disables the strict section checks and falls back to 4.0 behavior
# -nosectioncheck

######################################################################
Expand All @@ -231,7 +233,7 @@
#-mapunk "<unk>" # word to which unknown words should be mapped
#-iwspword # add a pause word to the dictionary
#-iwspentry "<UNK> [sp] sp sp" # word that will be added by "-iwspword"
#-sepnum 150 # num of high freq words to linearize
#-sepnum 150 # num of high freq words to linearize
#-adddict dictfile # append additional word dictionary
#-addword entry # append additional word entry

Expand Down Expand Up @@ -271,7 +273,7 @@
#### the AM defines the required parameter. You can use different MFCC
#### type for each AM.
#### For GMM, the same parameter should be specified after "-AM_GMM"
####
####
#### When using multiple AM, the values of "-smpPeriod", "-smpFreq",
#### "-fsize" and "-fshift" should be the same among all AM.
####
Expand Down Expand Up @@ -332,7 +334,7 @@
#-dnnconf file # DNN configuration file

## Others
#-htkconf configfile # load analysis settings from HTK Config file
#-htkconf configfile # load analysis settings from HTK Config file

######################################################################
#### RECOGNIZER (-SR)
Expand All @@ -341,7 +343,7 @@
#### Default values for beam width and LM weights will change
#### according to compile-time setup of JuliusLib and model specification.
#### Please see the startup log for the actual values.
####
####

####
#### parameter (common)
Expand Down Expand Up @@ -387,34 +389,34 @@
#-spdur 10 # # of frames to detect a short pause
#-pausemodels string # comma-separated pause model names
#### for decoder-VAD
#-spmargin 40 # backstep margin at trigger up (frame)
#-spmargin 40 # back-step margin at trigger up (frame)
#-spdelay 4 # decision delay at trigger up (frame)

####
####
#### lattice output
####
####
#-lattice # output result in word graph (aka -graphout)
#-graphrange 0 # merge same words nearby, -1 to disable merge
#-graphcut 80 # graph depth cut threshold (in depth)
#-graphboundloop 20 # max itertations for boundary adjustment loop
#-graphsearchdelay # activate an alternate generation algorithm
#-graphboundloop 20 # max iterations for boundary adjustment loop
#-graphsearchdelay # activate an alternate generation algorithm
#-nographsearchdelay # disable "-graphsearchdelay"

####
####
#### confusion network output
####
####
#-confnet # enable confusion network output
#-noconfnet # disable confusion network output

####
####
#### multi-grammar output (for grammar and isolated word)
####
####
#-multigramout # output max hypo for each grammar
#-nomultigramout # disable "-multigramout"

####
####
#### forced alignment
####
####
#-walign # enable alignment for result at word level
#-palign # enable alignment for result at phoneme level
#-salign # enable alignment for result at state level
Expand Down
48 changes: 25 additions & 23 deletions adinrec/adinrec.c
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
/**
* @file adinrec.c
*
*
* <JA>
* @brief マイクから一発話をファイルへ記録する
* </JA>
*
*
* <EN>
* @brief Record a speech segment from microphone to a file
* </EN>
*
*
* @author Akinobu LEE
* @date Wed Mar 23 20:33:01 2005
*
* $Revision: 1.13 $
*
*
*/
/*
* Copyright (c) 1991-2013 Kawahara Lab., Kyoto University
Expand All @@ -35,7 +35,7 @@ static char *filename = NULL; ///< Output file name
static boolean stout = FALSE; ///< True if output to stdout
static boolean use_raw = FALSE; ///< Output in RAW format if TRUE

/**
/**
* <JA>ヘルプを表示して終了する</JA>
* <EN>Print help and exit</EN>
*/
Expand All @@ -53,8 +53,10 @@ opt_help(Jconf *jconf, char *arg[], int argnum)
fprintf(stderr, " [-tailmargin msec] tail margin length (%d)\n", jconf->detect.tail_margin_msec);
fprintf(stderr, " [-chunksize sample] chunk size for processing (%d)\n", jconf->detect.chunk_size);
#ifdef HAVE_LIBFVAD
fprintf(stderr, " [-fvad] FVAD sw (-1=off, 0 - 3) (%d)\n", jconf->detect.fvad_mode);
fprintf(stderr, " [-fvad_param i f] FVAD parameter (dur/thres) (%d %.2f)\n", jconf->detect.fvad_smoothnum, jconf->detect.fvad_thres);
fprintf(stderr, " [-fvad mode] enable WebRTC VAD (0-3, larger value rejects noises aggressively) (%d)\n", jconf->detect.fvad_mode);
fprintf(stderr, " [-fvad_param i f] WebRTC VAD parameters (smoothing duration (frames), thres([0-1])) (%d %.2f)\n", jconf->detect.fvad_smoothnum, jconf->detect.fvad_thres);
fprintf(stderr, " [-agc][-noagc] enable/disable additional AGC on WebRTC VAD\n");
fprintf(stderr, " [-agc_param p1 ... p7] AGC parameters (%d %.2f %.2f %.2f %.2f %.2f %.2f)\n", jconf->detect.agc.overflow_thres , jconf->detect.agc.scale_max, jconf->detect.agc.scale_max_relative_first, jconf->detect.agc.level_factor_first, jconf->detect.agc.scale_up_rate, jconf->detect.agc.scale_down_rate, jconf->detect.agc.scale_down_overflow_rate);
#endif /* HAVE_LIBFVAD */
fprintf(stderr, " [-nostrip] not strip off zero samples\n");
fprintf(stderr, " [-zmean] remove DC by zero mean\n");
Expand Down Expand Up @@ -83,21 +85,21 @@ opt_freq(Jconf *jconf, char *arg[], int argnum)
return TRUE;
}

/**
/**
* <JA>
* 録音されたサンプル列を処理するコールバック関数
*
*
* @param now [in] 録音されたサンプル列
* @param len [in] 長さ(サンプル数)
*
*
* @return エラー時 -1,処理成功時 0,処理成功+区間終端検出時 1 を返す.
* </JA>
* <EN>
* Callback handler of recorded sample fragments
*
*
* @param now [in] recorded fragments of speech sample
* @param len [in] length of above in samples
*
*
* @return -1 on device error (require caller to exit and terminate input),
* 0 on success (allow caller to continue),
* 1 on success but segmentation detected (require caller to exit but
Expand Down Expand Up @@ -155,11 +157,11 @@ adin_callback_file(SP16 *now, int len, Recog *recog)
return -1;
}
}

speechlen += len;

/* progress bar in dots */
fprintf(stderr, ".");
fprintf(stderr, ".");
return(0);
}

Expand All @@ -182,7 +184,7 @@ close_file()
}
}
fprintf(stderr, "\n%d samples (%d bytes, %.2f sec.) recorded\n", speechlen, size, (float)speechlen / (float)sfreq);
}
}

/* Interrupt signal handling */
static void
Expand All @@ -196,21 +198,21 @@ interrupt_record(int signum)
}


/**
/**
* <JA>
* メイン関数
*
*
* @param argc [in] 引数列の長さ
* @param argv [in] 引数列
*
* @return
*
* @return
* </JA>エラー時 1,通常終了時 0 を返す.
* <EN>
* Main function.
*
*
* @param argc [in] number of arguments.
* @param argv [in] array of arguments.
*
*
* @return 1 on error, 0 on success.
* </EN>
*/
Expand Down Expand Up @@ -266,7 +268,7 @@ main(int argc, char *argv[])

/* set Julius default parameters for unspecified acoustic parameters */
apply_para(&(jconf->am_root->analysis.para), &(jconf->am_root->analysis.para_default));

/* set some values */
jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq;
jconf->input.period = jconf->am_root->analysis.para.smp_period;
Expand Down
10 changes: 8 additions & 2 deletions adintool/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ GUI version:
## Description

`adintool` analyzes speech input, detects speech segments skipping silence, and
records the detected segments in various ways.
records the detected segments in various ways. It accepts all Julius options.

Input waveform:

Expand All @@ -47,7 +47,7 @@ Output waveform / feature vector:
- none

This tool uses Julius's internal VAD module for speech detection. The detection
algorithm and parameters are the same as Julius.
algorithm and parameters are the same as Julius. It also accepts all Julius options.

The default audio format is 16 bit, 1 channel in Microsoft WAV format.

Expand Down Expand Up @@ -80,6 +80,12 @@ Record utterances one by one, into file "test0001.wav", "test0002.wav", ...
% adintool -in mic -out file -filename test
```

Use WebRTC-based VAD and experimental AGC.

```shell
% adintool -in mic -out file -filename test -fvad 3 -agc
```

Record only one utterance into "test.wav"

```shell
Expand Down
5 changes: 5 additions & 0 deletions adintool/adintool.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ enum{SPOUT_NONE, SPOUT_FILE, SPOUT_STDOUT, SPOUT_ADINNET, SPOUT_VECTORNET};
#define WAVE_TICK_FLAG_PROCESSED 0x01
// audio tick flag: set to indicate that an input segment was triggered down
#define WAVE_TICK_FLAG_TRIGGER 0x02
#ifdef HAVE_LIBFVAD
// audio tick flag: set to indicate that an input segment was determined as voice by fvad
#define WAVE_TICK_FLAG_FVAD_VOICED 0x04
#endif /* HAVE_LIBFVAD */


#ifdef AUTO_ADJUST_THRESHOLD
// mean / var computing window length in seconds
Expand Down
Loading