torchaudio Documentation
Main Page
Attributes
torchaudio.functional.TokenSpan.end
torchaudio.functional.TokenSpan.score
torchaudio.functional.TokenSpan.start
torchaudio.functional.TokenSpan.token
torchaudio.io.StreamReader
torchaudio.io.StreamWriter
torchaudio.models.decoder.CTCHypothesis.score
torchaudio.models.decoder.CTCHypothesis.timesteps
torchaudio.models.decoder.CTCHypothesis.tokens
torchaudio.models.decoder.CTCHypothesis.words
torchaudio.models.decoder.CUCTCHypothesis.score
torchaudio.models.decoder.CUCTCHypothesis.tokens
torchaudio.models.decoder.CUCTCHypothesis.words
torio.io.CodecConfig.bit_rate
torio.io.CodecConfig.compression_level
torio.io.CodecConfig.gop_size
torio.io.CodecConfig.max_b_frames
torio.io.CodecConfig.qscale
torio.io._streaming_media_decoder.ChunkTensor.pts
torio.io._streaming_media_decoder.OutputAudioStream.num_channels
torio.io._streaming_media_decoder.OutputAudioStream.sample_rate
torio.io._streaming_media_decoder.OutputStream.filter_description
torio.io._streaming_media_decoder.OutputStream.format
torio.io._streaming_media_decoder.OutputStream.media_type
torio.io._streaming_media_decoder.OutputStream.source_index
torio.io._streaming_media_decoder.OutputVideoStream.frame_rate
torio.io._streaming_media_decoder.OutputVideoStream.height
torio.io._streaming_media_decoder.OutputVideoStream.width
torio.io._streaming_media_decoder.SourceAudioStream.num_channels
torio.io._streaming_media_decoder.SourceAudioStream.sample_rate
torio.io._streaming_media_decoder.SourceStream.bit_rate
torio.io._streaming_media_decoder.SourceStream.bits_per_sample
torio.io._streaming_media_decoder.SourceStream.codec
torio.io._streaming_media_decoder.SourceStream.codec_long_name
torio.io._streaming_media_decoder.SourceStream.format
torio.io._streaming_media_decoder.SourceStream.media_type
torio.io._streaming_media_decoder.SourceStream.metadata
torio.io._streaming_media_decoder.SourceStream.num_frames
torio.io._streaming_media_decoder.SourceVideoStream.frame_rate
torio.io._streaming_media_decoder.SourceVideoStream.height
torio.io._streaming_media_decoder.SourceVideoStream.width
torio::io::Chunk::frames
torio::io::Chunk::pts
torio::io::OutputStreamInfo::filter_description
torio::io::OutputStreamInfo::format
torio::io::OutputStreamInfo::frame_rate
torio::io::OutputStreamInfo::height
torio::io::OutputStreamInfo::media_type
torio::io::OutputStreamInfo::num_channels
torio::io::OutputStreamInfo::sample_rate
torio::io::OutputStreamInfo::source_index
torio::io::OutputStreamInfo::width
torio::io::SrcStreamInfo::bit_rate
torio::io::SrcStreamInfo::bits_per_sample
torio::io::SrcStreamInfo::codec_long_name
torio::io::SrcStreamInfo::codec_name
torio::io::SrcStreamInfo::fmt_name
torio::io::SrcStreamInfo::frame_rate
torio::io::SrcStreamInfo::height
torio::io::SrcStreamInfo::media_type
torio::io::SrcStreamInfo::metadata
torio::io::SrcStreamInfo::num_channels
torio::io::SrcStreamInfo::num_frames
torio::io::SrcStreamInfo::sample_rate
torio::io::SrcStreamInfo::width
Classes
torchaudio.AudioMetaData
torchaudio.datasets.CMUARCTIC
torchaudio.datasets.CMUDict
torchaudio.datasets.COMMONVOICE
torchaudio.datasets.DR_VCTK
torchaudio.datasets.FluentSpeechCommands
torchaudio.datasets.GTZAN
torchaudio.datasets.IEMOCAP
torchaudio.datasets.LIBRISPEECH
torchaudio.datasets.LIBRITTS
torchaudio.datasets.LJSPEECH
torchaudio.datasets.LibriLightLimited
torchaudio.datasets.LibriMix
torchaudio.datasets.MUSDB_HQ
torchaudio.datasets.QUESST14
torchaudio.datasets.SPEECHCOMMANDS
torchaudio.datasets.Snips
torchaudio.datasets.TEDLIUM
torchaudio.datasets.VCTK_092
torchaudio.datasets.VoxCeleb1Identification
torchaudio.datasets.VoxCeleb1Verification
torchaudio.datasets.YESNO
torchaudio.functional.TokenSpan
torchaudio.io.AudioEffector
torchaudio.io._playback.play_audio
torchaudio.models.Conformer
torchaudio.models.ConvTasNet
torchaudio.models.DeepSpeech
torchaudio.models.Emformer
torchaudio.models.HDemucs
torchaudio.models.HuBERTPretrainModel
torchaudio.models.RNNT
torchaudio.models.RNNTBeamSearch
torchaudio.models.SquimObjective
torchaudio.models.SquimSubjective
torchaudio.models.Tacotron2
torchaudio.models.Wav2Letter
torchaudio.models.Wav2Vec2Model
torchaudio.models.WaveRNN
torchaudio.models.decoder.CTCDecoder
torchaudio.models.decoder.CTCDecoderLM
torchaudio.models.decoder.CTCDecoderLMState
torchaudio.models.decoder.CTCHypothesis
torchaudio.models.decoder.CUCTCDecoder
torchaudio.models.decoder.CUCTCHypothesis
torchaudio.pipelines.RNNTBundle
torchaudio.pipelines.RNNTBundle.FeatureExtractor
torchaudio.pipelines.RNNTBundle.TokenProcessor
torchaudio.pipelines.SourceSeparationBundle
torchaudio.pipelines.SquimObjectiveBundle
torchaudio.pipelines.SquimSubjectiveBundle
torchaudio.pipelines.Tacotron2TTSBundle
torchaudio.pipelines.Tacotron2TTSBundle.TextProcessor
torchaudio.pipelines.Tacotron2TTSBundle.Vocoder
torchaudio.pipelines.Wav2Vec2ASRBundle
torchaudio.pipelines.Wav2Vec2Bundle
torchaudio.pipelines.Wav2Vec2FABundle
torchaudio.pipelines.Wav2Vec2FABundle.Aligner
torchaudio.pipelines.Wav2Vec2FABundle.Tokenizer
torchaudio.prototype.datasets.Musan
torchaudio.prototype.models.ConformerWav2Vec2PretrainModel
torchaudio.prototype.models.ConvEmformer
torchaudio.prototype.models.HiFiGANVocoder
torchaudio.prototype.pipelines.HiFiGANVocoderBundle
torchaudio.prototype.pipelines.VGGishBundle
torchaudio.prototype.pipelines.VGGishBundle.VGGish
torchaudio.prototype.pipelines.VGGishBundle.VGGishInputProcessor
torchaudio.prototype.transforms.BarkScale
torchaudio.prototype.transforms.BarkSpectrogram
torchaudio.prototype.transforms.ChromaScale
torchaudio.prototype.transforms.ChromaSpectrogram
torchaudio.prototype.transforms.InverseBarkScale
torchaudio.transforms.AddNoise
torchaudio.transforms.AmplitudeToDB
torchaudio.transforms.ComputeDeltas
torchaudio.transforms.Convolve
torchaudio.transforms.Deemphasis
torchaudio.transforms.FFTConvolve
torchaudio.transforms.Fade
torchaudio.transforms.FrequencyMasking
torchaudio.transforms.GriffinLim
torchaudio.transforms.InverseMelScale
torchaudio.transforms.InverseSpectrogram
torchaudio.transforms.LFCC
torchaudio.transforms.Loudness
torchaudio.transforms.MFCC
torchaudio.transforms.MVDR
torchaudio.transforms.MelScale
torchaudio.transforms.MelSpectrogram
torchaudio.transforms.MuLawDecoding
torchaudio.transforms.MuLawEncoding
torchaudio.transforms.PSD
torchaudio.transforms.PitchShift
torchaudio.transforms.Preemphasis
torchaudio.transforms.RNNTLoss
torchaudio.transforms.RTFMVDR
torchaudio.transforms.Resample
torchaudio.transforms.SlidingWindowCmn
torchaudio.transforms.SoudenMVDR
torchaudio.transforms.SpectralCentroid
torchaudio.transforms.Spectrogram
torchaudio.transforms.Speed
torchaudio.transforms.SpeedPerturbation
torchaudio.transforms.TimeMasking
torchaudio.transforms.TimeStretch
torchaudio.transforms.Vad
torchaudio.transforms.Vol
torio.io.CodecConfig
torio.io.StreamingMediaDecoder
torio.io.StreamingMediaEncoder
torio.io._streaming_media_decoder.ChunkTensor
torio.io._streaming_media_decoder.OutputAudioStream
torio.io._streaming_media_decoder.OutputStream
torio.io._streaming_media_decoder.OutputVideoStream
torio.io._streaming_media_decoder.SourceAudioStream
torio.io._streaming_media_decoder.SourceStream
torio.io._streaming_media_decoder.SourceVideoStream
torio::io::Chunk
torio::io::OutputStreamInfo
torio::io::SrcStreamInfo
torio::io::StreamingMediaDecoder
torio::io::StreamingMediaDecoderCustomIO
torio::io::StreamingMediaEncoder
torio::io::StreamingMediaEncoderCustomIO
Functions
torchaudio.compliance.kaldi.fbank
torchaudio.compliance.kaldi.mfcc
torchaudio.compliance.kaldi.spectrogram
torchaudio.functional.DB_to_amplitude
torchaudio.functional.add_noise
torchaudio.functional.allpass_biquad
torchaudio.functional.amplitude_to_DB
torchaudio.functional.apply_beamforming
torchaudio.functional.apply_codec
torchaudio.functional.band_biquad
torchaudio.functional.bandpass_biquad
torchaudio.functional.bandreject_biquad
torchaudio.functional.bass_biquad
torchaudio.functional.biquad
torchaudio.functional.compute_deltas
torchaudio.functional.contrast
torchaudio.functional.convolve
torchaudio.functional.create_dct
torchaudio.functional.dcshift
torchaudio.functional.deemph_biquad
torchaudio.functional.deemphasis
torchaudio.functional.detect_pitch_frequency
torchaudio.functional.dither
torchaudio.functional.edit_distance
torchaudio.functional.equalizer_biquad
torchaudio.functional.fftconvolve
torchaudio.functional.filtfilt
torchaudio.functional.flanger
torchaudio.functional.forced_align
torchaudio.functional.frechet_distance
torchaudio.functional.gain
torchaudio.functional.griffinlim
torchaudio.functional.highpass_biquad
torchaudio.functional.inverse_spectrogram
torchaudio.functional.lfilter
torchaudio.functional.linear_fbanks
torchaudio.functional.loudness
torchaudio.functional.lowpass_biquad
torchaudio.functional.mask_along_axis
torchaudio.functional.mask_along_axis_iid
torchaudio.functional.melscale_fbanks
torchaudio.functional.merge_tokens
torchaudio.functional.mu_law_decoding
torchaudio.functional.mu_law_encoding
torchaudio.functional.mvdr_weights_rtf
torchaudio.functional.mvdr_weights_souden
torchaudio.functional.overdrive
torchaudio.functional.phase_vocoder
torchaudio.functional.phaser
torchaudio.functional.pitch_shift
torchaudio.functional.preemphasis
torchaudio.functional.psd
torchaudio.functional.resample
torchaudio.functional.riaa_biquad
torchaudio.functional.rnnt_loss
torchaudio.functional.rtf_evd
torchaudio.functional.rtf_power
torchaudio.functional.sliding_window_cmn
torchaudio.functional.spectral_centroid
torchaudio.functional.spectrogram
torchaudio.functional.speed
torchaudio.functional.treble_biquad
torchaudio.functional.vad
torchaudio.info
torchaudio.kaldi_io.read_mat_ark
torchaudio.kaldi_io.read_mat_scp
torchaudio.kaldi_io.read_vec_flt_ark
torchaudio.kaldi_io.read_vec_flt_scp
torchaudio.kaldi_io.read_vec_int_ark
torchaudio.list_audio_backends
torchaudio.load
torchaudio.models.conv_tasnet_base
torchaudio.models.decoder.ctc_decoder
torchaudio.models.decoder.cuda_ctc_decoder
torchaudio.models.decoder.download_pretrained_files
torchaudio.models.emformer_rnnt_base
torchaudio.models.emformer_rnnt_model
torchaudio.models.hdemucs_high
torchaudio.models.hdemucs_low
torchaudio.models.hdemucs_medium
torchaudio.models.hubert_base
torchaudio.models.hubert_large
torchaudio.models.hubert_pretrain_base
torchaudio.models.hubert_pretrain_large
torchaudio.models.hubert_pretrain_model
torchaudio.models.hubert_pretrain_xlarge
torchaudio.models.hubert_xlarge
torchaudio.models.squim_objective_base
torchaudio.models.squim_objective_model
torchaudio.models.squim_subjective_base
torchaudio.models.squim_subjective_model
torchaudio.models.wav2vec2.utils.import_fairseq_model
torchaudio.models.wav2vec2.utils.import_huggingface_model
torchaudio.models.wav2vec2_base
torchaudio.models.wav2vec2_large
torchaudio.models.wav2vec2_large_lv60k
torchaudio.models.wav2vec2_model
torchaudio.models.wav2vec2_xlsr_1b
torchaudio.models.wav2vec2_xlsr_2b
torchaudio.models.wav2vec2_xlsr_300m
torchaudio.models.wavlm_base
torchaudio.models.wavlm_large
torchaudio.models.wavlm_model
torchaudio.prototype.functional.adsr_envelope
torchaudio.prototype.functional.barkscale_fbanks
torchaudio.prototype.functional.chroma_filterbank
torchaudio.prototype.functional.extend_pitch
torchaudio.prototype.functional.filter_waveform
torchaudio.prototype.functional.frequency_impulse_response
torchaudio.prototype.functional.oscillator_bank
torchaudio.prototype.functional.ray_tracing
torchaudio.prototype.functional.simulate_rir_ism
torchaudio.prototype.functional.sinc_impulse_response
torchaudio.prototype.models.conformer_rnnt_base
torchaudio.prototype.models.conformer_rnnt_model
torchaudio.prototype.models.conformer_wav2vec2_base
torchaudio.prototype.models.conformer_wav2vec2_model
torchaudio.prototype.models.conformer_wav2vec2_pretrain_base
torchaudio.prototype.models.conformer_wav2vec2_pretrain_large
torchaudio.prototype.models.conformer_wav2vec2_pretrain_model
torchaudio.prototype.models.emformer_hubert_base
torchaudio.prototype.models.emformer_hubert_model
torchaudio.prototype.models.hifigan_vocoder
torchaudio.prototype.models.hifigan_vocoder_v1
torchaudio.prototype.models.hifigan_vocoder_v2
torchaudio.prototype.models.hifigan_vocoder_v3
torchaudio.save
torchaudio.sox_effects.apply_effects_file
torchaudio.sox_effects.apply_effects_tensor
torchaudio.sox_effects.effect_names
torchaudio.utils.ffmpeg_utils.clear_cuda_context_cache
torchaudio.utils.ffmpeg_utils.get_audio_decoders
torchaudio.utils.ffmpeg_utils.get_audio_encoders
torchaudio.utils.ffmpeg_utils.get_build_config
torchaudio.utils.ffmpeg_utils.get_demuxers
torchaudio.utils.ffmpeg_utils.get_input_devices
torchaudio.utils.ffmpeg_utils.get_input_protocols
torchaudio.utils.ffmpeg_utils.get_log_level
torchaudio.utils.ffmpeg_utils.get_muxers
torchaudio.utils.ffmpeg_utils.get_output_devices
torchaudio.utils.ffmpeg_utils.get_output_protocols
torchaudio.utils.ffmpeg_utils.get_versions
torchaudio.utils.ffmpeg_utils.get_video_decoders
torchaudio.utils.ffmpeg_utils.get_video_encoders
torchaudio.utils.ffmpeg_utils.set_log_level
torchaudio.utils.sox_utils.get_buffer_size
torchaudio.utils.sox_utils.list_effects
torchaudio.utils.sox_utils.list_read_formats
torchaudio.utils.sox_utils.list_write_formats
torchaudio.utils.sox_utils.set_buffer_size
torchaudio.utils.sox_utils.set_seed
torchaudio.utils.sox_utils.set_use_threads
torchaudio.utils.sox_utils.set_verbosity
torio.utils.ffmpeg_utils.clear_cuda_context_cache
torio.utils.ffmpeg_utils.get_audio_decoders
torio.utils.ffmpeg_utils.get_audio_encoders
torio.utils.ffmpeg_utils.get_build_config
torio.utils.ffmpeg_utils.get_demuxers
torio.utils.ffmpeg_utils.get_input_devices
torio.utils.ffmpeg_utils.get_input_protocols
torio.utils.ffmpeg_utils.get_log_level
torio.utils.ffmpeg_utils.get_muxers
torio.utils.ffmpeg_utils.get_output_devices
torio.utils.ffmpeg_utils.get_output_protocols
torio.utils.ffmpeg_utils.get_versions
torio.utils.ffmpeg_utils.get_video_decoders
torio.utils.ffmpeg_utils.get_video_encoders
torio.utils.ffmpeg_utils.set_log_level
torio::io::StreamingMediaDecoder::add_audio_stream
torio::io::StreamingMediaDecoder::add_video_stream
torio::io::StreamingMediaDecoder::fill_buffer
torio::io::StreamingMediaDecoder::find_best_audio_stream
torio::io::StreamingMediaDecoder::find_best_video_stream
torio::io::StreamingMediaDecoder::get_metadata
torio::io::StreamingMediaDecoder::get_out_stream_info
torio::io::StreamingMediaDecoder::get_src_stream_info
torio::io::StreamingMediaDecoder::is_buffer_ready
torio::io::StreamingMediaDecoder::num_out_streams
torio::io::StreamingMediaDecoder::num_src_streams
torio::io::StreamingMediaDecoder::pop_chunks
torio::io::StreamingMediaDecoder::process_all_packets
torio::io::StreamingMediaDecoder::process_packet
torio::io::StreamingMediaDecoder::process_packet_block
torio::io::StreamingMediaDecoder::remove_stream
torio::io::StreamingMediaDecoder::seek
torio::io::StreamingMediaDecoderCustomIO::StreamingMediaDecoderCustomIO
torio::io::StreamingMediaEncoder::StreamingMediaEncoder
torio::io::StreamingMediaEncoder::add_audio_stream
torio::io::StreamingMediaEncoder::add_video_stream
torio::io::StreamingMediaEncoder::close
torio::io::StreamingMediaEncoder::flush
torio::io::StreamingMediaEncoder::open
torio::io::StreamingMediaEncoder::set_metadata
torio::io::StreamingMediaEncoder::write_audio_chunk
torio::io::StreamingMediaEncoder::write_video_chunk
torio::io::StreamingMediaEncoderCustomIO::StreamingMediaEncoderCustomIO
Guides
Methods
torchaudio.datasets.CMUARCTIC.__getitem__
torchaudio.datasets.CMUDict.__getitem__
torchaudio.datasets.COMMONVOICE.__getitem__
torchaudio.datasets.DR_VCTK.__getitem__
torchaudio.datasets.FluentSpeechCommands.__getitem__
torchaudio.datasets.FluentSpeechCommands.get_metadata
torchaudio.datasets.GTZAN.__getitem__
torchaudio.datasets.IEMOCAP.__getitem__
torchaudio.datasets.IEMOCAP.get_metadata
torchaudio.datasets.LIBRISPEECH.__getitem__
torchaudio.datasets.LIBRISPEECH.get_metadata
torchaudio.datasets.LIBRITTS.__getitem__
torchaudio.datasets.LJSPEECH.__getitem__
torchaudio.datasets.LibriLightLimited.__getitem__
torchaudio.datasets.LibriMix.__getitem__
torchaudio.datasets.LibriMix.get_metadata
torchaudio.datasets.MUSDB_HQ.__getitem__
torchaudio.datasets.QUESST14.__getitem__
torchaudio.datasets.QUESST14.get_metadata
torchaudio.datasets.SPEECHCOMMANDS.__getitem__
torchaudio.datasets.SPEECHCOMMANDS.get_metadata
torchaudio.datasets.Snips.__getitem__
torchaudio.datasets.Snips.get_metadata
torchaudio.datasets.TEDLIUM.__getitem__
torchaudio.datasets.VCTK_092.__getitem__
torchaudio.datasets.VoxCeleb1Identification.__getitem__
torchaudio.datasets.VoxCeleb1Identification.get_metadata
torchaudio.datasets.VoxCeleb1Verification.__getitem__
torchaudio.datasets.VoxCeleb1Verification.get_metadata
torchaudio.datasets.YESNO.__getitem__
torchaudio.io.AudioEffector.apply
torchaudio.io.AudioEffector.stream
torchaudio.models.Conformer.forward
torchaudio.models.ConvTasNet.forward
torchaudio.models.DeepSpeech.forward
torchaudio.models.Emformer.forward
torchaudio.models.Emformer.infer
torchaudio.models.HDemucs.forward
torchaudio.models.HuBERTPretrainModel.forward
torchaudio.models.RNNT.forward
torchaudio.models.RNNT.join
torchaudio.models.RNNT.predict
torchaudio.models.RNNT.transcribe
torchaudio.models.RNNT.transcribe_streaming
torchaudio.models.RNNTBeamSearch.forward
torchaudio.models.RNNTBeamSearch.infer
torchaudio.models.SquimObjective.forward
torchaudio.models.SquimSubjective.forward
torchaudio.models.Tacotron2.forward
torchaudio.models.Tacotron2.infer
torchaudio.models.Wav2Letter.forward
torchaudio.models.Wav2Vec2Model.extract_features
torchaudio.models.Wav2Vec2Model.forward
torchaudio.models.WaveRNN.forward
torchaudio.models.WaveRNN.infer
torchaudio.models.decoder.CTCDecoder.__call__
torchaudio.models.decoder.CTCDecoder.decode_begin
torchaudio.models.decoder.CTCDecoder.decode_end
torchaudio.models.decoder.CTCDecoder.decode_step
torchaudio.models.decoder.CTCDecoder.get_final_hypothesis
torchaudio.models.decoder.CTCDecoder.idxs_to_tokens
torchaudio.models.decoder.CTCDecoderLM.finish
torchaudio.models.decoder.CTCDecoderLM.score
torchaudio.models.decoder.CTCDecoderLM.start
torchaudio.models.decoder.CTCDecoderLMState.child
torchaudio.models.decoder.CTCDecoderLMState.compare
torchaudio.models.decoder.CUCTCDecoder.__call__
torchaudio.pipelines.RNNTBundle.FeatureExtractor.__call__
torchaudio.pipelines.RNNTBundle.TokenProcessor.__call__
torchaudio.pipelines.RNNTBundle.get_decoder
torchaudio.pipelines.RNNTBundle.get_feature_extractor
torchaudio.pipelines.RNNTBundle.get_streaming_feature_extractor
torchaudio.pipelines.RNNTBundle.get_token_processor
torchaudio.pipelines.SourceSeparationBundle.get_model
torchaudio.pipelines.SquimObjectiveBundle.get_model
torchaudio.pipelines.SquimSubjectiveBundle.get_model
torchaudio.pipelines.Tacotron2TTSBundle.TextProcessor.__call__
torchaudio.pipelines.Tacotron2TTSBundle.Vocoder.__call__
torchaudio.pipelines.Tacotron2TTSBundle.get_tacotron2
torchaudio.pipelines.Tacotron2TTSBundle.get_text_processor
torchaudio.pipelines.Tacotron2TTSBundle.get_vocoder
torchaudio.pipelines.Wav2Vec2ASRBundle.get_labels
torchaudio.pipelines.Wav2Vec2ASRBundle.get_model
torchaudio.pipelines.Wav2Vec2Bundle.get_model
torchaudio.pipelines.Wav2Vec2FABundle.Aligner.__call__
torchaudio.pipelines.Wav2Vec2FABundle.Tokenizer.__call__
torchaudio.pipelines.Wav2Vec2FABundle.get_aligner
torchaudio.pipelines.Wav2Vec2FABundle.get_dict
torchaudio.pipelines.Wav2Vec2FABundle.get_labels
torchaudio.pipelines.Wav2Vec2FABundle.get_model
torchaudio.pipelines.Wav2Vec2FABundle.get_tokenizer
torchaudio.prototype.datasets.Musan.__getitem__
torchaudio.prototype.datasets.Musan.get_metadata
torchaudio.prototype.models.ConformerWav2Vec2PretrainModel.forward
torchaudio.prototype.models.ConvEmformer.forward
torchaudio.prototype.models.ConvEmformer.infer
torchaudio.prototype.models.HiFiGANVocoder.forward
torchaudio.prototype.pipelines.HiFiGANVocoderBundle.get_mel_transform
torchaudio.prototype.pipelines.HiFiGANVocoderBundle.get_vocoder
torchaudio.prototype.pipelines.VGGishBundle.VGGish.forward
torchaudio.prototype.pipelines.VGGishBundle.VGGishInputProcessor.__call__
torchaudio.prototype.pipelines.VGGishBundle.get_input_processor
torchaudio.prototype.pipelines.VGGishBundle.get_model
torchaudio.prototype.transforms.BarkScale.forward
torchaudio.prototype.transforms.BarkSpectrogram.forward
torchaudio.prototype.transforms.ChromaScale.forward
torchaudio.prototype.transforms.ChromaSpectrogram.forward
torchaudio.prototype.transforms.InverseBarkScale.forward
torchaudio.transforms.AddNoise.forward
torchaudio.transforms.AmplitudeToDB.forward
torchaudio.transforms.ComputeDeltas.forward
torchaudio.transforms.Convolve.forward
torchaudio.transforms.Deemphasis.forward
torchaudio.transforms.FFTConvolve.forward
torchaudio.transforms.Fade.forward
torchaudio.transforms.GriffinLim.forward
torchaudio.transforms.InverseMelScale.forward
torchaudio.transforms.InverseSpectrogram.forward
torchaudio.transforms.LFCC.forward
torchaudio.transforms.Loudness.forward
torchaudio.transforms.MFCC.forward
torchaudio.transforms.MVDR.forward
torchaudio.transforms.MelScale.forward
torchaudio.transforms.MelSpectrogram.forward
torchaudio.transforms.MuLawDecoding.forward
torchaudio.transforms.MuLawEncoding.forward
torchaudio.transforms.PSD.forward
torchaudio.transforms.PitchShift.forward
torchaudio.transforms.PitchShift.initialize_parameters
torchaudio.transforms.Preemphasis.forward
torchaudio.transforms.RNNTLoss.forward
torchaudio.transforms.RTFMVDR.forward
torchaudio.transforms.Resample.forward
torchaudio.transforms.SlidingWindowCmn.forward
torchaudio.transforms.SoudenMVDR.forward
torchaudio.transforms.SpectralCentroid.forward
torchaudio.transforms.Spectrogram.forward
torchaudio.transforms.Speed.forward
torchaudio.transforms.SpeedPerturbation.forward
torchaudio.transforms.TimeStretch.forward
torchaudio.transforms.Vad.forward
torchaudio.transforms.Vol.forward
torio.io.StreamingMediaDecoder.add_audio_stream
torio.io.StreamingMediaDecoder.add_basic_audio_stream
torio.io.StreamingMediaDecoder.add_basic_video_stream
torio.io.StreamingMediaDecoder.add_video_stream
torio.io.StreamingMediaDecoder.fill_buffer
torio.io.StreamingMediaDecoder.get_metadata
torio.io.StreamingMediaDecoder.get_out_stream_info
torio.io.StreamingMediaDecoder.get_src_stream_info
torio.io.StreamingMediaDecoder.is_buffer_ready
torio.io.StreamingMediaDecoder.pop_chunks
torio.io.StreamingMediaDecoder.process_all_packets
torio.io.StreamingMediaDecoder.process_packet
torio.io.StreamingMediaDecoder.remove_stream
torio.io.StreamingMediaDecoder.seek
torio.io.StreamingMediaDecoder.stream
torio.io.StreamingMediaEncoder.add_audio_stream
torio.io.StreamingMediaEncoder.add_video_stream
torio.io.StreamingMediaEncoder.close
torio.io.StreamingMediaEncoder.flush
torio.io.StreamingMediaEncoder.open
torio.io.StreamingMediaEncoder.set_metadata
torio.io.StreamingMediaEncoder.write_audio_chunk
torio.io.StreamingMediaEncoder.write_video_chunk
Modules
torchaudio.compliance.kaldi
torchaudio.datasets
torchaudio.functional
torchaudio.io
torchaudio.kaldi_io
torchaudio.models
torchaudio.models.decoder
torchaudio.pipelines
torchaudio.prototype.datasets
torchaudio.prototype.functional
torchaudio.prototype.models
torchaudio.prototype.pipelines
torchaudio.prototype.transforms
torchaudio.sox_effects
torchaudio.transforms
torchaudio.utils
torchaudio.utils.ffmpeg_utils
torchaudio.utils.sox_utils
torio
torio.io
torio.utils
torio.utils.ffmpeg_utils
Properties
torchaudio.datasets.CMUDict.symbols
torchaudio.datasets.TEDLIUM.phoneme_dict
torchaudio.models.decoder.CTCDecoderLMState.children
torchaudio.pipelines.RNNTBundle.hop_length
torchaudio.pipelines.RNNTBundle.n_fft
torchaudio.pipelines.RNNTBundle.n_mels
torchaudio.pipelines.RNNTBundle.right_context_length
torchaudio.pipelines.RNNTBundle.sample_rate
torchaudio.pipelines.RNNTBundle.segment_length
torchaudio.pipelines.SourceSeparationBundle.sample_rate
torchaudio.pipelines.SquimObjectiveBundle.sample_rate
torchaudio.pipelines.SquimSubjectiveBundle.sample_rate
torchaudio.pipelines.Tacotron2TTSBundle.TextProcessor.tokens
torchaudio.pipelines.Tacotron2TTSBundle.Vocoder.sample_rate
torchaudio.pipelines.Wav2Vec2ASRBundle.sample_rate
torchaudio.pipelines.Wav2Vec2Bundle.sample_rate
torchaudio.pipelines.Wav2Vec2FABundle.sample_rate
torchaudio.prototype.pipelines.HiFiGANVocoderBundle.sample_rate
torchaudio.prototype.pipelines.VGGishBundle.sample_rate
torio.io.StreamingMediaDecoder.default_audio_stream
torio.io.StreamingMediaDecoder.default_video_stream
torio.io.StreamingMediaDecoder.num_out_streams
torio.io.StreamingMediaDecoder.num_src_streams
Sections
1. Data acquisition
1. Install Conda and activate conda environment
1. Install JetPack
1. Install MSYS2
1. Install build tools
1. Install build tools
1. Overview
1. Overview
1. Overview
1. Overview
1. Overview
1080P
1080P
2. Checking the supported devices
2. Install PyTorch
2. Install dependencies
2. Launch MSYS2
2. Pre-processing
2. Preparation
2. Preparation
2. Preparation
2. Preparation
2. Start the dev environment
2. [Optional] Install jtop
2.1. Import the packages
2.2. Download audio data
2.3. Helper functions
3. Build TorchAudio
3. Building inference pipeline
3. Construct the pipeline
3. Construct the pipeline
3. Data acquisition
3. Generate Ideal Ratio Masks (IRMs)
3. Install PyTorch
3. Install build tools
3. Install build tools
3. Install pip in user env
3. Load Speech and Noise Sample
3.1. Load audio data
3.2. Compute STFT coefficients
3.2.1. Visualize mixture speech
3.2.2. Visualize clean speech
3.2.3. Visualize noise
3.3. Define the reference microphone
3.4. Compute IRMs
3.4.1. Visualize IRM of target speech
3.4.2. Visualize IRM of noise
360P
360P
4. Build FFmpeg
4. Building inference pipeline
4. Check the installation
4. Clone the torchaudio repository
4. Compute PSD matrices
4. Configure the application function
4. Configure the audio stream
4. Create distorted (noisy) speech samples
4. Install PyTorch
4. The main process
4. [Optional] cuDNN
5. Beamforming using SoudenMVDR
5. Build
5. Build TorchAudio
5. Install external dependencies
5. Run Model
5. Run stream inference
5. The main process
5. Verify the build
5. Visualize the waveforms
5.1 Separate Track
5.1. Apply beamforming
5.2 Audio Segmenting and Processing
5.2. Result for SoudenMVDR
5.3 Spectrograms and Audio
6. Beamforming using RTFMVDR
6. Build TorchAudio
6. Predict Objective Metrics
6.1. Compute RTF
6.2. Apply beamforming
6.3. Result for RTFMVDR with rtf_evd
6.4. Result for RTFMVDR with rtf_power
7. Predict Mean Opinion Scores (Subjective) Metric
720P
720P
8 bit mu-law
8. Comparison with ground truths and baselines
ADSR Envelope
ASR Inference with CTC Decoder
ASR Inference with CTC Decoder
ASR Inference with CTC Decoder
ASR Inference with CUDA CTC Decoder
ASR Inference with CUDA CTC Decoder
ASR Inference with CUDA CTC Decoder
Accelerated video decoding with NVDEC
Accelerated video decoding with NVDEC
Accelerated video decoding with NVDEC
Accelerated video encoding with NVENC
Accelerated video encoding with NVENC
Accelerated video encoding with NVENC
Acknowledgement
Acknowledgement
Acoustic Model and Set Up
Acoustic Model and Set Up
AddNoise
AddNoise
Adding background noise
Additive Synthesis
Additive Synthesis
Additive Synthesis
Advanced: Handling transcripts with <star> token
Aligning transcripts to speech
AmplitudeToDB
AmplitudeToDB
Applying codec to Tensor object
Applying effects
Applying effects and filtering
Arbitrary frequency response
Arbitrary shapes
Audio / Video device input
Audio Data Augmentation
Audio Data Augmentation
Audio Data Augmentation
Audio Datasets
Audio Datasets
Audio Datasets
Audio Examples
Audio Examples
Audio Feature Augmentation
Audio Feature Augmentation
Audio Feature Augmentation
Audio Feature Extractions
Audio Feature Extractions
Audio Feature Extractions
Audio I/O
Audio I/O
Audio I/O
Audio Resampling
Audio Resampling
Audio Resampling
Audio Samples
Audio Samples
Audio Stream
Audio previews
AudioEffector
AudioEffector
AudioEffector Usages
AudioEffector Usages
AudioEffector Usages
AudioMetaData
Augmentations
Autograd
Backend and Dispatcher
Backend and Dispatcher
Background noise
Background noise added
Band-pass filter
BarkScale
BarkScale
BarkSpectrogram
BarkSpectrogram
Beam Search Decoder
Beam Search Decoder Parameters
Beam Search Decoder Parameters
Bell sound
Benchmark NVDEC with StreamReader
Benchmark NVENC with StreamWriter
Benchmark with flashlight CPU decoder
Brick-wall filter
Build FFmpeg with NVDEC/NVENC support
Building from source
Building from source
Building on Jetson
Building on Jetson
Building on Linux and macOS
Building on Linux and macOS
Building on Windows
Building on Windows
CMUARCTIC
CMUARCTIC
CMUDict
CMUDict
COMMONVOICE
COMMONVOICE
CONVTASNET_BASE_LIBRI2MIX
CONVTASNET_BASE_LIBRI2MIX
CPU
CTC Decoder
CTC forced alignment API tutorial
CTC forced alignment API tutorial
CTC forced alignment API tutorial
CTCDecoder
CTCDecoder
CTCDecoderLM
CTCDecoderLMState
CTCHypothesis
CUCTCDecoder
CUCTCDecoder
CUCTCHypothesis
CUDA
CUDA CTC Decoder
Cellular automaton
Changing Frequencies across time
Character-based encoding
Check the GPU and CUDA version
Check the prerequisites
Check the prerequisites
Checking the compute capability
Checking the installation
Checking the installation
Checking the source streams
Checking the versions
Chinese
ChromaScale
ChromaScale
ChromaSpectrogram
ChromaSpectrogram
Chunk
ChunkTensor
Citing torchaudio
Codec applied
CodecConfig
Codecs
Combining multiple sine waves
Comparing resizing methods
Comparison against librosa
Comparison against librosa
Comparison against librosa
Comparison against librosa
Compatibility Matrix
Computation times
Computation times
Computation times
ComputeDeltas
ComputeDeltas
Computing alignments
Conclusion
Conclusion
Conclusion
Conclusion
Conda
Config methods
Configure Methods
Configuring output streams
Configuring output streams
Configuring output streams
Configuring output streams
Conformer
Conformer
ConformerWav2Vec2PretrainModel
ConformerWav2Vec2PretrainModel
Construct CUDA Decoder
Construct Decoders
Constructors
Constructors
Controlling resampling quality with parameters
ConvEmformer
ConvEmformer
ConvTasNet
ConvTasNet
Convolve
Convolve
Creating a pipeline
Creating multiple frequency pitches
Creating the pipeline
Custom Language Model
Custom filters
Customizing the build
DR_VCTK
DR_VCTK
DSP
Decode and resize
Decode as CUDA frames
Decoding videos with NVDEC
Deemphasis
Deemphasis
DeepSpeech
DeepSpeech
Default streams
Dependencies
Device ASR with Emformer RNN-T
Device ASR with Emformer RNN-T
Device ASR with Emformer RNN-T
Device AV-ASR with Emformer RNN-T
Device AV-ASR with Emformer RNN-T
Device AV-ASR with Emformer RNN-T
Device Availability
Devices
Dispatcher Migration
Dispatcher Migration
Downloading Pretrained Files
Downsample (16 -> 8 kHz)
Downsample (48 -> 44.1 kHz)
Drum Beats
EMFORMER_RNNT_BASE_LIBRISPEECH
EMFORMER_RNNT_BASE_LIBRISPEECH
EMFORMER_RNNT_BASE_MUSTC
EMFORMER_RNNT_BASE_MUSTC
EMFORMER_RNNT_BASE_TEDLIUM3
EMFORMER_RNNT_BASE_TEDLIUM3
Edge detection
Effects
Effects applied
Emformer
Emformer
Enabling GPU video decoder/encoder
Enabling GPU video decoder/encoder
Enabling GPU video decoder/encoder
Encoding videos with NVENC
Ex) Audio
Ex) Image
Ex) Video with audio
Ex) Video without audio
Example
Example
Example - Spectrum Visualizer
Extracting acoustic features
FFT filter - Robot 🤖
FFT filter - Whisper
FFTConvolve
FFTConvolve
Factory Functions
Factory Functions
Factory Functions
Factory Functions
Factory Functions
Factory Functions
Factory Functions
Factory Functions
Factory Functions
Fade
Fade
Feature Classifications
Feature Classifications
Feature Extractions
Feature Extractions
Feature classification
File-like objects
File-like objects
Files and Data for Decoder
Files and Data for Decoder
Filter design tutorial
Filter design tutorial
Filter design tutorial
Filtered
Filtered Noise
Filtering
Find the most likely path (backtracking)
FluentSpeechCommands
FluentSpeechCommands
Forced Alignment
Forced Alignment with Wav2Vec2
Forced Alignment with Wav2Vec2
Forced Alignment with Wav2Vec2
Forced alignment for multilingual data
Forced alignment for multilingual data
Forced alignment for multilingual data
Frame-level alignments
Frequency Response
Frequency Response
Frequency Response
Frequency Sampling
FrequencyMasking
FrequencyMasking
Full Transcript
G.722
GTZAN
GTZAN
Gallery
Generate alignment probability (trellis)
Generate frame-wise label probability
Generating emissions
Generating transcripts
German
Greedy Decoder
Griffin-Lim Vocoder
GriffinLim
GriffinLim
GriffinLim
HDEMUCS_HIGH_MUSDB
HDEMUCS_HIGH_MUSDB
HDEMUCS_HIGH_MUSDB_PLUS
HDEMUCS_HIGH_MUSDB_PLUS
HDemucs
HDemucs
HIFIGAN_VOCODER_V3_LJSPEECH
HIFIGAN_VOCODER_V3_LJSPEECH
HUBERT_ASR_LARGE
HUBERT_ASR_LARGE
HUBERT_ASR_XLARGE
HUBERT_ASR_XLARGE
HUBERT_BASE
HUBERT_BASE
HUBERT_LARGE
HUBERT_LARGE
HUBERT_XLARGE
HUBERT_XLARGE
HW resizing and cropping
Harmonic Overtones
Headerless media
Helper Function
Helper Function
HiFiGAN Vocoder
HiFiGANVocoder
HiFiGANVocoder
HiFiGANVocoderBundle
HiFiGANVocoderBundle
High-pass filter
Highpass / lowpass filter
Horizontal
HuBERTPretrainModel
HuBERTPretrainModel
Hypothesis
I/O
IEMOCAP
IEMOCAP
Icon
Impulse Response
Impulse Response
Impulse Response
Inconsistent treatment of blank token
Incremental decoding
Index
Inharmonic Partials
Install FFmpeg dependencies
Install NVIDIA Video Codec Headers
Installing pre-built binaries
Installing pre-built binaries
Interface
Interface
Interface
Interface
Interface
Interface
Interface
Interface
Interface
Interface
InverseBarkScale
InverseBarkScale
InverseMelScale
InverseMelScale
InverseSpectrogram
InverseSpectrogram
Italian
John Conway's life game
KenLM
LFCC
LFCC
LFCC
LIBRISPEECH
LIBRISPEECH
LIBRITTS
LIBRITTS
LJSPEECH
LJSPEECH
Language Model
Lexicon
LibriLightLimited
LibriLightLimited
LibriMix
LibriMix
Loading audio data
Loading data
Loading from file-like object
Local files
Local files
Loss
Loss
Loudness
Loudness
Low-pass filter
Lowpass filter width
MFCC
MFCC
MFCC
MMS_FA
MMS_FA
MPlayer Test patterns
MSVC
MUSDB_HQ
MUSDB_HQ
MVDR
MVDR
Mandelbrot
Matrices
Mel Filter Bank
MelScale
MelScale
MelSpectrogram
MelSpectrogram
MelSpectrogram
Merge the segments into words
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Methods
Metric
Mirror
Module Index
MuLawDecoding
MuLawDecoding
MuLawEncoding
MuLawEncoding
Multi-channel
Multi-channel
Musan
Musan
Music Source Separation with Hybrid Demucs
Music Source Separation with Hybrid Demucs
Music Source Separation with Hybrid Demucs
Network protocols
No Language Model
Noise
Normalizing the transcript
Online ASR with Emformer RNN-T
Online ASR with Emformer RNN-T
Online ASR with Emformer RNN-T
Opening the source
Opening the source media
Optional Dependencies
Optional Dependencies
Original
Original
Original
Original
Original
Original
Original speech
Oscillating cutoff frequency
Oscillator Bank
Oscillator and ADSR envelope
Oscillator and ADSR envelope
Oscillator and ADSR envelope
OutputAudioStream
OutputStream
OutputStreaminfo
OutputVideoStream
Overview
Overview
Overview
Overview
Overview
Overview
Overview
Overview
Overview
Overview
Overview
Overview of audio features
PSD
PSD
Partial Transcript with <star> token
Partial Transcript without <star> token
Performance Benchmarking
Pretrained Models
Phoneme-based encoding
Pitch
PitchShift
PitchShift
Pixel manipulation
Playing Video
Playing audio
Pluck
Polish
Portuguese
Preemphasis
Preemphasis
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Preparation
Prepare Canvas
Prepare Data
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Pretrained Models
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Properties
Prototype Factory Functions
Prototype Factory Functions
Prototype Factory Functions of Beta Models
QUESST14
QUESST14
QVGA
QVGA
Quality Spotcheck
Query Methods
Querying audio metadata
Querying file-like object
RIR applied
RIR applied
RNN-T Streaming/Non-Streaming ASR
RNN-T Streaming/Non-Streaming ASR
RNN-T Streaming/Non-Streaming ASR
RNNT
RNNT
RNNTBeamSearch
RNNTBeamSearch
RNNTBundle
RNNTBundle
RNNTBundle.FeatureExtractor
RNNTBundle.FeatureExtractor
RNNTBundle.TokenProcessor
RNNTBundle.TokenProcessor
RNNTLoss
RNNTLoss
RTFMVDR
RTFMVDR
RTMP (Real-Time Messaging Protocol)
Random rotation
References
References
References
References
References
References
Resample
Resample
Resampling Overview
Result
Result
Result
Result
Retrieval Methods
Riser
Rolloff
Room Impulse Response Simulation
Run Inference
Run Inference
SNR 10 dB
SNR 20 dB
SNR 3 dB
SPEECHCOMMANDS
SPEECHCOMMANDS
SQUIM_OBJECTIVE
SQUIM_OBJECTIVE
SQUIM_SUBJECTIVE
SQUIM_SUBJECTIVE
Saving audio to file
Saving to file-like object
Sawtooth wave
Search Page
Segment the path
Sierpinski carpet/triangle fractal
Signal with arbitrary expression
Simple Sine Wave
Simulating a phone recording
Simulating room reverberation
Sine wave
SlidingWindowCmn
SlidingWindowCmn
Snips
Snips
SoudenMVDR
SoudenMVDR
Source Separation
SourceAudioStream
SourceSeparationBundle
SourceSeparationBundle
SourceStream
SourceVideoStream
SpecAugment
SpectralCentroid
SpectralCentroid
Spectrogram
Spectrogram
Spectrogram
Spectrogram Generation
Speech Enhancement with MVDR Beamforming
Speech Enhancement with MVDR Beamforming
Speech Enhancement with MVDR Beamforming
Speech Recognition with Wav2Vec2
Speech Recognition with Wav2Vec2
Speech Recognition with Wav2Vec2
Speed
Speed
SpeedPerturbation
SpeedPerturbation
Square wave
Squim Objective
Squim Subjective
SquimObjective
SquimObjective
SquimObjectiveBundle
SquimObjectiveBundle
SquimSubjective
SquimSubjective
SquimSubjectiveBundle
SquimSubjectiveBundle
SrcStreaminfo
Stream Methods
StreamReader
StreamReader
StreamReader Advanced Usages
StreamReader Advanced Usages
StreamReader Advanced Usages
StreamReader Basic Usages
StreamReader Basic Usages
StreamReader Basic Usages
StreamWriter
StreamWriter
StreamWriter Advanced Usage
StreamWriter Advanced Usage
StreamWriter Advanced Usage
StreamWriter Basic Usage
StreamWriter Basic Usage
StreamWriter Basic Usage
Streaming
Streaming
Streaming Video
Streaming protocols
StreamingMediaDecoder
StreamingMediaDecoder
StreamingMediaDecoder
StreamingMediaDecoderCustomIO
StreamingMediaEncoder
StreamingMediaEncoder
StreamingMediaEncoder
StreamingMediaEncoderCustomIO
Subtractive synthesis
Subtractive synthesis
Subtractive synthesis
Summary
Support Structure
Support Structures
Support Structures
Support Structures
Support Structures
Support Structures
Support Structures
Supported Features
Supported Features
Sweeping cutoff frequency
Synthetic source streams
Synthetic source streams
TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
TACOTRON2_WAVERNN_CHAR_LJSPEECH
TACOTRON2_WAVERNN_CHAR_LJSPEECH
TACOTRON2_WAVERNN_PHONE_LJSPEECH
TACOTRON2_WAVERNN_PHONE_LJSPEECH
TEDLIUM
TEDLIUM
Tacotron2
Tacotron2
Tacotron2 Text-To-Speech
Tacotron2 Text-To-Speech
Tacotron2TTSBundle
Tacotron2TTSBundle
Tacotron2TTSBundle.TextProcessor
Tacotron2TTSBundle.TextProcessor
Tacotron2TTSBundle.Vocoder
Tacotron2TTSBundle.Vocoder
Text Processing
Text-to-Speech with Tacotron2
Text-to-Speech with Tacotron2
Text-to-Speech with Tacotron2
The basic usage
The effect of n_fft parameter
Time and Frequency Masking
TimeMasking
TimeMasking
TimeStretch
TimeStretch
TimeStretch
Timestep Alignments
Tips on slicing
Token-level alignments
TokenSpan
TokenSpan
Tokenize the transcript
Tokens
Tokens
TorchAudio Logo
TorchAudio Logo
TorchScript
Torchaudio Documentation
Torchaudio Documentation
Torchaudio-Squim: Non-intrusive Speech Assessment in TorchAudio
Torchaudio-Squim: Non-intrusive Speech Assessment in TorchAudio
Torchaudio-Squim: Non-intrusive Speech Assessment in TorchAudio
Triangle wave
Tutorials
UDP (User Datagram Protocol)
Upsample (44.1 -> 48 kHz)
Upsample (8 -> 16 kHz)
Usage
Using the GPU decoder/encoder from TorchAudio
Using the hardware decoder and encoder
Utilities
Utility
Utility
Utility
Utility Functions
VCTK_092
VCTK_092
VGA
VGA
VGGISH
VGGISH
VGGish
VGGishBundle
VGGishBundle
VGGishBundle.VGGish
VGGishBundle.VGGish
VGGishBundle.VGGishInputProcessor
VGGishBundle.VGGishInputProcessor
VOXPOPULI_ASR_BASE_10K_DE
VOXPOPULI_ASR_BASE_10K_DE
VOXPOPULI_ASR_BASE_10K_EN
VOXPOPULI_ASR_BASE_10K_EN
VOXPOPULI_ASR_BASE_10K_ES
VOXPOPULI_ASR_BASE_10K_ES
VOXPOPULI_ASR_BASE_10K_FR
VOXPOPULI_ASR_BASE_10K_FR
VOXPOPULI_ASR_BASE_10K_IT
VOXPOPULI_ASR_BASE_10K_IT
Vad
Vad
Vectors
Vertical
Video Examples
Video Examples
Video Stream
Visualization
Visualization
Visualization
Visualization
Visualization
Visualization
Visualization
Visualization
Vol
Vol
Vorbis
VoxCeleb1Identification
VoxCeleb1Identification
VoxCeleb1Verification
VoxCeleb1Verification
WAV2VEC2_ASR_BASE_100H
WAV2VEC2_ASR_BASE_100H
WAV2VEC2_ASR_BASE_10M
WAV2VEC2_ASR_BASE_10M
WAV2VEC2_ASR_BASE_960H
WAV2VEC2_ASR_BASE_960H
WAV2VEC2_ASR_LARGE_100H
WAV2VEC2_ASR_LARGE_100H
WAV2VEC2_ASR_LARGE_10M
WAV2VEC2_ASR_LARGE_10M
WAV2VEC2_ASR_LARGE_960H
WAV2VEC2_ASR_LARGE_960H
WAV2VEC2_ASR_LARGE_LV60K_100H
WAV2VEC2_ASR_LARGE_LV60K_100H
WAV2VEC2_ASR_LARGE_LV60K_10M
WAV2VEC2_ASR_LARGE_LV60K_10M
WAV2VEC2_ASR_LARGE_LV60K_960H
WAV2VEC2_ASR_LARGE_LV60K_960H
WAV2VEC2_BASE
WAV2VEC2_BASE
WAV2VEC2_LARGE
WAV2VEC2_LARGE
WAV2VEC2_LARGE_LV60K
WAV2VEC2_LARGE_LV60K
WAV2VEC2_XLSR53
WAV2VEC2_XLSR53
WAV2VEC2_XLSR_1B
WAV2VEC2_XLSR_1B
WAV2VEC2_XLSR_2B
WAV2VEC2_XLSR_2B
WAV2VEC2_XLSR_300M
WAV2VEC2_XLSR_300M
WAVLM_BASE
WAVLM_BASE
WAVLM_BASE_PLUS
WAVLM_BASE_PLUS
WAVLM_LARGE
WAVLM_LARGE
Wah-wah effects
Wav2Letter
Wav2Letter
Wav2Vec2ASRBundle
Wav2Vec2ASRBundle
Wav2Vec2Bundle
Wav2Vec2Bundle
Wav2Vec2FABundle
Wav2Vec2FABundle
Wav2Vec2FABundle.Aligner
Wav2Vec2FABundle.Aligner
Wav2Vec2FABundle.Tokenizer
Wav2Vec2FABundle.Tokenizer
Wav2Vec2Model
Wav2Vec2Model
WaveRNN
WaveRNN
WaveRNN Vocoder
Waveform Generation
Waveglow Vocoder
Window function
Windowed-Sinc Filter
Windowed-sinc filter
Word-level alignments
Write Video
Write data
Write destination
Write methods
Writing data chunk by chunk
XGA
XGA
YESNO
YESNO
[Optional] Build TorchAudio with a custom FFmpeg
[Optional] Build TorchAudio with a custom built FFmpeg
[Optional] Building FFmpeg from source
__call__
__call__
__call__
__call__
__call__
__call__
__call__
__call__
__call__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
__getitem__
add_audio_stream
add_audio_stream
add_audio_stream
add_audio_stream
add_basic_audio_stream
add_basic_video_stream
add_video_stream
add_video_stream
add_video_stream
add_video_stream
additional parameters
allpass
apply
bandpass
bandreject
beam size
beam size
beam size token
beam threshold
bit_rate=192k
bit_rate=8k
blank skip threshold
chorus
classtorio_1_1io_1_1StreamingMediaDecoder
classtorio_1_1io_1_1StreamingMediaDecoderCustomIO
classtorio_1_1io_1_1StreamingMediaDecoderCustomIO_1a202cf89aab3628148e72a804f13e90f4
classtorio_1_1io_1_1StreamingMediaDecoder_1a01014e8d07967bcd99cf79409bc4026b
classtorio_1_1io_1_1StreamingMediaDecoder_1a01fdf9e71bd58aeafcf2aeb4418a817c
classtorio_1_1io_1_1StreamingMediaDecoder_1a0f4f370219d1615b3b5222aa5385f42b
classtorio_1_1io_1_1StreamingMediaDecoder_1a2675b80361ce5ac9da29bb63105f1135
classtorio_1_1io_1_1StreamingMediaDecoder_1a2949d852664329f6cdc84122ae04c3c3
classtorio_1_1io_1_1StreamingMediaDecoder_1a47015d7637b5c24376ba1c9cac442cdb
classtorio_1_1io_1_1StreamingMediaDecoder_1a4dbd058266fd2876508d87ec98adb978
classtorio_1_1io_1_1StreamingMediaDecoder_1a6901f6138d5bc2c7dc0e2919960aa378
classtorio_1_1io_1_1StreamingMediaDecoder_1a69a405cbf490b82bc23e7d8dae9991a7
classtorio_1_1io_1_1StreamingMediaDecoder_1a6b3e5fd480cc50ee5ec9b389641c4512
classtorio_1_1io_1_1StreamingMediaDecoder_1ab435622f836a58a577a27f0bea8f5819
classtorio_1_1io_1_1StreamingMediaDecoder_1ab79ca75fbcf46543b5947762d4feba46
classtorio_1_1io_1_1StreamingMediaDecoder_1ac00e054c55f2187d7dd6ed9ba21d2228
classtorio_1_1io_1_1StreamingMediaDecoder_1ae3f5ab23b21ee6d2e5a2ed8c53dc260d
classtorio_1_1io_1_1StreamingMediaDecoder_1ae41fbb0c7e92538ba68557a815e09b15
classtorio_1_1io_1_1StreamingMediaDecoder_1af0c02f604589f887e6d4581a7f4a3b17
classtorio_1_1io_1_1StreamingMediaDecoder_1af443fa8e8ac186ac4404d81ea3bec59c
classtorio_1_1io_1_1StreamingMediaEncoder
classtorio_1_1io_1_1StreamingMediaEncoderCustomIO
classtorio_1_1io_1_1StreamingMediaEncoderCustomIO_1adbb9bd5b9630d44dfbc20a1f4f24dcd7
classtorio_1_1io_1_1StreamingMediaEncoder_1a0e21811fac24de6266ad31ef36c7175a
classtorio_1_1io_1_1StreamingMediaEncoder_1a6f34d0b9887f3af40c4d4ec421ffe1d3
classtorio_1_1io_1_1StreamingMediaEncoder_1a7785f86ab22716b0d6fcab72b43e0eea
classtorio_1_1io_1_1StreamingMediaEncoder_1a7c1f0d545d84b6dcfe27655abfb23484
classtorio_1_1io_1_1StreamingMediaEncoder_1a9a0a1eb7632782ca3505f4c006e63351
classtorio_1_1io_1_1StreamingMediaEncoder_1aac942a7abc08bc215fd0a62233f623da
classtorio_1_1io_1_1StreamingMediaEncoder_1ab8dbb3b62454618d83a25fb255de91df
classtorio_1_1io_1_1StreamingMediaEncoder_1ac192ea68a58042c6ffe5001f3ef8b49b
classtorio_1_1io_1_1StreamingMediaEncoder_1ad4b54cba53a7564bb058ede079ff6530
clear_cuda_context_cache
clear_cuda_context_cache
close
close
compression_level=1
compression_level=9
crystalizer
ctc_decoder
ctc_decoder
cuda_ctc_decoder
cuda_ctc_decoder
decode_begin
decode_end
decode_step
default
default_audio_stream
default_video_stream
download_pretrained_files
download_pretrained_files
echo
extract_features
ffmpeg_dependency
ffmpeg_utils
ffmpeg_utils
ffmpeg_utils
ffmpeg_utils
fft filter
fill_buffer
fill_buffer
find_best_audio_stream
find_best_video_stream
flanger
flush
flush
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
forward
get_aligner
get_audio_decoders
get_audio_decoders
get_audio_encoders
get_audio_encoders
get_buffer_size
get_build_config
get_build_config
get_decoder
get_demuxers
get_demuxers
get_dict
get_feature_extractor
get_final_hypothesis
get_input_devices
get_input_devices
get_input_processor
get_input_protocols
get_input_protocols
get_labels
get_labels
get_log_level
get_log_level
get_mel_transform
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_metadata
get_model
get_model
get_model
get_model
get_model
get_model
get_model
get_muxers
get_muxers
get_out_stream_info
get_out_stream_info
get_output_devices
get_output_devices
get_output_protocols
get_output_protocols
get_src_stream_info
get_src_stream_info
get_streaming_feature_extractor
get_tacotron2
get_text_processor
get_token_processor
get_tokenizer
get_versions
get_versions
get_video_decoders
get_video_decoders
get_video_encoders
get_video_encoders
get_vocoder
get_vocoder
haas
highpass
hop_length
idxs_to_tokens
infer
infer
infer
infer
infer
is_buffer_ready
is_buffer_ready
join
kaiser_best
kaiser_fast
language model weight
libtorio
libtorio
list_effects
list_read_formats
list_write_formats
lowpass
mp3
n_fft
n_mels
nbest
nbest
num_out_streams
num_out_streams
num_src_streams
num_src_streams
nvdec_tutorial
nvenc_tutorial
ogg
ogg - default encoder (flac)
ogg - opus
ogg - vorbis
open
open
phaser
phoneme_dict
play_audio
play_audio
pop_chunks
pop_chunks
predict
process_all_packets
process_all_packets
process_packet
process_packet
process_packet_block
pulsator
qscale=1
qscale=9
read_mat_ark
read_mat_scp
read_vec_flt_ark
read_vec_flt_scp
read_vec_int_ark
remove_stream
remove_stream
right_context_length
sample_rate
sample_rate
sample_rate
sample_rate
sample_rate
sample_rate
sample_rate
sample_rate
sample_rate
sample_rate
seek
seek
segment_length
set_buffer_size
set_log_level
set_log_level
set_metadata
set_metadata
set_seed
set_use_threads
set_verbosity
sox_utils
sox_utils
sphx_glr_download_tutorials_additive_synthesis_tutorial.py
sphx_glr_download_tutorials_asr_inference_with_ctc_decoder_tutorial.py
sphx_glr_download_tutorials_asr_inference_with_cuda_ctc_decoder_tutorial.py
sphx_glr_download_tutorials_audio_data_augmentation_tutorial.py
sphx_glr_download_tutorials_audio_datasets_tutorial.py
sphx_glr_download_tutorials_audio_feature_augmentation_tutorial.py
sphx_glr_download_tutorials_audio_feature_extractions_tutorial.py
sphx_glr_download_tutorials_audio_io_tutorial.py
sphx_glr_download_tutorials_audio_resampling_tutorial.py
sphx_glr_download_tutorials_ctc_forced_alignment_api_tutorial.py
sphx_glr_download_tutorials_device_asr.py
sphx_glr_download_tutorials_device_avsr.py
sphx_glr_download_tutorials_effector_tutorial.py
sphx_glr_download_tutorials_filter_design_tutorial.py
sphx_glr_download_tutorials_forced_alignment_for_multilingual_data_tutorial.py
sphx_glr_download_tutorials_forced_alignment_tutorial.py
sphx_glr_download_tutorials_hybrid_demucs_tutorial.py
sphx_glr_download_tutorials_mvdr_tutorial.py
sphx_glr_download_tutorials_nvdec_tutorial.py
sphx_glr_download_tutorials_nvenc_tutorial.py
sphx_glr_download_tutorials_online_asr_tutorial.py
sphx_glr_download_tutorials_oscillator_tutorial.py
sphx_glr_download_tutorials_speech_recognition_pipeline_tutorial.py
sphx_glr_download_tutorials_squim_tutorial.py
sphx_glr_download_tutorials_streamreader_advanced_tutorial.py
sphx_glr_download_tutorials_streamreader_basic_tutorial.py
sphx_glr_download_tutorials_streamwriter_advanced.py
sphx_glr_download_tutorials_streamwriter_basic_tutorial.py
sphx_glr_download_tutorials_subtractive_synthesis_tutorial.py
sphx_glr_download_tutorials_tacotron2_pipeline_tutorial.py
stream
stream
structtorio_1_1io_1_1Chunk
structtorio_1_1io_1_1Chunk_1a6b4f6a8b707901b3b10ef2d7df2e3e4d
structtorio_1_1io_1_1Chunk_1a9d15ed58a3e748508e5ac9b2f39f5be5
structtorio_1_1io_1_1OutputStreamInfo
structtorio_1_1io_1_1OutputStreamInfo_1a27a775f9402919c3e48a3bc63cd19414
structtorio_1_1io_1_1OutputStreamInfo_1a28f3b8e320a2d620b132acf3cff24715
structtorio_1_1io_1_1OutputStreamInfo_1a41688027e3c680fc3e43f2bc5834d745
structtorio_1_1io_1_1OutputStreamInfo_1a5fbe6e8d7efede48e86d5fa52c82f0b0
structtorio_1_1io_1_1OutputStreamInfo_1a77441472f664aae0434bc2c332f6790e
structtorio_1_1io_1_1OutputStreamInfo_1a7fa46a5796af9b560c93dd3688052b7a
structtorio_1_1io_1_1OutputStreamInfo_1abf0481abb45d23afe25806f149fdee44
structtorio_1_1io_1_1OutputStreamInfo_1af0796c137d1d62d61fb505b7ddc60a50
structtorio_1_1io_1_1OutputStreamInfo_1afb5bc0d1918e188b27d8273bd04511c7
structtorio_1_1io_1_1SrcStreamInfo
structtorio_1_1io_1_1SrcStreamInfo_1a0c1c2812bbd202ca42913d6b6b81b597
structtorio_1_1io_1_1SrcStreamInfo_1a15fb5310df582da99644c0cfe9cda926
structtorio_1_1io_1_1SrcStreamInfo_1a1610ecb097b5769f2355b663956bda75
structtorio_1_1io_1_1SrcStreamInfo_1a7445147c98842da1e0a17a532a5a1882
structtorio_1_1io_1_1SrcStreamInfo_1a92f5b3eee0a9191ac9b941813e59a39f
structtorio_1_1io_1_1SrcStreamInfo_1ab395c00d4a21aada8eaf9a26219010e6
structtorio_1_1io_1_1SrcStreamInfo_1abacd6778fbed84e9813e2d0d12b2f6e9
structtorio_1_1io_1_1SrcStreamInfo_1abb1dbb8e70a976dd6b8e6960f4f36313
structtorio_1_1io_1_1SrcStreamInfo_1acff9793602878574a5b4bcb3e69b9b28
structtorio_1_1io_1_1SrcStreamInfo_1aedaf37cd17c18485f2f2dcc81d8fa1ca
structtorio_1_1io_1_1SrcStreamInfo_1af513beba4aa8a4d938340b1155436e2d
structtorio_1_1io_1_1SrcStreamInfo_1af63a840ef28286d0c8a72dfa79d2093b
structtorio_1_1io_1_1SrcStreamInfo_1afbb856b739941b2c7baf41f68d888767
symbols
tempo
tokens
torchaudio
torchaudio
torchaudio.compliance.kaldi
torchaudio.compliance.kaldi
torchaudio.compliance.kaldi.fbank
torchaudio.compliance.kaldi.fbank
torchaudio.compliance.kaldi.mfcc
torchaudio.compliance.kaldi.mfcc
torchaudio.compliance.kaldi.spectrogram
torchaudio.compliance.kaldi.spectrogram
torchaudio.datasets
torchaudio.datasets
torchaudio.functional
torchaudio.functional
torchaudio.functional.DB_to_amplitude
torchaudio.functional.DB_to_amplitude
torchaudio.functional.add_noise
torchaudio.functional.add_noise
torchaudio.functional.allpass_biquad
torchaudio.functional.allpass_biquad
torchaudio.functional.amplitude_to_DB
torchaudio.functional.amplitude_to_DB
torchaudio.functional.apply_beamforming
torchaudio.functional.apply_beamforming
torchaudio.functional.apply_codec
torchaudio.functional.apply_codec
torchaudio.functional.band_biquad
torchaudio.functional.band_biquad
torchaudio.functional.bandpass_biquad
torchaudio.functional.bandpass_biquad
torchaudio.functional.bandreject_biquad
torchaudio.functional.bandreject_biquad
torchaudio.functional.bass_biquad
torchaudio.functional.bass_biquad
torchaudio.functional.biquad
torchaudio.functional.biquad
torchaudio.functional.compute_deltas
torchaudio.functional.compute_deltas
torchaudio.functional.contrast
torchaudio.functional.contrast
torchaudio.functional.convolve
torchaudio.functional.convolve
torchaudio.functional.create_dct
torchaudio.functional.create_dct
torchaudio.functional.dcshift
torchaudio.functional.dcshift
torchaudio.functional.deemph_biquad
torchaudio.functional.deemph_biquad
torchaudio.functional.deemphasis
torchaudio.functional.deemphasis
torchaudio.functional.detect_pitch_frequency
torchaudio.functional.detect_pitch_frequency
torchaudio.functional.dither
torchaudio.functional.dither
torchaudio.functional.edit_distance
torchaudio.functional.edit_distance
torchaudio.functional.equalizer_biquad
torchaudio.functional.equalizer_biquad
torchaudio.functional.fftconvolve
torchaudio.functional.fftconvolve
torchaudio.functional.filtfilt
torchaudio.functional.filtfilt
torchaudio.functional.flanger
torchaudio.functional.flanger
torchaudio.functional.forced_align
torchaudio.functional.forced_align
torchaudio.functional.frechet_distance
torchaudio.functional.frechet_distance
torchaudio.functional.gain
torchaudio.functional.gain
torchaudio.functional.griffinlim
torchaudio.functional.griffinlim
torchaudio.functional.highpass_biquad
torchaudio.functional.highpass_biquad
torchaudio.functional.inverse_spectrogram
torchaudio.functional.inverse_spectrogram
torchaudio.functional.lfilter
torchaudio.functional.lfilter
torchaudio.functional.linear_fbanks
torchaudio.functional.linear_fbanks
torchaudio.functional.loudness
torchaudio.functional.loudness
torchaudio.functional.lowpass_biquad
torchaudio.functional.lowpass_biquad
torchaudio.functional.mask_along_axis
torchaudio.functional.mask_along_axis
torchaudio.functional.mask_along_axis_iid
torchaudio.functional.mask_along_axis_iid
torchaudio.functional.melscale_fbanks
torchaudio.functional.melscale_fbanks
torchaudio.functional.merge_tokens
torchaudio.functional.merge_tokens
torchaudio.functional.mu_law_decoding
torchaudio.functional.mu_law_decoding
torchaudio.functional.mu_law_encoding
torchaudio.functional.mu_law_encoding
torchaudio.functional.mvdr_weights_rtf
torchaudio.functional.mvdr_weights_rtf
torchaudio.functional.mvdr_weights_souden
torchaudio.functional.mvdr_weights_souden
torchaudio.functional.overdrive
torchaudio.functional.overdrive
torchaudio.functional.phase_vocoder
torchaudio.functional.phase_vocoder
torchaudio.functional.phaser
torchaudio.functional.phaser
torchaudio.functional.pitch_shift
torchaudio.functional.pitch_shift
torchaudio.functional.preemphasis
torchaudio.functional.preemphasis
torchaudio.functional.psd
torchaudio.functional.psd
torchaudio.functional.resample
torchaudio.functional.resample
torchaudio.functional.riaa_biquad
torchaudio.functional.riaa_biquad
torchaudio.functional.rnnt_loss
torchaudio.functional.rnnt_loss
torchaudio.functional.rtf_evd
torchaudio.functional.rtf_evd
torchaudio.functional.rtf_power
torchaudio.functional.rtf_power
torchaudio.functional.sliding_window_cmn
torchaudio.functional.sliding_window_cmn
torchaudio.functional.spectral_centroid
torchaudio.functional.spectral_centroid
torchaudio.functional.spectrogram
torchaudio.functional.spectrogram
torchaudio.functional.speed
torchaudio.functional.speed
torchaudio.functional.treble_biquad
torchaudio.functional.treble_biquad
torchaudio.functional.vad
torchaudio.functional.vad
torchaudio.info
torchaudio.info
torchaudio.io
torchaudio.io
torchaudio.kaldi_io
torchaudio.list_audio_backends
torchaudio.list_audio_backends
torchaudio.load
torchaudio.load
torchaudio.models
torchaudio.models
torchaudio.models.conv_tasnet_base
torchaudio.models.conv_tasnet_base
torchaudio.models.decoder
torchaudio.models.decoder
torchaudio.models.emformer_rnnt_base
torchaudio.models.emformer_rnnt_base
torchaudio.models.emformer_rnnt_model
torchaudio.models.emformer_rnnt_model
torchaudio.models.hdemucs_high
torchaudio.models.hdemucs_high
torchaudio.models.hdemucs_low
torchaudio.models.hdemucs_low
torchaudio.models.hdemucs_medium
torchaudio.models.hdemucs_medium
torchaudio.models.hubert_base
torchaudio.models.hubert_base
torchaudio.models.hubert_large
torchaudio.models.hubert_large
torchaudio.models.hubert_pretrain_base
torchaudio.models.hubert_pretrain_base
torchaudio.models.hubert_pretrain_large
torchaudio.models.hubert_pretrain_large
torchaudio.models.hubert_pretrain_model
torchaudio.models.hubert_pretrain_model
torchaudio.models.hubert_pretrain_xlarge
torchaudio.models.hubert_pretrain_xlarge
torchaudio.models.hubert_xlarge
torchaudio.models.hubert_xlarge
torchaudio.models.squim_objective_base
torchaudio.models.squim_objective_base
torchaudio.models.squim_objective_model
torchaudio.models.squim_objective_model
torchaudio.models.squim_subjective_base
torchaudio.models.squim_subjective_base
torchaudio.models.squim_subjective_model
torchaudio.models.squim_subjective_model
torchaudio.models.wav2vec2.utils.import_fairseq_model
torchaudio.models.wav2vec2.utils.import_fairseq_model
torchaudio.models.wav2vec2.utils.import_huggingface_model
torchaudio.models.wav2vec2.utils.import_huggingface_model
torchaudio.models.wav2vec2_base
torchaudio.models.wav2vec2_base
torchaudio.models.wav2vec2_large
torchaudio.models.wav2vec2_large
torchaudio.models.wav2vec2_large_lv60k
torchaudio.models.wav2vec2_large_lv60k
torchaudio.models.wav2vec2_model
torchaudio.models.wav2vec2_model
torchaudio.models.wav2vec2_xlsr_1b
torchaudio.models.wav2vec2_xlsr_1b
torchaudio.models.wav2vec2_xlsr_2b
torchaudio.models.wav2vec2_xlsr_2b
torchaudio.models.wav2vec2_xlsr_300m
torchaudio.models.wav2vec2_xlsr_300m
torchaudio.models.wavlm_base
torchaudio.models.wavlm_base
torchaudio.models.wavlm_large
torchaudio.models.wavlm_large
torchaudio.models.wavlm_model
torchaudio.models.wavlm_model
torchaudio.pipelines
torchaudio.pipelines
torchaudio.prototype
torchaudio.prototype
torchaudio.prototype.datasets
torchaudio.prototype.datasets
torchaudio.prototype.functional
torchaudio.prototype.functional.adsr_envelope
torchaudio.prototype.functional.adsr_envelope
torchaudio.prototype.functional.barkscale_fbanks
torchaudio.prototype.functional.barkscale_fbanks
torchaudio.prototype.functional.chroma_filterbank
torchaudio.prototype.functional.chroma_filterbank
torchaudio.prototype.functional.extend_pitch
torchaudio.prototype.functional.extend_pitch
torchaudio.prototype.functional.filter_waveform
torchaudio.prototype.functional.filter_waveform
torchaudio.prototype.functional.frequency_impulse_response
torchaudio.prototype.functional.frequency_impulse_response
torchaudio.prototype.functional.oscillator_bank
torchaudio.prototype.functional.oscillator_bank
torchaudio.prototype.functional.ray_tracing
torchaudio.prototype.functional.ray_tracing
torchaudio.prototype.functional.simulate_rir_ism
torchaudio.prototype.functional.simulate_rir_ism
torchaudio.prototype.functional.sinc_impulse_response
torchaudio.prototype.functional.sinc_impulse_response
torchaudio.prototype.models
torchaudio.prototype.models.conformer_rnnt_base
torchaudio.prototype.models.conformer_rnnt_base
torchaudio.prototype.models.conformer_rnnt_model
torchaudio.prototype.models.conformer_rnnt_model
torchaudio.prototype.models.conformer_wav2vec2_base
torchaudio.prototype.models.conformer_wav2vec2_base
torchaudio.prototype.models.conformer_wav2vec2_model
torchaudio.prototype.models.conformer_wav2vec2_model
torchaudio.prototype.models.conformer_wav2vec2_pretrain_base
torchaudio.prototype.models.conformer_wav2vec2_pretrain_base
torchaudio.prototype.models.conformer_wav2vec2_pretrain_large
torchaudio.prototype.models.conformer_wav2vec2_pretrain_large
torchaudio.prototype.models.conformer_wav2vec2_pretrain_model
torchaudio.prototype.models.conformer_wav2vec2_pretrain_model
torchaudio.prototype.models.emformer_hubert_base
torchaudio.prototype.models.emformer_hubert_base
torchaudio.prototype.models.emformer_hubert_model
torchaudio.prototype.models.emformer_hubert_model
torchaudio.prototype.models.hifigan_vocoder
torchaudio.prototype.models.hifigan_vocoder
torchaudio.prototype.models.hifigan_vocoder_v1
torchaudio.prototype.models.hifigan_vocoder_v1
torchaudio.prototype.models.hifigan_vocoder_v2
torchaudio.prototype.models.hifigan_vocoder_v2
torchaudio.prototype.models.hifigan_vocoder_v3
torchaudio.prototype.models.hifigan_vocoder_v3
torchaudio.prototype.pipelines
torchaudio.prototype.transforms
torchaudio.prototype.transforms
torchaudio.save
torchaudio.save
torchaudio.sox_effects
torchaudio.sox_effects
torchaudio.sox_effects.apply_effects_file
torchaudio.sox_effects.apply_effects_file
torchaudio.sox_effects.apply_effects_tensor
torchaudio.sox_effects.apply_effects_tensor
torchaudio.sox_effects.effect_names
torchaudio.sox_effects.effect_names
torchaudio.transforms
torchaudio.transforms
torchaudio.utils
torchaudio.utils
torio
torio
torio.io
torio.io
torio.utils
torio.utils
torio::io::StreamingMediaDecoder
torio::io::StreamingMediaDecoder
torio::io::StreamingMediaEncoder
torio::io::StreamingMediaEncoder
transcribe
transcribe_streaming
tremolo
vibrato
wav2vec 2.0 / HuBERT - Fine-tuned ASR
wav2vec 2.0 / HuBERT - Forced Alignment
wav2vec 2.0 / HuBERT / WavLM - SSL
write_audio_chunk
write_audio_chunk
write_video_chunk
write_video_chunk
Values
torchaudio.models.Hypothesis
torchaudio.pipelines.CONVTASNET_BASE_LIBRI2MIX
torchaudio.pipelines.EMFORMER_RNNT_BASE_LIBRISPEECH
torchaudio.pipelines.HDEMUCS_HIGH_MUSDB
torchaudio.pipelines.HDEMUCS_HIGH_MUSDB_PLUS
torchaudio.pipelines.HUBERT_ASR_LARGE
torchaudio.pipelines.HUBERT_ASR_XLARGE
torchaudio.pipelines.HUBERT_BASE
torchaudio.pipelines.HUBERT_LARGE
torchaudio.pipelines.HUBERT_XLARGE
torchaudio.pipelines.MMS_FA
torchaudio.pipelines.SQUIM_OBJECTIVE
torchaudio.pipelines.SQUIM_SUBJECTIVE
torchaudio.pipelines.TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
torchaudio.pipelines.TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH
torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
torchaudio.pipelines.VOXPOPULI_ASR_BASE_10K_DE
torchaudio.pipelines.VOXPOPULI_ASR_BASE_10K_EN
torchaudio.pipelines.VOXPOPULI_ASR_BASE_10K_ES
torchaudio.pipelines.VOXPOPULI_ASR_BASE_10K_FR
torchaudio.pipelines.VOXPOPULI_ASR_BASE_10K_IT
torchaudio.pipelines.WAV2VEC2_ASR_BASE_100H
torchaudio.pipelines.WAV2VEC2_ASR_BASE_10M
torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
torchaudio.pipelines.WAV2VEC2_ASR_LARGE_100H
torchaudio.pipelines.WAV2VEC2_ASR_LARGE_10M
torchaudio.pipelines.WAV2VEC2_ASR_LARGE_960H
torchaudio.pipelines.WAV2VEC2_ASR_LARGE_LV60K_100H
torchaudio.pipelines.WAV2VEC2_ASR_LARGE_LV60K_10M
torchaudio.pipelines.WAV2VEC2_ASR_LARGE_LV60K_960H
torchaudio.pipelines.WAV2VEC2_BASE
torchaudio.pipelines.WAV2VEC2_LARGE
torchaudio.pipelines.WAV2VEC2_LARGE_LV60K
torchaudio.pipelines.WAV2VEC2_XLSR53
torchaudio.pipelines.WAV2VEC2_XLSR_1B
torchaudio.pipelines.WAV2VEC2_XLSR_2B
torchaudio.pipelines.WAV2VEC2_XLSR_300M
torchaudio.pipelines.WAVLM_BASE
torchaudio.pipelines.WAVLM_BASE_PLUS
torchaudio.pipelines.WAVLM_LARGE
torchaudio.prototype.pipelines.EMFORMER_RNNT_BASE_MUSTC
torchaudio.prototype.pipelines.EMFORMER_RNNT_BASE_TEDLIUM3
torchaudio.prototype.pipelines.HIFIGAN_VOCODER_V3_LJSPEECH
torchaudio.prototype.pipelines.VGGISH