Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions contentvec/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
from .numel_dataset import NumelDataset
from .num_samples_dataset import NumSamplesDataset
from .offset_tokens_dataset import OffsetTokensDataset
from .padding_mask_dataset import (
LeftPaddingMaskDataset,
PaddingMaskDataset,
RightPaddingMaskDataset,
)
from .pad_dataset import LeftPadDataset, PadDataset, RightPadDataset
from .prepend_dataset import PrependDataset
from .prepend_token_dataset import PrependTokenDataset
Expand All @@ -49,6 +54,7 @@
from .roll_dataset import RollDataset
from .round_robin_zip_datasets import RoundRobinZipDatasets
from .sort_dataset import SortDataset
from .speech_dlm_dataset import SpeechDLMDataset
from .strip_token_dataset import StripTokenDataset
from .subsample_dataset import SubsampleDataset
from .token_block_dataset import TokenBlockDataset
Expand All @@ -58,6 +64,7 @@
from .multilingual.sampled_multi_dataset import SampledMultiDataset
from .multilingual.sampled_multi_epoch_dataset import SampledMultiEpochDataset
from .fasta_dataset import FastaDataset, EncodedFastaDataset
from .transform_eos_concat_langpair_dataset import TransformEosConcatLangPairDataset

from .iterators import (
CountingIterator,
Expand Down Expand Up @@ -120,11 +127,13 @@
"SampledMultiEpochDataset",
"ShardedIterator",
"SortDataset",
"SpeechDLMDataset",
"StripTokenDataset",
"SubsampleDataset",
"TokenBlockDataset",
"TransformEosDataset",
"TransformEosLangPairDataset",
"TransformEosConcatLangPairDataset",
"TruncateDataset",
"TruncatedDictionary",
]
1 change: 1 addition & 0 deletions contentvec/models/hubert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from .hubert import * # noqa
from .contentvec import * # noqa
from .hubert_asr import * # noqa
1 change: 1 addition & 0 deletions contentvec/models/wav2vec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@
from .wav2vec import * # noqa
from .wav2vec2 import * # noqa
from .wav2vec2_asr import * # noqa
from .wav2vec2_laser import * # noqa
from .wav2vec2_1 import *
30 changes: 27 additions & 3 deletions contentvec/modules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
from .conv_tbc import ConvTBC
from .cross_entropy import cross_entropy
from .downsampled_multihead_attention import DownsampledMultiHeadAttention
from .dynamic_convolution import DynamicConv, DynamicConv1dTBC
from .dynamic_convolution import DynamicConv, DynamicConv1dTBC, DynamicConv_scripatable
from .dynamic_crf_layer import DynamicCRF
from .ema_module import EMAModuleConfig, EMAModule
from .fairseq_dropout import FairseqDropout
from .fp32_batch_norm import Fp32BatchNorm
from .fp32_group_norm import Fp32GroupNorm, GroupNormMasked
from .fp32_instance_norm import Fp32InstanceNorm
from .gelu import gelu, gelu_accurate
from .grad_multiply import GradMultiply
from .gumbel_vector_quantizer import GumbelVectorQuantizer
Expand All @@ -30,7 +33,7 @@
from .lstm_cell_with_zoneout import LSTMCellWithZoneOut
from .multihead_attention import MultiheadAttention
from .positional_embedding import PositionalEmbedding
from .same_pad import SamePad
from .same_pad import SamePad, SamePad2d
from .scalar_bias import ScalarBias
from .sinusoidal_positional_embedding import SinusoidalPositionalEmbedding
from .transformer_sentence_encoder_layer import TransformerSentenceEncoderLayer
Expand All @@ -39,6 +42,15 @@
from .unfold import unfold1d
from .transformer_layer import TransformerDecoderLayer, TransformerEncoderLayer
from .vggblock import VGGBlock
from .espnet_multihead_attention import (
ESPNETMultiHeadedAttention,
RelPositionMultiHeadedAttention,
RotaryPositionMultiHeadedAttention,
)
from .rotary_positional_embedding import RotaryPositionalEmbedding
from .positional_encoding import (
RelPositionalEncoding,
)

__all__ = [
"AdaptiveInput",
Expand All @@ -51,10 +63,15 @@
"DownsampledMultiHeadAttention",
"DynamicConv1dTBC",
"DynamicConv",
"DynamicConv_scripatable",
"DynamicCRF",
"EMAModule",
"EMAModuleConfig",
"FairseqDropout",
"Fp32BatchNorm",
"Fp32GroupNorm",
"Fp32LayerNorm",
"Fp32InstanceNorm",
"gelu",
"gelu_accurate",
"GradMultiply",
Expand All @@ -73,6 +90,7 @@
"MultiheadAttention",
"PositionalEmbedding",
"SamePad",
"SamePad2d",
"ScalarBias",
"SinusoidalPositionalEmbedding",
"TransformerSentenceEncoderLayer",
Expand All @@ -82,4 +100,10 @@
"TransposeLast",
"VGGBlock",
"unfold1d",
]
"ESPNETMultiHeadedAttention",
"PositionalEmbedding",
"RelPositionMultiHeadedAttention",
"RelPositionalEncoding",
"RotaryPositionalEmbedding",
"RotaryPositionMultiHeadedAttention",
]
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi
cd ${cwd}/fairseq
# checkout the fairseq version to use
git reset --hard 0b21875e45f332bedbcc0617dcf9379d3c03855f
git reset --hard 3f6ba43f07a6e9e2acf957fc24e57251a7a3f55c

if [ $(pip freeze | grep fairseq | wc -l ) -gt 0 ]; then
echo "Already installed fairseq. Skip..."
Expand Down