Skip to content
230 changes: 229 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,17 @@ def __init__(self):
self.noautoopen,
self.paperspace,
self.is_cli,
self.simple_cli,
self.simple_cli_args,
) = self.arg_parse()

self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

@staticmethod
def arg_parse() -> tuple:
exe = sys.executable or "python"
parser = argparse.ArgumentParser()
subparser = parser.add_subparsers()
parser.add_argument("--port", type=int, default=7865, help="Listen port")
parser.add_argument("--pycmd", type=str, default=exe, help="Python command")
parser.add_argument("--colab", action="store_true", help="Launch in colab")
Expand All @@ -114,8 +117,231 @@ def arg_parse() -> tuple:
action="store_true",
help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!",
)
# Fork feature: embed a flag-driven "simple" CLI into infer-web.py.
# The default is the empty string, which is also listed as a valid choice.
simple_cli_commands = [
    "infer",
    "pre-process",
    "extract-feature",
    "train",
    "train-feature",
    "extract-model",
    "uvr",
    "",
]
parser.add_argument(
    "--simple_cli",
    default="",
    choices=simple_cli_commands,
    help="Use the simpler CLI instead of the cli interface. Choose from 1) pre-process 2) extract-feature 3) WIP.",
)

# Arguments for simple cli usage.
parser.add_argument("--exp_name", default="mi-test", type=str, help="Experiment name")
parser.add_argument("--trainset_dir", default="", type=str, help="Trainset directory")
parser.add_argument(
    "--sample_rate",
    default="40k",
    choices=["32k", "40k", "48k"],
    help="Sample rate: 40k (32k, 40k, 48k)",
)
parser.add_argument("--n_workers", default=8, type=int, help="Number of cpu threads to work")
parser.add_argument("--gpu", default=0, type=int, help="GPU device index to use")
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` does not work with argparse: the converter receives the raw
    string, and ``bool("False")`` / ``bool("0")`` are both ``True`` because
    any non-empty string is truthy. With it, the boolean options below could
    never be switched off from the command line.

    Accepts 1/0, true/false, t/f, yes/no, y/n and on/off (case-insensitive).
    The empty string maps to ``False``, as it did under ``bool``.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean,
            so argparse reports a usage error instead of guessing.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("0", "false", "f", "no", "n", "off", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean (1/0, true/false, yes/no), got %r" % (value,)
    )

# Feature-extraction and training options for the simple CLI.
parser.add_argument(
    "--is_pitch_guidance",
    type=_str2bool,  # was type=bool, which turned "0"/"False" into True
    default=True,
    help="Use pitch guidance (1 for True 0 for False)",
)
parser.add_argument(
    "--f0_method",
    type=str,
    default="crepe",
    help="F0 extraction method",
)
parser.add_argument(
    "--crepe_hop_length",
    type=int,
    default=128,
    help="Hop length for crepe",
)
parser.add_argument(
    "--rvc_version",
    choices=["v1", "v2"],
    default="v2",
    help="RVC version",
)
parser.add_argument(
    "--speaker_id",
    type=int,
    default=0,
    help="Speaker id for multi-speaker model",
)
parser.add_argument(
    "--save_epoch_iter",
    type=int,
    default=5,
    help="Save model every n iterations",
)
parser.add_argument(
    "--epochs", type=int, default=20, help="Number of epochs to train"
)
parser.add_argument(
    "--batch_size", type=int, default=8, help="Batch size for training"
)
# The three switches below default to False; with type=bool any explicit
# value (even "False") would have enabled them.
parser.add_argument(
    "--latest_ckpt_only",
    type=_str2bool,
    default=False,
    help="Save only the latest checkpoint",
)
parser.add_argument(
    "--cache_trainset",
    type=_str2bool,
    default=False,
    help="Whether to cache training set to vram",
)
parser.add_argument(
    "--save_small_model",
    type=_str2bool,
    default=False,
    help="Save extracted small model every generation?",
)

# Inference-related options for the simple CLI (grouping judged from the help
# texts). Registered from a table, in the same order as before, so the --help
# listing keeps its layout.
inference_options = (
    (
        "--model_file_name",
        {"type": str, "default": "", "help": "Model name with .pth in ./weights"},
    ),
    (
        "--source_audio_path",
        {"type": str, "default": "", "help": "Source audio path for inference"},
    ),
    (
        "--output_file_name",
        {
            "type": str,
            "default": "output.wav",
            "help": "Output file name to be placed in './audio-outputs'",
        },
    ),
    (
        "--feature_index_path",
        {"type": str, "default": "", "help": "Feature index file path"},
    ),
    (
        "--transposition",
        {
            "type": int,
            "default": 0,
            "help": "Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12)",
        },
    ),
    (
        "--infer_f0_method",
        {"type": str, "default": "crepe", "help": "F0 extraction method for inference"},
    ),
    (
        "--harvest_median_filter_radius",
        {"type": int, "default": 3, "help": "Harvest median filter radius, default 3."},
    ),
    (
        "--post_sample_rate",
        {
            "type": int,
            "default": 0,
            "help": "Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling.",
        },
    ),
    (
        "--mix_volume_envelope",
        {
            "type": float,
            "default": 0.25,
            "help": "Use the volume envelope of the input to replace or mix with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is used.",
        },
    ),
    (
        "--feature_index_ratio",
        {"type": float, "default": 0.33, "help": "Feature index ratio for inference."},
    ),
    (
        "--voiceless_consonant_protection",
        {
            "type": float,
            "default": 0.33,
            "help": "Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy.",
        },
    ),
)
for flag, settings in inference_options:
    parser.add_argument(flag, **settings)
# extract-model options: three free-form strings that all default to "".
for flag, description in (
    ("--model_path", "Model path for extract-model"),
    ("--model_save_name", "Model save name for extract-model"),
    ("--model_info", "Model info for extract-model"),
):
    parser.add_argument(flag, type=str, default="", help=description)

# Switch that requests the simple CLI's own help output.
parser.add_argument("--cmd_help", help="Print help for simple cli", action="store_true")
# --agg, --format and --uvr5_weight_name (the help texts tie --agg and the
# weight name to UVR5).
parser.add_argument("--agg", default=10, type=int, help="Aggregation for uvr5")
parser.add_argument("--format", default="flac", type=str, help="Audio format")
parser.add_argument("--uvr5_weight_name", default="", type=str, help="UVR5 weight name")
# Formant shifting: an on/off switch followed by its two float tuning values.
parser.add_argument(
    "--formant_shift",
    help="Whether to formant shift the inference audio before conversion: False (if set to false, you can ignore setting the quefrency and timbre values for formanting)",
    action="store_true",
)
for flag, fallback, description in (
    (
        "--formant_quefrency",
        8.0,
        "Quefrency for formanting: 8.0 (no need to set if arg14 is False/false)",
    ),
    (
        "--formant_timbre",
        1.2,
        "Timbre for formanting: 1.2 (no need to set if arg14 is False/false)",
    ),
):
    parser.add_argument(flag, type=float, default=fallback, help=description)

# Parse the command line once; everything below reads from this namespace.
cmd_opts = parser.parse_args()

# Names of the parsed options that are copied into a separate namespace
# handed back to the caller as `simple_cli_args`.
args_to_assign = (
    "exp_name",
    "trainset_dir",
    "sample_rate",
    "n_workers",
    "gpu",
    "is_pitch_guidance",
    "f0_method",
    "crepe_hop_length",
    "rvc_version",
    "speaker_id",
    "save_epoch_iter",
    "epochs",
    "batch_size",
    "latest_ckpt_only",
    "cache_trainset",
    "save_small_model",
    "model_file_name",
    "source_audio_path",
    "output_file_name",
    "feature_index_path",
    "transposition",
    "infer_f0_method",
    "harvest_median_filter_radius",
    "post_sample_rate",
    "mix_volume_envelope",
    "feature_index_ratio",
    "voiceless_consonant_protection",
    "model_path",
    "model_save_name",
    "model_info",
    "cmd_help",
    "agg",
    "format",
    "uvr5_weight_name",
    "formant_shift",
    "formant_quefrency",
    "formant_timbre",
)
simple_cli_args = argparse.Namespace()
for option_name in args_to_assign:
    setattr(simple_cli_args, option_name, getattr(cmd_opts, option_name))

# A port outside 0..65535 is replaced by the default 7865.
if not 0 <= cmd_opts.port <= 65535:
    cmd_opts.port = 7865

return (
Expand All @@ -126,6 +352,8 @@ def arg_parse() -> tuple:
cmd_opts.noautoopen,
cmd_opts.paperspace,
cmd_opts.is_cli,
cmd_opts.simple_cli,
simple_cli_args,
)

# has_mps is only available in nightly pytorch (for now) and macOS 12.3+.
Expand Down
Loading