37 changes: 37 additions & 0 deletions Dockerfile.mac
@@ -0,0 +1,37 @@
FROM python:3.10-slim

EXPOSE 7865

WORKDIR /app

RUN apt-get update && \
    apt-get install -y -qq ffmpeg aria2 git build-essential && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*


COPY requirements.txt .

RUN pip install --upgrade "pip<24.1" && \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install fairseq==0.12.2 && \
    pip install gradio==3.34.0 gradio-client==0.2.7

COPY . .


RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d assets/pretrained_v2/ -o D40k.pth && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d assets/pretrained_v2/ -o G40k.pth && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d assets/pretrained_v2/ -o f0D40k.pth && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d assets/pretrained_v2/ -o f0G40k.pth

RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d assets/hubert -o hubert_base.pt && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d assets/rmvpe -o rmvpe.pt

RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-%E4%BA%BA%E5%A3%B0vocals%2B%E9%9D%9E%E4%BA%BA%E5%A3%B0instrumentals.pth" -d assets/uvr5_weights/ -o "HP2-人声vocals+非人声instrumentals.pth" && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-%E4%B8%BB%E6%97%8B%E5%BE%8B%E4%BA%BA%E5%A3%B0vocals%2B%E5%85%B6%E4%BB%96instrumentals.pth" -d assets/uvr5_weights/ -o "HP5-主旋律人声vocals+其他instrumentals.pth"

VOLUME [ "/app/weights", "/app/logs", "/app/assets/weights" ]

CMD ["python", "infer-web.py"]
3 changes: 2 additions & 1 deletion configs/config.py
@@ -167,7 +167,8 @@ def device_config(self) -> tuple:
                self.preprocess_per = 3.0
        elif self.has_mps():
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "mps"
            logger.info("MPS available but using CPU for stability")
            self.device = self.instead = "cpu"
            self.is_half = False
            self.use_fp32_config()
        else:
36 changes: 36 additions & 0 deletions datasets/README.md
@@ -0,0 +1,36 @@
# Voice Datasets

This directory contains the audio datasets for training custom RVC models.

## Structure

Each subdirectory corresponds to a specific voice type:

- `male_low/`: Bass/Baritone male voices
- `male_mid/`: Tenor/Mid-range male voices
- `female_low/`: Alto/Contralto female voices
- `female_high/`: Soprano/High-range female voices
- `anime_airy/`: Breathy/Airy anime-style voices
- `accent_non_native/`: Voices with distinct non-native accents
- `singing_male/`: Male singing vocals
- `singing_female/`: Female singing vocals
- `child/`: Child voices
- `elderly/`: Elderly voices

## How to Add Data

1. **Collect Audio**: Gather 10-15 minutes of clean, single-speaker audio for the desired category.
2. **Place Files**: Put the raw audio files (mp3, wav, etc.) into a temporary folder or directly here.
3. **Process**: Use the provided tool to normalize and split the audio.

```bash
# Example: Processing a raw file into the male_low dataset
python tools/audio_preprocessor.py -i raw_audio/my_voice.mp3 -o datasets/male_low
```

## Requirements

- **Format**: WAV (will be converted automatically)
- **Sample Rate**: 40kHz or 48kHz (will be converted automatically)
- **Channels**: Mono (will be converted automatically)
- **Quality**: No background noise, music, or reverb. Use UVR5 to clean if necessary.
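
If you prefer to convert files yourself rather than rely on the automatic conversion, an ffmpeg call along these lines matches the requirements above (the paths reuse the example from "How to Add Data" and are purely illustrative):

```bash
# Convert a raw recording to mono 16-bit PCM WAV at 40 kHz
ffmpeg -i raw_audio/my_voice.mp3 -ac 1 -ar 40000 -c:a pcm_s16le datasets/male_low/my_voice_40k.wav
```
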
18 changes: 18 additions & 0 deletions docker-compose.mac.yml
@@ -0,0 +1,18 @@
version: '3.8'

services:
  rvc-webui:
    build:
      context: .
      dockerfile: Dockerfile.mac
    ports:
      - "7865:7865"
    volumes:
      - ./weights:/app/weights
      - ./logs:/app/logs
      - ./assets/weights:/app/assets/weights
      - ./datasets:/app/datasets
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    platform: linux/amd64
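
With this Compose file in place, the stack can be brought up with the standard Compose workflow; a minimal sketch:

```bash
# Build the image and start the WebUI in the background
docker compose -f docker-compose.mac.yml up --build -d

# Follow the logs; the Gradio UI is then expected at http://localhost:7865
docker compose -f docker-compose.mac.yml logs -f rvc-webui
```
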
Binary file added experiments/output_test.wav
Binary file not shown.
Binary file added experiments/voice1_to_voice2.wav
Binary file not shown.
28 changes: 26 additions & 2 deletions infer-web.py
@@ -114,6 +114,11 @@ def forward_dml(ctx, x, scale):
if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    default_batch_size = min(mem) // 2
elif torch.backends.mps.is_available():
    if_gpu_ok = True
    gpu_infos.append("0\tApple Silicon MPS")
    gpu_info = "Apple Silicon MPS detected"
    default_batch_size = 4
else:
    gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
    default_batch_size = 1
@@ -220,6 +225,14 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
    f.close()

    # Verify trainset_dir exists
    if not os.path.exists(trainset_dir):
        error_msg = f"Training folder does not exist: {trainset_dir}"
        logger.error(error_msg)
        yield error_msg
        return

    cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
        config.python_cmd,
        trainset_dir,
@@ -231,8 +244,19 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
        config.preprocess_per,
    )
    logger.info("Execute: " + cmd)
    # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
    p = Popen(cmd, shell=True)
    print(f"Starting preprocessing: {cmd}")
    # Use shell=False with proper argument list for better reliability
    cmd_args = [
        config.python_cmd,
        "infer/modules/train/preprocess.py",
        trainset_dir,
        str(sr),
        str(n_p),
        f"{now_dir}/logs/{exp_dir}",
        str(config.noparallel),
        str(config.preprocess_per),
    ]
    p = Popen(cmd_args, cwd=now_dir)
    # gradio's Popen read insists on waiting for the whole run to finish and then reading everything at once; outside gradio it reads and prints line by line as normal, so a separate text stream has to be polled on a timer instead
    done = [False]
    threading.Thread(