37 changes: 37 additions & 0 deletions Dockerfile.mac
@@ -0,0 +1,37 @@
FROM python:3.10-slim

EXPOSE 7865

WORKDIR /app

RUN apt-get update && \
    apt-get install -y -qq ffmpeg aria2 git build-essential && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*


COPY requirements.txt .

RUN pip install --upgrade "pip<24.1" && \
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install fairseq==0.12.2 && \
    pip install gradio==3.34.0 gradio-client==0.2.7

COPY . .


RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d assets/pretrained_v2/ -o D40k.pth && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d assets/pretrained_v2/ -o G40k.pth && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d assets/pretrained_v2/ -o f0D40k.pth && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d assets/pretrained_v2/ -o f0G40k.pth

RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d assets/hubert -o hubert_base.pt && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d assets/rmvpe -o rmvpe.pt

RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-%E4%BA%BA%E5%A3%B0vocals%2B%E9%9D%9E%E4%BA%BA%E5%A3%B0instrumentals.pth" -d assets/uvr5_weights/ -o "HP2-人声vocals+非人声instrumentals.pth" && \
    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-%E4%B8%BB%E6%97%8B%E5%BE%8B%E4%BA%BA%E5%A3%B0vocals%2B%E5%85%B6%E4%BB%96instrumentals.pth" -d assets/uvr5_weights/ -o "HP5-主旋律人声vocals+其他instrumentals.pth"

VOLUME [ "/app/weights", "/app/logs", "/app/assets/weights" ]

CMD ["python", "infer-web.py"]
3 changes: 2 additions & 1 deletion configs/config.py
@@ -167,7 +167,8 @@ def device_config(self) -> tuple:
                self.preprocess_per = 3.0
        elif self.has_mps():
            logger.info("No supported Nvidia GPU found")
            self.device = self.instead = "mps"
            logger.info("MPS available but using CPU for stability")
            self.device = self.instead = "cpu"
            self.is_half = False
            self.use_fp32_config()
        else:
36 changes: 36 additions & 0 deletions datasets/README.md
@@ -0,0 +1,36 @@
# Voice Datasets

This directory contains the audio datasets for training custom RVC models.

## Structure

Each subdirectory corresponds to a specific voice type:

- `male_low/`: Bass/Baritone male voices
- `male_mid/`: Tenor/Mid-range male voices
- `female_low/`: Alto/Contralto female voices
- `female_high/`: Soprano/High-range female voices
- `anime_airy/`: Breathy/Airy anime-style voices
- `accent_non_native/`: Voices with distinct non-native accents
- `singing_male/`: Male singing vocals
- `singing_female/`: Female singing vocals
- `child/`: Child voices
- `elderly/`: Elderly voices

## How to Add Data

1. **Collect Audio**: Gather 10-15 minutes of clean, single-speaker audio for the desired category.
2. **Place Files**: Put the raw audio files (mp3, wav, etc.) into a temporary folder or directly here.
3. **Process**: Use the provided tool to normalize and split the audio.

```bash
# Example: Processing a raw file into the male_low dataset
python tools/audio_preprocessor.py -i raw_audio/my_voice.mp3 -o datasets/male_low
```

## Requirements

- **Format**: WAV (will be converted automatically)
- **Sample Rate**: 40kHz or 48kHz (will be converted automatically)
- **Channels**: Mono (will be converted automatically)
- **Quality**: No background noise, music, or reverb. Use UVR5 to clean if necessary.
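
If you prefer to convert files yourself rather than rely on the automatic conversion, an ffmpeg call along these lines matches the requirements above (the paths reuse the example from "How to Add Data" and are purely illustrative):

```bash
# Convert a raw recording to mono 16-bit PCM WAV at 40 kHz
ffmpeg -i raw_audio/my_voice.mp3 -ac 1 -ar 40000 -c:a pcm_s16le datasets/male_low/my_voice_40k.wav
```
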
18 changes: 18 additions & 0 deletions docker-compose.mac.yml
@@ -0,0 +1,18 @@
version: '3.8'

services:
  rvc-webui:
    build:
      context: .
      dockerfile: Dockerfile.mac
    ports:
      - "7865:7865"
    volumes:
      - ./weights:/app/weights
      - ./logs:/app/logs
      - ./assets/weights:/app/assets/weights
      - ./datasets:/app/datasets
    environment:
      - PYTHONUNBUFFERED=1
    restart: unless-stopped
    platform: linux/amd64
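
With this Compose file in place, the stack can be brought up with the standard Compose workflow; a minimal sketch:

```bash
# Build the image and start the WebUI in the background
docker compose -f docker-compose.mac.yml up --build -d

# Follow the logs; the Gradio UI is then expected at http://localhost:7865
docker compose -f docker-compose.mac.yml logs -f rvc-webui
```
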
Binary file added experiments/output_test.wav
Binary file not shown.
Binary file added experiments/voice1_to_voice2.wav
Binary file not shown.
28 changes: 26 additions & 2 deletions infer-web.py
@@ -114,6 +114,11 @@ def forward_dml(ctx, x, scale):
if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    default_batch_size = min(mem) // 2
elif torch.backends.mps.is_available():
    if_gpu_ok = True
    gpu_infos.append("0\tApple Silicon MPS")
    gpu_info = "Apple Silicon MPS detected"
    default_batch_size = 4
else:
    gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
    default_batch_size = 1
@@ -220,6 +225,14 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
    f.close()

    # Verify trainset_dir exists
    if not os.path.exists(trainset_dir):
        error_msg = f"Training folder does not exist: {trainset_dir}"
        logger.error(error_msg)
        yield error_msg
        return

    cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
        config.python_cmd,
        trainset_dir,
@@ -231,8 +244,19 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
        config.preprocess_per,
    )
    logger.info("Execute: " + cmd)
    # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
    p = Popen(cmd, shell=True)
    print(f"Starting preprocessing: {cmd}")
    # Use shell=False with proper argument list for better reliability
    cmd_args = [
        config.python_cmd,
        "infer/modules/train/preprocess.py",
        trainset_dir,
        str(sr),
        str(n_p),
        f"{now_dir}/logs/{exp_dir}",
        str(config.noparallel),
        str(config.preprocess_per),
    ]
    p = Popen(cmd_args, cwd=now_dir)
    # gradio's Popen read insists on waiting for the whole run to finish and then reading everything at once; outside gradio it reads and prints line by line as normal, so a separate text stream has to be polled on a timer instead
    done = [False]
    threading.Thread(