Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 16 additions & 19 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ __pycache__/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
/build/
/develop-eggs/
/dist/
/downloads/
/eggs/
/.eggs/
/lib/
/lib64/
/parts/
/sdist/
/var/
/wheels/
/share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
Expand Down Expand Up @@ -127,6 +127,8 @@ venv/
ENV/
env.bak/
venv.bak/
pyvenv.cfg
/Scripts/

# Spyder project settings
.spyderproject
Expand All @@ -142,6 +144,7 @@ venv.bak/
.mypy_cache/
.dmypy.json
dmypy.json
/share/man/man1/isympy.*

# Pyre type checker
.pyre/
Expand All @@ -152,9 +155,3 @@ dmypy.json
# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
14 changes: 0 additions & 14 deletions conf/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,3 @@ augment:

misc:
num_workers: 10














2 changes: 1 addition & 1 deletion scnet/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def apply_model(model, mix, shifts=1, split=True, segment=20, samplerate=44100,

Args:
shifts (int): if > 0, will shift in time `mix` by a random amount between 0 and 0.5 sec
and apply the oppositve shift to the output. This is repeated `shifts` time and
and apply the opposite shift to the output. This is repeated `shifts` times and
all predictions are averaged. This effectively makes the model time equivariant
and improves SDR by up to 0.2 points.
split (bool): if True, the input will be broken down in 8 seconds extracts
Expand Down
28 changes: 25 additions & 3 deletions scnet/augment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#From HT demucs https://github.com/facebookresearch/demucs/tree/release_v4?tab=readme-ov-file

import random
import torch as th
from torch import nn
Expand Down Expand Up @@ -62,7 +60,7 @@ def __init__(self, proba=1, group_size=4):
"""
Shuffle sources within one batch.
Each batch is divided into groups of size `group_size` and shuffling is done within
each group separatly. This allow to keep the same probability distribution no matter
each group separately. This allows keeping the same probability distribution no matter
the number of GPUs. Without this grouping, using more GPUs would lead to a higher
probability of keeping two sources from the same track together which can impact
performance.
Expand Down Expand Up @@ -102,3 +100,27 @@ def forward(self, wav):
scales = th.empty(batch, streams, 1, 1, device=device).uniform_(self.min, self.max)
wav *= scales
return wav

# Some or all of the work in this file may be restricted by the following copyright.
"""
MIT License

Copyright (c) Meta, Inc. and its affiliates.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. """
26 changes: 24 additions & 2 deletions scnet/ema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#From HT demucs https://github.com/facebookresearch/demucs/tree/release_v4?tab=readme-ov-file

from contextlib import contextmanager
import torch
from .utils import swap_state
Expand Down Expand Up @@ -57,3 +55,27 @@ def load_state_dict(self, state):
self.count = state['count']
for k, v in state['state'].items():
self.state[k].copy_(v)

# Some or all of the work in this file may be restricted by the following copyright.
"""
MIT License

Copyright (c) Meta, Inc. and its affiliates.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. """
7 changes: 3 additions & 4 deletions scnet/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
import yaml



class Seperator:
class Separator:
def __init__(self, model, checkpoint_path):
self.separator = load_model(model, checkpoint_path)

Expand Down Expand Up @@ -128,5 +127,5 @@ def parse_args():

model = SCNet(**config.model)
model.eval()
seperator = Seperator(model, args.checkpoint_path)
seperator.process_directory(args.input_dir, args.output_dir)
separator = Separator(model, args.checkpoint_path)
separator.process_directory(args.input_dir, args.output_dir)
1 change: 1 addition & 0 deletions scnet/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def main():
os.makedirs(args.save_path)

if not os.path.isfile(args.config_path):
import sys
print(f"Error: config file {args.config_path} does not exist.")
sys.exit(1)

Expand Down
31 changes: 25 additions & 6 deletions scnet/wav.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#From HT demucs https://github.com/facebookresearch/demucs/tree/release_v4?tab=readme-ov-file

from collections import OrderedDict
import hashlib
import math
Expand Down Expand Up @@ -104,13 +102,13 @@ def __init__(
metadata (dict): output from `build_metadata`.
sources (list[str]): list of source names.
segment (None or float): segment length in seconds. If `None`, returns entire tracks.
shift (None or float): stride in seconds bewteen samples.
shift (None or float): stride in seconds between samples.
normalize (bool): normalizes input audio, **based on the metadata content**,
i.e. the entire track is normalized, not individual extracts.
samplerate (int): target sample rate. if the file sample rate
is different, it will be resampled on the fly.
channels (int): target nb of channels. if different, will be
changed onthe fly.
changed on the fly.
ext (str): extension for audio files (default is .wav).

samplerate and channels are converted on the fly.
Expand Down Expand Up @@ -193,5 +191,26 @@ def get_wav_datasets(args):
normalize=args.normalize, **kw_cv)
return train_set, valid_set



# Some or all of the work in this file may be restricted by the following copyright.
"""
MIT License

Copyright (c) Meta, Inc. and its affiliates.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. """