Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bark_infinity/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,8 @@ def generate_text_semantic(
tokenizer = model_container["tokenizer"]
encoded_text = np.array(_tokenize(tokenizer, text)) + TEXT_ENCODING_OFFSET
if OFFLOAD_CPU:
device = _grab_best_device(use_gpu=False)
models_devices["text"] = device
model.to(models_devices["text"])
device = next(model.parameters()).device
if len(encoded_text) > 256:
Expand Down Expand Up @@ -551,6 +553,8 @@ def generate_coarse(
preload_models()
model = models["coarse"]
if OFFLOAD_CPU:
device = _grab_best_device(use_gpu=False)
models_devices["coarse"] = device
model.to(models_devices["coarse"])
device = next(model.parameters()).device
# start loop
Expand Down Expand Up @@ -692,6 +696,8 @@ def generate_fine(
preload_models()
model = models["fine"]
if OFFLOAD_CPU:
device = _grab_best_device(use_gpu=False)
models_devices["fine"] = device
model.to(models_devices["fine"])
device = next(model.parameters()).device
# make input arr
Expand Down Expand Up @@ -797,6 +803,8 @@ def codec_decode(fine_tokens):
preload_models()
model = models["codec"]
if OFFLOAD_CPU:
device = _grab_best_device(use_gpu=False)
models_devices["codec"] = device
model.to(models_devices["codec"])
device = next(model.parameters()).device
arr = torch.from_numpy(fine_tokens)[None]
Expand Down
41 changes: 39 additions & 2 deletions bark_webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,44 @@ def generate_audio_long_gradio_clones(input, audio_prompt_input, bark_speaker_as

output_dir = f"cloned_voices/{output_voice}_samples"

return generate_audio_long_gradio(input, audio_prompt_input, bark_speaker_as_the_prompt, npz_dropdown, generated_voices, cloned_voices, bark_infinity_voices, confused_travolta_mode, stable_mode_interval, seperate_prompts, seperate_prompts_flipper, split_character_goal_length, split_character_max_length, process_text_by_each, in_groups_of_size, group_text_by_counting, split_type_string, prompt_text_prefix, seed, text_splits_only,output_iterations,hoarder_mode, text_temp, waveform_temp, semantic_min_eos_p, output_dir, output_filename, output_format, add_silence_between_segments, semantic_top_k, semantic_top_p, coarse_top_k, coarse_top_p, specific_npz_file, specific_npz_folder, split_character_jitter, extra_args_str, progress=gr.Progress(track_tqdm=True))
return generate_audio_long_gradio(
input,
audio_prompt_input,
bark_speaker_as_the_prompt,
npz_dropdown,
generated_voices,
cloned_voices,
bark_infinity_voices,
confused_travolta_mode,
stable_mode_interval,
seperate_prompts,
seperate_prompts_flipper,
split_character_goal_length,
split_character_max_length,
process_text_by_each,
in_groups_of_size,
group_text_by_counting,
split_type_string,
prompt_text_prefix,
seed,
text_splits_only,
output_iterations,hoarder_mode,
text_temp,
waveform_temp,
semantic_min_eos_p,
output_dir,
output_filename,
output_format,
add_silence_between_segments,
semantic_top_k,
semantic_top_p,
coarse_top_k,
coarse_top_p,
specific_npz_file,
specific_npz_folder,
split_character_jitter,
extra_args_str,
progress=gr.Progress(track_tqdm=True))

def create_npz_dropdown_dir(directories, label):
npz_files_by_subfolder = defaultdict(list)
Expand Down Expand Up @@ -1213,7 +1250,7 @@ def clear_logs():



clone_voice_button.click(clone_voice_gradio, inputs=[input_audio_filename, input_audio_filename_secondary, semantic_step_interval, output_voice, create_samples_for_clones, even_more_clones], outputs=dummy).success(generate_audio_long_gradio_clones,inputs=[input, audio_prompt_input, bark_speaker_as_the_prompt, npz_dropdown, generated_voices, cloned_voices, bark_infinity_voices, confused_travolta_mode,stable_mode_interval,seperate_prompts, seperate_prompts_flipper, split_character_goal_length,split_character_max_length, process_text_by_each, in_groups_of_size, group_text_by_counting, split_type_string, prompt_text_prefix, seed, text_splits_only, output_iterations, hoarder_mode, text_temp, waveform_temp,semantic_min_eos_p, output_dir, output_filename, output_format, add_silence_between_segments, semantic_top_k, semantic_top_p, coarse_top_k, coarse_top_p, specific_npz_file, dummy, split_character_jitter, extra_args_input, output_voice], outputs=[audio_output])
clone_voice_button.click(clone_voice_gradio, inputs=[input_audio_filename, input_audio_filename_secondary, semantic_step_interval, output_voice, create_samples_for_clones, even_more_clones], outputs=dummy).success(generate_audio_long_gradio_clones,inputs=[input, audio_prompt_input, bark_speaker_as_the_prompt, npz_dropdown, generated_voices, cloned_voices, bark_infinity_voices, confused_travolta_mode,stable_mode_interval,seperate_prompts, seperate_prompts_flipper, split_character_goal_length,split_character_max_length, process_text_by_each, in_groups_of_size, group_text_by_counting, split_type_string, prompt_text_prefix, seed, text_splits_only, output_iterations, hoarder_mode, text_temp, waveform_temp,semantic_min_eos_p, output_dir, output_filename, output_format, add_silence_between_segments, semantic_top_k, semantic_top_p, coarse_top_k, coarse_top_p, specific_npz_file, specific_npz_folder, split_character_jitter, extra_args_input, output_voice], outputs=[audio_output])


cancel_button.click(fn=try_to_cancel, inputs=model_checkboxes, outputs=None, cancels=[generate_event])
Expand Down