Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ bool save_results(const SDCliParams& cli_params,
auto write_image = [&](const fs::path& path, int idx) {
const sd_image_t& img = results[idx];
if (!img.data)
return;
return false;

std::string params = get_image_params(cli_params, ctx_params, gen_params, gen_params.seed + idx);
int ok = 0;
Expand All @@ -419,8 +419,11 @@ bool save_results(const SDCliParams& cli_params,
ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.c_str());
}
LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
return ok != 0;
};

int sucessful_reults = 0;

if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
if (!is_jpg && ext_lower != ".png")
ext = ".png";
Expand All @@ -429,19 +432,26 @@ bool save_results(const SDCliParams& cli_params,

for (int i = 0; i < num_results; ++i) {
fs::path img_path = format_frame_idx(pattern.string(), output_begin_idx + i);
write_image(img_path, i);
if (write_image(img_path, i)) {
sucessful_reults++;
}
}
return true;
LOG_INFO("%d/%d images saved", sucessful_reults, num_results);
return sucessful_reults != 0;
}

if (cli_params.mode == VID_GEN && num_results > 1) {
if (ext_lower != ".avi")
ext = ".avi";
fs::path video_path = base_path;
video_path += ext;
create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps);
LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
return true;
if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
return true;
} else {
LOG_ERROR("Failed to save result MPG AVI video to '%s'", video_path.string().c_str());
return false;
}
}

if (!is_jpg && ext_lower != ".png")
Expand All @@ -453,10 +463,12 @@ bool save_results(const SDCliParams& cli_params,
img_path += "_" + std::to_string(output_begin_idx + i);
}
img_path += ext;
write_image(img_path, i);
if (write_image(img_path, i)) {
sucessful_reults++;
}
}

return true;
LOG_INFO("%d/%d images saved", sucessful_reults, num_results);
return sucessful_reults != 0;
}

int main(int argc, const char* argv[]) {
Expand Down
15 changes: 9 additions & 6 deletions ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,7 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_silu_act(ggml_context* ctx, ggml_tensor*
return x;
}

typedef std::function<void(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;
typedef std::function<bool(ggml_tensor*, ggml_tensor*, bool)> on_tile_process;

__STATIC_INLINE__ void sd_tiling_calc_tiles(int& num_tiles_dim,
float& tile_overlap_factor_dim,
Expand Down Expand Up @@ -929,12 +929,15 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,

int64_t t1 = ggml_time_ms();
ggml_ext_tensor_split_2d(input, input_tile, x_in, y_in);
on_processing(input_tile, output_tile, false);
ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy);
if (on_processing(input_tile, output_tile, false)) {
ggml_ext_tensor_merge_2d(output_tile, output, x_out, y_out, overlap_x_out, overlap_y_out, dx, dy);

int64_t t2 = ggml_time_ms();
last_time = (t2 - t1) / 1000.0f;
pretty_progress(tile_count, num_tiles, last_time);
int64_t t2 = ggml_time_ms();
last_time = (t2 - t1) / 1000.0f;
pretty_progress(tile_count, num_tiles, last_time);
} else {
LOG_ERROR("Failed to process patch %d at (%d, %d)", tile_count, x, y);
}
tile_count++;
}
last_x = false;
Expand Down
25 changes: 17 additions & 8 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1542,7 +1542,7 @@ class StableDiffusionGGML {
if (vae_tiling_params.enabled) {
// split latent in 32x32 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
first_stage_model->compute(n_threads, in, true, &out, nullptr);
return first_stage_model->compute(n_threads, in, true, &out, nullptr);
};
silent_tiling(latents, result, get_vae_scale_factor(), 32, 0.5f, on_tiling);

Expand All @@ -1561,7 +1561,7 @@ class StableDiffusionGGML {
if (vae_tiling_params.enabled) {
// split latent in 64x64 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
tae_first_stage->compute(n_threads, in, true, &out, nullptr);
return tae_first_stage->compute(n_threads, in, true, &out, nullptr);
};
silent_tiling(latents, result, get_vae_scale_factor(), 64, 0.5f, on_tiling);
} else {
Expand Down Expand Up @@ -2530,7 +2530,7 @@ class StableDiffusionGGML {
LOG_DEBUG("VAE Tile size: %dx%d", tile_size_x, tile_size_y);

auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
first_stage_model->compute(n_threads, in, false, &out, work_ctx);
return first_stage_model->compute(n_threads, in, false, &out, work_ctx);
};
sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling);
} else {
Expand All @@ -2541,7 +2541,7 @@ class StableDiffusionGGML {
if (vae_tiling_params.enabled && !encode_video) {
// split latent in 32x32 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
tae_first_stage->compute(n_threads, in, false, &out, nullptr);
return tae_first_stage->compute(n_threads, in, false, &out, nullptr);
};
sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling);
} else {
Expand Down Expand Up @@ -2659,23 +2659,31 @@ class StableDiffusionGGML {

// split latent in 32x32 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
first_stage_model->compute(n_threads, in, true, &out, nullptr);
return first_stage_model->compute(n_threads, in, true, &out, nullptr);
};
sd_tiling_non_square(x, result, vae_scale_factor, tile_size_x, tile_size_y, tile_overlap, on_tiling);
} else {
first_stage_model->compute(n_threads, x, true, &result, work_ctx);
if(!first_stage_model->compute(n_threads, x, true, &result, work_ctx)){
LOG_ERROR("Failed to decode latetnts");
first_stage_model->free_compute_buffer();
return nullptr;
}
}
first_stage_model->free_compute_buffer();
process_vae_output_tensor(result);
} else {
if (vae_tiling_params.enabled) {
// split latent in 64x64 tiles and compute in several steps
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
tae_first_stage->compute(n_threads, in, true, &out);
return tae_first_stage->compute(n_threads, in, true, &out);
};
sd_tiling(x, result, vae_scale_factor, 64, 0.5f, on_tiling);
} else {
tae_first_stage->compute(n_threads, x, true, &result);
if(!tae_first_stage->compute(n_threads, x, true, &result)){
LOG_ERROR("Failed to decode latetnts");
tae_first_stage->free_compute_buffer();
return nullptr;
}
}
tae_first_stage->free_compute_buffer();
}
Expand Down Expand Up @@ -3440,6 +3448,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
ggml_free(work_ctx);
return nullptr;
}
memset(result_images, 0, batch_count * sizeof(sd_image_t));

for (size_t i = 0; i < decoded_images.size(); i++) {
result_images[i].width = width;
Expand Down
2 changes: 1 addition & 1 deletion upscaler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ struct UpscalerGGML {

ggml_tensor* upscaled = ggml_new_tensor_4d(upscale_ctx, GGML_TYPE_F32, output_width, output_height, 3, 1);
auto on_tiling = [&](ggml_tensor* in, ggml_tensor* out, bool init) {
esrgan_upscaler->compute(n_threads, in, &out);
return esrgan_upscaler->compute(n_threads, in, &out);
};
int64_t t0 = ggml_time_ms();
sd_tiling(input_image_tensor, upscaled, esrgan_upscaler->scale, esrgan_upscaler->tile_size, 0.25f, on_tiling);
Expand Down
Loading