Skip to content
12 changes: 9 additions & 3 deletions common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

#include "ggml_extend.hpp"

#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif

class DownSampleBlock : public GGMLBlock {
protected:
int channels;
Expand Down Expand Up @@ -248,9 +252,6 @@ class FeedForward : public GGMLBlock {
float scale = 1.f;
if (precision_fix) {
scale = 1.f / 128.f;
#ifdef SD_USE_VULKAN
force_prec_f32 = true;
#endif
}
// The purpose of the scale here is to prevent NaN issues in certain situations.
// For example, when using Vulkan without enabling force_prec_f32,
Expand All @@ -264,6 +265,11 @@ class FeedForward : public GGMLBlock {

auto net_0 = std::dynamic_pointer_cast<UnaryBlock>(blocks["net.0"]);
auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]);
#ifdef SD_USE_VULKAN
if(ggml_backend_is_vk(ctx->backend)){
net_2->set_force_prec_f32(true);
}
#endif

x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim]
x = net_2->forward(ctx, x); // [ne3, ne2, ne1, dim_out]
Expand Down
38 changes: 35 additions & 3 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct SDCliParams {
bool color = false;

bool normal_exit = false;
bool skip_usage = false;

ArgOptions get_options() {
ArgOptions options;
Expand Down Expand Up @@ -143,7 +144,27 @@ struct SDCliParams {

auto on_help_arg = [&](int argc, const char** argv, int index) {
normal_exit = true;
return -1;
return VALID_BREAK_OPT;
};

auto on_rpc_arg = [&](int argc, const char** argv, int index) {
if (++index >= argc) {
return -1;
}
const char* rpc_device = argv[index];
add_rpc_device(rpc_device);
return 1;
};

auto on_list_devices_arg = [&](int argc, const char** argv, int index) {
    // Print the registered GGML compute devices, then exit successfully
    // without showing the usage text.
    size_t buff_size = backend_list_size();
    char* buff = (char*)malloc(buff_size);
    if (buff == nullptr) {
        // allocation failure: treat like an invalid argument instead of
        // passing a null buffer to list_backends_to_buffer/printf
        return -1;
    }
    list_backends_to_buffer(buff, buff_size);
    printf("List of available GGML devices:\nName\tDescription\n-------------------\n%s\n", buff);
    free(buff);
    normal_exit = true;
    skip_usage = true;
    return VALID_BREAK_OPT;
};

options.manual_options = {
Expand All @@ -159,6 +180,14 @@ struct SDCliParams {
"--help",
"show this help message and exit",
on_help_arg},
{"",
"--rpc",
"add a rpc device",
on_rpc_arg},
{"",
"--list-devices",
"list available ggml compute devices",
on_list_devices_arg},
};

return options;
Expand Down Expand Up @@ -213,7 +242,9 @@ void parse_args(int argc, const char** argv, SDCliParams& cli_params, SDContextP
std::vector<ArgOptions> options_vec = {cli_params.get_options(), ctx_params.get_options(), gen_params.get_options()};

if (!parse_options(argc, argv, options_vec)) {
print_usage(argc, argv, options_vec);
if (!cli_params.skip_usage){
print_usage(argc, argv, options_vec);
}
exit(cli_params.normal_exit ? 0 : 1);
}

Expand Down Expand Up @@ -783,7 +814,8 @@ int main(int argc, const char* argv[]) {
ctx_params.offload_params_to_cpu,
ctx_params.diffusion_conv_direct,
ctx_params.n_threads,
gen_params.upscale_tile_size);
gen_params.upscale_tile_size,
ctx_params.upscaler_backend_device.c_str());

if (upscaler_ctx == nullptr) {
LOG_ERROR("new_upscaler_ctx failed");
Expand Down
94 changes: 72 additions & 22 deletions examples/common/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ namespace fs = std::filesystem;
#define SAFE_STR(s) ((s) ? (s) : "")
#define BOOL_STR(b) ((b) ? "true" : "false")

#define VALID_BREAK_OPT -42

const char* modes_str[] = {
"img_gen",
"vid_gen",
Expand Down Expand Up @@ -401,16 +403,26 @@ static bool parse_options(int argc, const char** argv, const std::vector<ArgOpti
}))
break;

bool kill_flow = false;
if (match_and_apply(options.manual_options, [&](auto& option) {
int ret = option.cb(argc, argv, i);
if (ret == VALID_BREAK_OPT) {
// not an error, but still break out of the loop (e.g. --help)
kill_flow = true;
return;
}
if (ret < 0) {
invalid_arg = true;
return;
}
i += ret;
found_arg = true;
}))
})) {
if (kill_flow) {
return false;
}
break;
}
}

if (invalid_arg) {
Expand Down Expand Up @@ -447,16 +459,23 @@ struct SDContextParams {
std::string tensor_type_rules;
std::string lora_model_dir;

std::string main_backend_device;
std::string diffusion_backend_device;
std::string clip_backend_device;
std::string vae_backend_device;
std::string tae_backend_device;
std::string control_net_backend_device;
std::string upscaler_backend_device;
std::string photomaker_backend_device;
std::string vision_backend_device;

std::map<std::string, std::string> embedding_map;
std::vector<sd_embedding_t> embedding_vec;

rng_type_t rng_type = CUDA_RNG;
rng_type_t sampler_rng_type = RNG_TYPE_COUNT;
bool offload_params_to_cpu = false;
bool enable_mmap = false;
bool control_net_cpu = false;
bool clip_on_cpu = false;
bool vae_on_cpu = false;
bool diffusion_flash_attn = false;
bool diffusion_conv_direct = false;
bool vae_conv_direct = false;
Expand Down Expand Up @@ -561,6 +580,43 @@ struct SDContextParams {
"--upscale-model",
"path to esrgan model.",
&esrgan_path},
{"",
"--main-backend-device",
"default device to use for all backends (defaults to main gpu device if hardware acceleration is available, otherwise cpu)",
&main_backend_device},
{"",
"--diffusion-backend-device",
"device to use for diffusion (defaults to main-backend-device)",
&diffusion_backend_device},
{"",
"--clip-backend-device",
"device to use for clip (defaults to main-backend-device)",
&clip_backend_device},
{"",
"--vae-backend-device",
"device to use for vae (defaults to main-backend-device). Also applies to tae, unless tae-backend-device is specified",
&vae_backend_device},
{"",
"--tae-backend-device",
"device to use for tae (defaults to vae-backend-device)",
&tae_backend_device},
{"",
"--control-net-backend-device",
"device to use for control net (defaults to main-backend-device)",
&control_net_backend_device},
{"",
"--upscaler-backend-device",
"device to use for upscaling models (defaults to main-backend-device)",
&upscaler_backend_device},
{"",
"--photomaker-backend-device",
"device to use for photomaker (defaults to main-backend-device)",
&photomaker_backend_device},
{"",
"--vision-backend-device",
"device to use for clip-vision model (defaults to clip-backend-device)",
&vision_backend_device},

};

options.int_options = {
Expand Down Expand Up @@ -603,18 +659,6 @@ struct SDContextParams {
"--mmap",
"whether to memory-map model",
true, &enable_mmap},
{"",
"--control-net-cpu",
"keep controlnet in cpu (for low vram)",
true, &control_net_cpu},
{"",
"--clip-on-cpu",
"keep clip in cpu (for low vram)",
true, &clip_on_cpu},
{"",
"--vae-on-cpu",
"keep vae in cpu (for low vram)",
true, &vae_on_cpu},
{"",
"--diffusion-fa",
"use flash attention in the diffusion model",
Expand Down Expand Up @@ -875,6 +919,7 @@ struct SDContextParams {

std::string embeddings_str = emb_ss.str();
std::ostringstream oss;
// TODO backend devices
oss << "SDContextParams {\n"
<< " n_threads: " << n_threads << ",\n"
<< " model_path: \"" << model_path << "\",\n"
Expand All @@ -901,9 +946,9 @@ struct SDContextParams {
<< " flow_shift: " << (std::isinf(flow_shift) ? "INF" : std::to_string(flow_shift)) << "\n"
<< " offload_params_to_cpu: " << (offload_params_to_cpu ? "true" : "false") << ",\n"
<< " enable_mmap: " << (enable_mmap ? "true" : "false") << ",\n"
<< " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
<< " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
<< " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
// << " control_net_cpu: " << (control_net_cpu ? "true" : "false") << ",\n"
// << " clip_on_cpu: " << (clip_on_cpu ? "true" : "false") << ",\n"
// << " vae_on_cpu: " << (vae_on_cpu ? "true" : "false") << ",\n"
<< " diffusion_flash_attn: " << (diffusion_flash_attn ? "true" : "false") << ",\n"
<< " diffusion_conv_direct: " << (diffusion_conv_direct ? "true" : "false") << ",\n"
<< " vae_conv_direct: " << (vae_conv_direct ? "true" : "false") << ",\n"
Expand Down Expand Up @@ -965,9 +1010,6 @@ struct SDContextParams {
lora_apply_mode,
offload_params_to_cpu,
enable_mmap,
clip_on_cpu,
control_net_cpu,
vae_on_cpu,
diffusion_flash_attn,
taesd_preview,
diffusion_conv_direct,
Expand All @@ -980,6 +1022,14 @@ struct SDContextParams {
chroma_t5_mask_pad,
qwen_image_zero_cond_t,
flow_shift,
main_backend_device.c_str(),
diffusion_backend_device.c_str(),
clip_backend_device.c_str(),
vae_backend_device.c_str(),
tae_backend_device.c_str(),
control_net_backend_device.c_str(),
photomaker_backend_device.c_str(),
vision_backend_device.c_str(),
};
return sd_ctx_params;
}
Expand Down
64 changes: 44 additions & 20 deletions ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,6 @@

#include "model.h"

#ifdef SD_USE_CUDA
#include "ggml-cuda.h"
#endif

#ifdef SD_USE_METAL
#include "ggml-metal.h"
#endif

#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif

#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif

#ifdef SD_USE_SYCL
#include "ggml-sycl.h"
#endif

#include "rng.hpp"
#include "util.h"

Expand Down Expand Up @@ -88,6 +68,42 @@ __STATIC_INLINE__ void ggml_log_callback_default(ggml_log_level level, const cha
}
}

// Returns true if a GGML backend device with exactly this name is registered.
// Takes the name by const reference to avoid a copy per lookup.
__STATIC_INLINE__ bool backend_name_exists(const std::string& name) {
    // ggml_backend_dev_count() returns size_t; iterate with size_t to avoid
    // a narrowing conversion and a signed/unsigned comparison.
    const size_t device_count = ggml_backend_dev_count();
    for (size_t i = 0; i < device_count; i++) {
        if (name == ggml_backend_dev_name(ggml_backend_dev_get(i))) {
            return true;
        }
    }
    return false;
}

// Validates a user-supplied backend device name.
// Returns the name unchanged if it is empty (meaning "use the default") or
// matches a registered device; otherwise logs a warning and returns "" so the
// caller falls back to the default backend.
__STATIC_INLINE__ std::string sanitize_backend_name(std::string name) {
    if (name.empty() || backend_name_exists(name)) {
        return name;  // by-value parameter lets the string move through
    }
    LOG_WARN("Backend %s not found, using default backend", name.c_str());
    return "";
}

// Returns the name of the device ggml_backend_init_best() would select:
// discrete GPU first, then integrated GPU, then CPU.
__STATIC_INLINE__ std::string get_default_backend_name() {
    // should pick the same backend as ggml_backend_init_best
    ggml_backend_dev_t dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU);
    if (dev == nullptr) {
        dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU);
    }
    if (dev == nullptr) {
        dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
    }
    if (dev == nullptr) {
        // no devices registered at all: avoid calling
        // ggml_backend_dev_name(nullptr); "" means "default" to callers
        return "";
    }
    return ggml_backend_dev_name(dev);
}

// Creates a GGML backend by device name. An empty name selects the best
// available backend via ggml_backend_init_best().
__STATIC_INLINE__ ggml_backend_t init_named_backend(std::string name = "") {
    LOG_DEBUG("Initializing backend: %s", name.c_str());
    return name.empty() ? ggml_backend_init_best()
                        : ggml_backend_init_by_name(name.c_str(), nullptr);
}

static_assert(GGML_MAX_NAME >= 128, "GGML_MAX_NAME must be at least 128");

// n-mode tensor-matrix product
Expand Down Expand Up @@ -2192,6 +2208,14 @@ class Linear : public UnaryBlock {
force_prec_f32(force_prec_f32),
scale(scale) {}

// Overrides the output scale factor used by forward().
// NOTE(review): presumably used for precision fixes like the 1/128 scale in
// FeedForward — confirm against callers.
void set_scale(float scale_){
    scale = scale_;
}

// Forces f32 precision in forward(); set by FeedForward on the Vulkan
// backend to avoid NaN issues.
void set_force_prec_f32(bool force_prec_f32_){
    force_prec_f32 = force_prec_f32_;
}

struct ggml_tensor* forward(GGMLRunnerContext* ctx, struct ggml_tensor* x) {
struct ggml_tensor* w = params["weight"];
struct ggml_tensor* b = nullptr;
Expand Down
12 changes: 0 additions & 12 deletions model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,6 @@
#include "name_conversion.h"
#include "stable-diffusion.h"

#ifdef SD_USE_METAL
#include "ggml-metal.h"
#endif

#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif

#ifdef SD_USE_OPENCL
#include "ggml-opencl.h"
#endif

#define ST_HEADER_SIZE_LEN 8

uint64_t read_u64(uint8_t* buffer) {
Expand Down
Loading
Loading