Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions invokeai/backend/model_manager/configs/controlnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
class ControlAdapterDefaultSettings(BaseModel):
# This could be narrowed to controlnet processor nodes, but they change. Leaving this a string is safer.
preprocessor: str | None
fp8_storage: bool | None = Field(
default=None,
description="Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.",
)
model_config = ConfigDict(extra="forbid")

@classmethod
Expand Down
4 changes: 4 additions & 0 deletions invokeai/backend/model_manager/configs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ class MainModelDefaultSettings(BaseModel):
height: int | None = Field(default=None, multiple_of=8, ge=64, description="Default height for this model")
guidance: float | None = Field(default=None, ge=1, description="Default Guidance for this model")
cpu_only: bool | None = Field(default=None, description="Whether this model should run on CPU only")
fp8_storage: bool | None = Field(
default=None,
description="Store weights in FP8 to reduce VRAM usage (~50% savings). Weights are cast to compute dtype during inference.",
)

model_config = ConfigDict(extra="forbid")

Expand Down
48 changes: 48 additions & 0 deletions invokeai/backend/model_manager/load/load_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,54 @@ def get_size_fs(
variant=config.repo_variant if isinstance(config, Diffusers_Config_Base) else None,
)

def _should_use_fp8(self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> bool:
"""Check if FP8 layerwise casting should be applied to a model."""
# FP8 storage only works on CUDA
if self._torch_device.type != "cuda":
return False

# Don't apply FP8 to text encoders, tokenizers, schedulers, etc.
_excluded_submodel_types = {
SubModelType.TextEncoder,
SubModelType.TextEncoder2,
SubModelType.TextEncoder3,
SubModelType.Tokenizer,
SubModelType.Tokenizer2,
SubModelType.Tokenizer3,
SubModelType.Scheduler,
SubModelType.SafetyChecker,
}
if submodel_type in _excluded_submodel_types:
return False

# Check default_settings.fp8_storage (Main models, ControlNet)
if hasattr(config, "default_settings") and config.default_settings is not None:
if hasattr(config.default_settings, "fp8_storage") and config.default_settings.fp8_storage is True:
return True

return False

def _apply_fp8_layerwise_casting(
self, model: AnyModel, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None
) -> AnyModel:
"""Apply FP8 layerwise casting to a model if enabled in its config."""
if not self._should_use_fp8(config, submodel_type):
return model

from diffusers.models.modeling_utils import ModelMixin

if not isinstance(model, ModelMixin):
return model

model.enable_layerwise_casting(
storage_dtype=torch.float8_e4m3fn,
compute_dtype=self._torch_dtype,
)
self._logger.info(
f"FP8 layerwise casting enabled for {config.name} (storage=float8_e4m3fn, compute={self._torch_dtype})"
)
return model

# This needs to be implemented in the subclass
def _load_model(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,5 @@ def _load_model(
else:
raise e

result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@ def _load_model(
submodel_type: Optional[SubModelType] = None,
) -> AnyModel:
if isinstance(config, ControlNet_Checkpoint_Config_Base):
return ControlNetModel.from_single_file(
result = ControlNetModel.from_single_file(
config.path,
torch_dtype=self._torch_dtype,
)
result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result
else:
return super()._load_model(config, submodel_type)
4 changes: 4 additions & 0 deletions invokeai/backend/model_manager/load/model_loaders/flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def _load_model(
local_files_only=True,
)

model = self._apply_fp8_layerwise_casting(model, config, submodel_type)
return model


Expand Down Expand Up @@ -201,6 +202,7 @@ def _load_model(
vae_dtype = self._torch_dtype
model.to(vae_dtype)

model = self._apply_fp8_layerwise_casting(model, config, submodel_type)
return model

def _convert_flux2_vae_bfl_to_diffusers(self, sd: dict) -> dict:
Expand Down Expand Up @@ -639,6 +641,7 @@ def _load_model(
else:
raise e

result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result


Expand Down Expand Up @@ -715,6 +718,7 @@ def _load_model(
if guidance_emb.linear_2.bias is not None:
guidance_emb.linear_2.bias.data.zero_()

result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def _load_model(
result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True)
else:
raise e
result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result

# TO DO: Add exception handling
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def _load_model(
else:
raise e

result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result

def _load_from_singlefile(
Expand Down Expand Up @@ -152,5 +153,8 @@ def _load_from_singlefile(
if subtype == submodel_type:
continue
if submodel := getattr(pipeline, subtype.value, None):
self._apply_fp8_layerwise_casting(submodel, config, subtype)
self._ram_cache.put(get_model_cache_key(config.key, subtype), model=submodel)
return getattr(pipeline, submodel_type.value)
result = getattr(pipeline, submodel_type.value)
result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result
4 changes: 3 additions & 1 deletion invokeai/backend/model_manager/load/model_loaders/vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ def _load_model(
submodel_type: Optional[SubModelType] = None,
) -> AnyModel:
if isinstance(config, VAE_Checkpoint_Config_Base):
return AutoencoderKL.from_single_file(
result = AutoencoderKL.from_single_file(
config.path,
torch_dtype=self._torch_dtype,
)
result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result
else:
return super()._load_model(config, submodel_type)
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def _load_model(
else:
raise e

result = self._apply_fp8_layerwise_casting(result, config, submodel_type)
return result


Expand Down
8 changes: 8 additions & 0 deletions invokeai/frontend/web/public/locales/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,7 @@
"convertToDiffusersHelpText5": "Please make sure you have enough disk space. Models generally vary between 2GB-7GB in size.",
"convertToDiffusersHelpText6": "Do you wish to convert this model?",
"cpuOnly": "CPU Only",
"fp8Storage": "FP8 Storage (Save VRAM)",
"runOnCpu": "Run text encoder model on CPU only",
"noDefaultSettings": "No default settings configured for this model. Visit the Model Manager to add default settings.",
"defaultSettings": "Default Settings",
Expand Down Expand Up @@ -2198,6 +2199,13 @@
"When enabled, only the text encoder component will run on CPU instead of GPU.",
"This saves VRAM for the denoiser while only slightly impacting performance. The conditioning outputs are automatically moved to GPU for the denoiser."
]
},
"fp8Storage": {
"heading": "FP8 Storage",
"paragraphs": [
"Stores model weights in FP8 format in VRAM, reducing memory usage by approximately 50% compared to FP16.",
"During inference, weights are cast layer-by-layer to the compute precision (FP16/BF16), so image quality is preserved. Works on all CUDA GPUs."
]
}
},
"workflows": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ export type Feature =
| 'tileOverlap'
| 'optimizedDenoising'
| 'fluxDevLicense'
| 'cpuOnly';
| 'cpuOnly'
| 'fp8Storage';

export type PopoverData = PopoverProps & {
image?: string;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ export const useControlAdapterModelDefaultSettings = (
isEnabled: !isNil(modelConfig?.default_settings?.preprocessor),
value: modelConfig?.default_settings?.preprocessor || 'none',
},
fp8Storage: {
isEnabled: !isNil(modelConfig?.default_settings?.fp8_storage),
value: modelConfig?.default_settings?.fp8_storage ?? false,
},
};
}, [modelConfig?.default_settings]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ export const useMainModelDefaultSettings = (modelConfig: MainModelConfig) => {
isEnabled: !isNil(modelConfig?.default_settings?.guidance),
value: modelConfig?.default_settings?.guidance ?? 4,
},
fp8Storage: {
isEnabled: !isNil(modelConfig?.default_settings?.fp8_storage),
value: modelConfig?.default_settings?.fp8_storage ?? false,
},
};
}, [modelConfig]);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { Button, Flex, Heading, SimpleGrid } from '@invoke-ai/ui-library';
import { useControlAdapterModelDefaultSettings } from 'features/modelManagerV2/hooks/useControlAdapterModelDefaultSettings';
import { useIsModelManagerEnabled } from 'features/modelManagerV2/hooks/useIsModelManagerEnabled';
import { DefaultFp8StorageControlAdapter } from 'features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultFp8StorageControlAdapter';
import { DefaultPreprocessor } from 'features/modelManagerV2/subpanels/ModelPanel/ControlAdapterModelDefaultSettings/DefaultPreprocessor';
import type { FormField } from 'features/modelManagerV2/subpanels/ModelPanel/MainModelDefaultSettings/MainModelDefaultSettings';
import { toast } from 'features/toast/toast';
Expand All @@ -14,6 +15,7 @@ import type { ControlLoRAModelConfig, ControlNetModelConfig, T2IAdapterModelConf

export type ControlAdapterModelDefaultSettingsFormData = {
preprocessor: FormField<string>;
fp8Storage: FormField<boolean>;
};

type Props = {
Expand All @@ -40,6 +42,7 @@ export const ControlAdapterModelDefaultSettings = memo(({ modelConfig }: Props)
(data) => {
const body = {
preprocessor: data.preprocessor.isEnabled ? data.preprocessor.value : null,
fp8_storage: data.fp8Storage.isEnabled ? data.fp8Storage.value : null,
};

updateModel({
Expand Down Expand Up @@ -88,6 +91,7 @@ export const ControlAdapterModelDefaultSettings = memo(({ modelConfig }: Props)

<SimpleGrid columns={2} gap={8}>
<DefaultPreprocessor control={control} name="preprocessor" />
<DefaultFp8StorageControlAdapter control={control} name="fp8Storage" />
</SimpleGrid>
</>
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { Flex, FormControl, FormLabel, Switch } from '@invoke-ai/ui-library';
import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
import { SettingToggle } from 'features/modelManagerV2/subpanels/ModelPanel/SettingToggle';
import type { ChangeEvent } from 'react';
import { memo, useCallback, useMemo } from 'react';
import type { UseControllerProps } from 'react-hook-form';
import { useController } from 'react-hook-form';
import { useTranslation } from 'react-i18next';

import type { ControlAdapterModelDefaultSettingsFormData } from './ControlAdapterModelDefaultSettings';

type DefaultFp8StorageType = ControlAdapterModelDefaultSettingsFormData['fp8Storage'];

export const DefaultFp8StorageControlAdapter = memo(
  (props: UseControllerProps<ControlAdapterModelDefaultSettingsFormData>) => {
    const { t } = useTranslation();
    const { field } = useController(props);

    // The form stores this setting as { isEnabled, value }; narrow once for the reads below.
    const fieldValue = field.value as DefaultFp8StorageType;

    // Flip only `value`, preserving the isEnabled flag managed by SettingToggle.
    const handleToggle = useCallback(
      (e: ChangeEvent<HTMLInputElement>) => {
        field.onChange({ ...(field.value as DefaultFp8StorageType), value: e.target.checked });
      },
      [field]
    );

    const isChecked = useMemo(() => fieldValue.value, [fieldValue]);

    // The switch is interactive only while the default setting is enabled.
    const isDisabled = useMemo(() => !fieldValue.isEnabled, [fieldValue]);

    return (
      <FormControl flexDir="column" gap={1} alignItems="flex-start">
        <Flex justifyContent="space-between" w="full">
          <InformationalPopover feature="fp8Storage">
            <FormLabel>{t('modelManager.fp8Storage')}</FormLabel>
          </InformationalPopover>
          <SettingToggle control={props.control} name="fp8Storage" />
        </Flex>
        <Switch isChecked={isChecked} onChange={handleToggle} isDisabled={isDisabled} />
      </FormControl>
    );
  }
);

DefaultFp8StorageControlAdapter.displayName = 'DefaultFp8StorageControlAdapter';
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { Flex, FormControl, FormLabel, Switch } from '@invoke-ai/ui-library';
import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover';
import { SettingToggle } from 'features/modelManagerV2/subpanels/ModelPanel/SettingToggle';
import type { ChangeEvent } from 'react';
import { memo, useCallback, useMemo } from 'react';
import type { UseControllerProps } from 'react-hook-form';
import { useController } from 'react-hook-form';
import { useTranslation } from 'react-i18next';

import type { MainModelDefaultSettingsFormData } from './MainModelDefaultSettings';

type DefaultFp8StorageType = MainModelDefaultSettingsFormData['fp8Storage'];

export const DefaultFp8Storage = memo((props: UseControllerProps<MainModelDefaultSettingsFormData>) => {
  const { t } = useTranslation();
  const { field } = useController(props);

  // The form stores this setting as { isEnabled, value }; narrow once for the reads below.
  const fieldValue = field.value as DefaultFp8StorageType;

  // Flip only `value`, preserving the isEnabled flag managed by SettingToggle.
  const handleToggle = useCallback(
    (e: ChangeEvent<HTMLInputElement>) => {
      field.onChange({ ...(field.value as DefaultFp8StorageType), value: e.target.checked });
    },
    [field]
  );

  const isChecked = useMemo(() => fieldValue.value, [fieldValue]);

  // The switch is interactive only while the default setting is enabled.
  const isDisabled = useMemo(() => !fieldValue.isEnabled, [fieldValue]);

  return (
    <FormControl flexDir="column" gap={1} alignItems="flex-start">
      <Flex justifyContent="space-between" w="full">
        <InformationalPopover feature="fp8Storage">
          <FormLabel>{t('modelManager.fp8Storage')}</FormLabel>
        </InformationalPopover>
        <SettingToggle control={props.control} name="fp8Storage" />
      </Flex>
      <Switch isChecked={isChecked} onChange={handleToggle} isDisabled={isDisabled} />
    </FormControl>
  );
});

DefaultFp8Storage.displayName = 'DefaultFp8Storage';
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type { MainModelConfig } from 'services/api/types';

import { DefaultCfgRescaleMultiplier } from './DefaultCfgRescaleMultiplier';
import { DefaultCfgScale } from './DefaultCfgScale';
import { DefaultFp8Storage } from './DefaultFp8Storage';
import { DefaultGuidance } from './DefaultGuidance';
import { DefaultScheduler } from './DefaultScheduler';
import { DefaultSteps } from './DefaultSteps';
Expand All @@ -39,6 +40,7 @@ export type MainModelDefaultSettingsFormData = {
width: FormField<number>;
height: FormField<number>;
guidance: FormField<number>;
fp8Storage: FormField<boolean>;
};

type Props = {
Expand Down Expand Up @@ -85,6 +87,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => {
width: data.width.isEnabled ? data.width.value : null,
height: data.height.isEnabled ? data.height.value : null,
guidance: data.guidance.isEnabled ? data.guidance.value : null,
fp8_storage: data.fp8Storage.isEnabled ? data.fp8Storage.value : null,
};

updateModel({
Expand Down Expand Up @@ -141,6 +144,7 @@ export const MainModelDefaultSettings = memo(({ modelConfig }: Props) => {
{!isFluxFamily && <DefaultCfgRescaleMultiplier control={control} name="cfgRescaleMultiplier" />}
<DefaultWidth control={control} optimalDimension={optimalDimension} />
<DefaultHeight control={control} optimalDimension={optimalDimension} />
<DefaultFp8Storage control={control} name="fp8Storage" />
</SimpleGrid>
</>
);
Expand Down
Loading
Loading