Skip to content

Regression in memory management in a commit made after v0.22.0: Flux 2 Dev can no longer be run in full precision on a 3090 #14165

@clockworkwhale

Description

@clockworkwhale

In v0.22.0 and previous versions, I can run Flux 2 Dev in full precision on my 3090; dynamic VRAM offloading makes it work.

With today's codebase, that's no longer possible: it OOMs with the traceback below. Unfortunately, I don't know which specific commit broke it, but it was made within the last week.

If I revert to v0.22.0 from last week, it works again.

To remove confounders from the test, the traceback below was produced in a fresh conda environment using a fresh ComfyUI install in a new folder. No custom nodes were installed, and the only launch parameters were --listen and --disable-auto-launch.

[INFO] Requested to load Flux2
[INFO] Model Flux2 prepared for dynamic VRAM loading. 61461MB Staged. 0 patches attached. Force pre-loaded 128 weights: 71 KB.
0%| | 0/20 [00:13<?, ?it/s, Model Initializing ... ]
[ERROR] !!! Exception during processing !!! CUDA error: out of memory
Search for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information. CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1 Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

[ERROR] Traceback (most recent call last):
File "C:\ComfyUI\execution.py", line 536, in execute
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\execution.py", line 336, in get_output_data
return_values = await _async_map_node_over_list(prompt_id, unique_id, obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\execution.py", line 310, in _async_map_node_over_list
await process_inputs(input_dict, i)
File "C:\ComfyUI\execution.py", line 298, in process_inputs
result = f(**inputs)
File "C:\ComfyUI\nodes.py", line 1586, in sample
return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise)
File "C:\ComfyUI\nodes.py", line 1550, in common_ksampler
samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step,
force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
File "C:\ComfyUI\comfy\sample.py", line 74, in sample
samples = sampler.sample(noise, positive, negative, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 1447, in sample
return sample(self.model, noise, positive, negative, cfg, self.device, sampler, sigmas, self.model_options, latent_image=latent_image, denoise_mask=denoise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 1337, in sample
return cfg_guider.sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1319, in sample
output = executor.execute(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1257, in outer_sample
output = self.inner_sample(noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
File "C:\ComfyUI\comfy\samplers.py", line 1232, in inner_sample
samples = executor.execute(self, sigmas, extra_args, callback, noise, latent_image, denoise_mask, disable_pbar)
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1002, in sample
samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options)
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\utils_contextlib.py", line 124, in decorate_context
return func(*args, **kwargs)
File "C:\ComfyUI\comfy\k_diffusion\sampling.py", line 205, in sample_euler
denoised = model(x, sigma_hat * s_in, **extra_args)
File "C:\ComfyUI\comfy\samplers.py", line 642, in __call__
out = self.inner_model(x, sigma, model_options=model_options, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 1205, in __call__
return self.outer_predict_noise(*args, **kwargs)
~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1212, in outer_predict_noise
).execute(x, timestep, model_options, seed)
~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1215, in predict_noise
return sampling_function(self.inner_model, x, timestep, self.conds.get("negative", None), self.conds.get("positive", None), self.cfg, model_options=model_options, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 622, in sampling_function
out = calc_cond_batch(model, conds, x, timestep, model_options)
File "C:\ComfyUI\comfy\samplers.py", line 210, in calc_cond_batch
return _calc_cond_batch_outer(model, conds, x_in, timestep, model_options)
File "C:\ComfyUI\comfy\samplers.py", line 218, in _calc_cond_batch_outer
return executor.execute(model, conds, x_in, timestep, model_options)
~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 334, in calc_cond_batch
output = model.apply_model(input_x, timestep_, **c).chunk(batch_chunks)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_base.py", line 185, in apply_model
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...<2 lines>...
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.APPLY_MODEL, transformer_options)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
).execute(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_base.py", line 229, in _apply_model
model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds)
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1778, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1789, in _call_impl
return forward_call(*args, **kwargs)
File "C:\ComfyUI\comfy\ldm\flux\model.py", line 345, in forward
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...<2 lines>...
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
).execute(x, timestep, context, y, guidance, ref_latents, control, transformer_options, **kwargs)
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ldm\flux\model.py", line 406, in _forward
out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options, attn_mask=kwargs.get("attention_mask", None))
File "C:\ComfyUI\comfy\ldm\flux\model.py", line 296, in forward_orig
img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options, **extra_kwargs)
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1778, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1789, in _call_impl
return forward_call(*args, **kwargs)
File "C:\ComfyUI\comfy\ldm\flux\layers.py", line 325, in forward
qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim_first], dim=-1)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1778, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1789, in _call_impl
return forward_call(*args, **kwargs)
File "C:\ComfyUI\comfy\ops.py", line 545, in forward
return self.forward_comfy_cast_weights(*args, **kwargs)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ops.py", line 537, in forward_comfy_cast_weights
weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ops.py", line 365, in cast_bias_weight
offload_stream = cast_modules_with_vbar([s], dtype, device, bias_dtype, non_blocking)
File "C:\ComfyUI\comfy\ops.py", line 215, in cast_modules_with_vbar
handle_pin(s, pin, xfer_source, xfer_dest, size=dest_size)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ops.py", line 203, in handle_pin
comfy.model_management.cast_to_gathered(source, pin, non_blocking=non_blocking, stream=offload_stream, r2=dest)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_management.py", line 1447, in cast_to_gathered
if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view, stream=stream, destination2=dest2_view):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\memory_management.py", line 59, in read_tensor_file_slice_into
hostbuf.read_file_slice(file_obj, info.offset, info.size,
~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
offset=destination.data_ptr() - hostbuf.get_raw_address(),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
stream=stream_ptr,
^^^^^^^^^^^^^^^^^^
device_ptr=device_ptr,
^^^^^^^^^^^^^^^^^^^^^^
device=None if destination2 is None else destination2.device.index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\comfy_aimdo\host_buffer.py", line 85, in read_file_slice
raise RuntimeError("HostBuffer.read_file_slice failed")
RuntimeError: HostBuffer.read_file_slice failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\ComfyUI\execution.py", line 541, in execute
comfy.model_management.reset_cast_buffers()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\ComfyUI\comfy\model_management.py", line 1366, in reset_cast_buffers
offload_stream.synchronize()
~~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\cuda\streams.py", line 108, in synchronize
super().synchronize()
~~~~~~~~~~~~~~~~~~~^^
torch.AcceleratorError: CUDA error: out of memory
Search for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information. CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1 Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

[INFO] Memory summary:
|===========================================================================|

PyTorch CUDA memory summary, device ID 0
CUDA OOMs: 0
===========================================================================
Metric
---------------------------------------------------------------------------
Allocated memory
from large pool
from small pool
---------------------------------------------------------------------------
Active memory
from large pool
from small pool
---------------------------------------------------------------------------
Requested memory
from large pool
from small pool
---------------------------------------------------------------------------
GPU reserved memory
from large pool
from small pool
---------------------------------------------------------------------------
Non-releasable memory
from large pool
from small pool
---------------------------------------------------------------------------
Allocations
from large pool
from small pool
---------------------------------------------------------------------------
Active allocs
from large pool
from small pool
---------------------------------------------------------------------------
GPU reserved segments
from large pool
from small pool
---------------------------------------------------------------------------
Non-releasable allocs
from large pool
from small pool
---------------------------------------------------------------------------
Oversize allocations
---------------------------------------------------------------------------
Oversize GPU segments
===========================================================================

[ERROR] Got an OOM, unloading all loaded models.
Exception in thread Thread-1 (prompt_worker):
Traceback (most recent call last):
File "C:\ComfyUI\execution.py", line 536, in execute
output_data, output_ui, has_subgraph, has_pending_tasks = await get_output_data(prompt_id, unique_id, obj, input_data_all, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\execution.py", line 336, in get_output_data
return_values = await _async_map_node_over_list(prompt_id, unique_id, obj, input_data_all, obj.FUNCTION, allow_interrupt=True, execution_block_cb=execution_block_cb, pre_execute_cb=pre_execute_cb, v3_data=v3_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\execution.py", line 310, in _async_map_node_over_list
await process_inputs(input_dict, i)
File "C:\ComfyUI\execution.py", line 298, in process_inputs
result = f(**inputs)
File "C:\ComfyUI\nodes.py", line 1586, in sample
return common_ksampler(model, seed, steps, cfg, sampler_name, scheduler, positive, negative, latent_image, denoise=denoise)
File "C:\ComfyUI\nodes.py", line 1550, in common_ksampler
samples = comfy.sample.sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative, latent_image,
denoise=denoise, disable_noise=disable_noise, start_step=start_step, last_step=last_step,
force_full_denoise=force_full_denoise, noise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
File "C:\ComfyUI\comfy\sample.py", line 74, in sample
samples = sampler.sample(noise, positive, negative, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 1447, in sample
return sample(self.model, noise, positive, negative, cfg, self.device, sampler, sigmas, self.model_options, latent_image=latent_image, denoise_mask=denoise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 1337, in sample
return cfg_guider.sample(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1319, in sample
output = executor.execute(noise, latent_image, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1257, in outer_sample
output = self.inner_sample(noise, latent_image, device, sampler, sigmas, denoise_mask, callback, disable_pbar, seed, latent_shapes=latent_shapes)
File "C:\ComfyUI\comfy\samplers.py", line 1232, in inner_sample
samples = executor.execute(self, sigmas, extra_args, callback, noise, latent_image, denoise_mask, disable_pbar)
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1002, in sample
samples = self.sampler_function(model_k, noise, sigmas, extra_args=extra_args, callback=k_callback, disable=disable_pbar, **self.extra_options)
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\utils_contextlib.py", line 124, in decorate_context
return func(*args, **kwargs)
File "C:\ComfyUI\comfy\k_diffusion\sampling.py", line 205, in sample_euler
denoised = model(x, sigma_hat * s_in, **extra_args)
File "C:\ComfyUI\comfy\samplers.py", line 642, in __call__
out = self.inner_model(x, sigma, model_options=model_options, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 1205, in __call__
return self.outer_predict_noise(*args, **kwargs)
~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1212, in outer_predict_noise
).execute(x, timestep, model_options, seed)
~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 1215, in predict_noise
return sampling_function(self.inner_model, x, timestep, self.conds.get("negative", None), self.conds.get("positive", None), self.cfg, model_options=model_options, seed=seed)
File "C:\ComfyUI\comfy\samplers.py", line 622, in sampling_function
out = calc_cond_batch(model, conds, x, timestep, model_options)
File "C:\ComfyUI\comfy\samplers.py", line 210, in calc_cond_batch
return _calc_cond_batch_outer(model, conds, x_in, timestep, model_options)
File "C:\ComfyUI\comfy\samplers.py", line 218, in _calc_cond_batch_outer
return executor.execute(model, conds, x_in, timestep, model_options)
~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\samplers.py", line 334, in calc_cond_batch
output = model.apply_model(input_x, timestep_, **c).chunk(batch_chunks)
~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_base.py", line 185, in apply_model
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...<2 lines>...
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.APPLY_MODEL, transformer_options)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
).execute(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_base.py", line 229, in _apply_model
model_output = self.diffusion_model(xc, t, context=context, control=control, transformer_options=transformer_options, **extra_conds)
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1778, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1789, in _call_impl
return forward_call(*args, **kwargs)
File "C:\ComfyUI\comfy\ldm\flux\model.py", line 345, in forward
return comfy.patcher_extension.WrapperExecutor.new_class_executor(
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...<2 lines>...
comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.DIFFUSION_MODEL, transformer_options)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
).execute(x, timestep, context, y, guidance, ref_latents, control, transformer_options, **kwargs)
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\patcher_extension.py", line 113, in execute
return self.original(*args, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ldm\flux\model.py", line 406, in _forward
out = self.forward_orig(img, img_ids, context, txt_ids, timestep, y, guidance, control, timestep_zero_index=timestep_zero_index, transformer_options=transformer_options, attn_mask=kwargs.get("attention_mask", None))
File "C:\ComfyUI\comfy\ldm\flux\model.py", line 296, in forward_orig
img = block(img, vec=vec, pe=pe, attn_mask=attn_mask, transformer_options=transformer_options, **extra_kwargs)
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1778, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1789, in _call_impl
return forward_call(*args, **kwargs)
File "C:\ComfyUI\comfy\ldm\flux\layers.py", line 325, in forward
qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim_first], dim=-1)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1778, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\nn\modules\module.py", line 1789, in _call_impl
return forward_call(*args, **kwargs)
File "C:\ComfyUI\comfy\ops.py", line 545, in forward
return self.forward_comfy_cast_weights(*args, **kwargs)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ops.py", line 537, in forward_comfy_cast_weights
weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True)
~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ops.py", line 365, in cast_bias_weight
offload_stream = cast_modules_with_vbar([s], dtype, device, bias_dtype, non_blocking)
File "C:\ComfyUI\comfy\ops.py", line 215, in cast_modules_with_vbar
handle_pin(s, pin, xfer_source, xfer_dest, size=dest_size)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\ops.py", line 203, in handle_pin
comfy.model_management.cast_to_gathered(source, pin, non_blocking=non_blocking, stream=offload_stream, r2=dest)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_management.py", line 1447, in cast_to_gathered
if comfy.memory_management.read_tensor_file_slice_into(tensor, dest_view, stream=stream, destination2=dest2_view):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\memory_management.py", line 59, in read_tensor_file_slice_into
hostbuf.read_file_slice(file_obj, info.offset, info.size,
~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
offset=destination.data_ptr() - hostbuf.get_raw_address(),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
stream=stream_ptr,
^^^^^^^^^^^^^^^^^^
device_ptr=device_ptr,
^^^^^^^^^^^^^^^^^^^^^^
device=None if destination2 is None else destination2.device.index)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\comfy_aimdo\host_buffer.py", line 85, in read_file_slice
raise RuntimeError("HostBuffer.read_file_slice failed")
RuntimeError: HostBuffer.read_file_slice failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\ComfyUI\execution.py", line 541, in execute
comfy.model_management.reset_cast_buffers()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\ComfyUI\comfy\model_management.py", line 1366, in reset_cast_buffers
offload_stream.synchronize()
~~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\cuda\streams.py", line 108, in synchronize
super().synchronize()
~~~~~~~~~~~~~~~~~~~^^
torch.AcceleratorError: CUDA error: out of memory
Search for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information. CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1 Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "C:\Users\pauld.conda\envs\comfy\Lib\threading.py", line 1044, in _bootstrap_inner
self.run()
~~~~~~~~^^
File "C:\Users\pauld.conda\envs\comfy\Lib\threading.py", line 995, in run
self._target(*self._args, **self._kwargs)
~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\main.py", line 327, in prompt_worker
e.execute(item[2], prompt_id, extra_data, item[4])
~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\execution.py", line 714, in execute
asyncio.run(self.execute_async(prompt, prompt_id, extra_data, execute_outputs))
~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\asyncio\runners.py", line 195, in run
return runner.run(main)
~~~~~~~~~~^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\asyncio\runners.py", line 118, in run
return self._loop.run_until_complete(task)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
File "C:\Users\pauld.conda\envs\comfy\Lib\asyncio\base_events.py", line 725, in run_until_complete
return future.result()
~~~~~~~~~~~~~^^
File "C:\ComfyUI\execution.py", line 774, in execute_async
result, error, ex = await execute(self.server, dynamic_prompt, self.caches, node_id, extra_data, executed, prompt_id, execution_list, pending_subgraph_results, pending_async_nodes, ui_node_outputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\ComfyUI\execution.py", line 632, in execute
comfy.model_management.unload_all_models()
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\ComfyUI\comfy\model_management.py", line 1964, in unload_all_models
free_memory(1e30, device)
~~~~~~~~~~~^^^^^^^^^^^^^^
File "C:\ComfyUI\comfy\model_management.py", line 790, in free_memory
cleanup_models_gc()
~~~~~~~~~~~~~~~~~^^
File "C:\ComfyUI\comfy\model_management.py", line 941, in cleanup_models_gc
reset_cast_buffers()
~~~~~~~~~~~~~~~~~~^^
File "C:\ComfyUI\comfy\model_management.py", line 1366, in reset_cast_buffers
offload_stream.synchronize()
~~~~~~~~~~~~~~~~~~~~~~~~~~^^
File "C:\Users\pauld.conda\envs\comfy\Lib\site-packages\torch\cuda\streams.py", line 108, in synchronize
super().synchronize()
~~~~~~~~~~~~~~~~~~~^^
torch.AcceleratorError: CUDA error: out of memory
Search for `cudaErrorMemoryAllocation' in https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html for more information. CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. For debugging consider passing CUDA_LAUNCH_BLOCKING=1 Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions