diff --git a/.gitmodules b/.gitmodules index a8e75f14..b14d3344 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "frontend"] path = frontend - url = https://github.com/hanamizuki-ai/lora-gui-dist + url = https://github.com/hdfhssg/lora-gui-dist [submodule "mikazuki/dataset-tag-editor"] path = mikazuki/dataset-tag-editor url = https://github.com/Akegarasu/dataset-tag-editor diff --git a/config/default.toml b/config/default.toml index 3e2cbd31..b7bb8f4d 100644 --- a/config/default.toml +++ b/config/default.toml @@ -38,6 +38,11 @@ lowram = false clip_skip = 2 mixed_precision = "fp16" save_precision = "fp16" +enable_contrastive = false +negative_sampling_method = "Random-Noise" +noise_strength = 1.0 +contrastive_weight = 0.05 +contrastive_warmup_steps = 100 [sample_prompt] sample_sampler = "euler_a" diff --git a/config/lora.toml b/config/lora.toml index 66212978..3fb7642c 100644 --- a/config/lora.toml +++ b/config/lora.toml @@ -41,6 +41,11 @@ max_train_epochs = 10 resolution = "512,512" clip_skip = 2 mixed_precision = "fp16" +enable_contrastive = false +negative_sampling_method = "Random-Noise" +noise_strength = 1.0 +contrastive_weight = 0.05 +contrastive_warmup_steps = 100 [sample_prompt_arguments] sample_sampler = "euler_a" @@ -59,4 +64,4 @@ save_precision = "fp16" [others] cache_latents = true shuffle_caption = true -enable_bucket = true \ No newline at end of file +enable_bucket = true diff --git a/mikazuki/schema/dreambooth.ts b/mikazuki/schema/dreambooth.ts index c6a71a31..4b8d1632 100644 --- a/mikazuki/schema/dreambooth.ts +++ b/mikazuki/schema/dreambooth.ts @@ -132,7 +132,47 @@ Schema.intersect([ optimizer_args_custom: Schema.array(String).role("table").description("自定义 optimizer_args,一行一个"), }) ]), - + Schema.intersect([ + // 对比学习主开关 + Schema.object({ + enable_contrastive: Schema.boolean() + .default(false) + .description('启用对比学习模块'), + }).description('对比学习配置'), + + // 当 enable_contrastive = true 时,才展示下面这些配置;否则只用空对象 + Schema.union([ + Schema.object({ + enable_contrastive: Schema.const(true).required(), + + negative_sampling_method: Schema.union([ + 'Random-Noise', 'Permutation', 'Random-index', 'Circular', 'Hard-Negative', + ]) + .default('Random-Noise') + .description('选择负样本生成策略'), + + noise_strength: Schema.number() + .step(0.5) + .min(0) + .max(10) + .default(1.0) + .description('噪音强度参数'), + + contrastive_weight: Schema.number() + .step(0.05) + .min(0) + .max(1) + .default(0.05) + .description('对比损失权重'), + + contrastive_warmup_steps: Schema.number() + .step(10) + .default(100) + .description('使用随机负样本的步数'), + }), + Schema.object({}), + ]), + ]), Schema.intersect([ Schema.object({ enable_preview: Schema.boolean().default(false).description("启用训练预览图"), diff --git a/mikazuki/schema/flux-lora.ts b/mikazuki/schema/flux-lora.ts index 94277405..94d4127d 100644 --- a/mikazuki/schema/flux-lora.ts +++ b/mikazuki/schema/flux-lora.ts @@ -54,6 +54,7 @@ Schema.intersect([ scale_weight_norms: Schema.number().step(0.01).min(0).description("最大范数正则化。如果使用,推荐为 1"), network_args_custom: Schema.array(String).role('table').description('自定义 network_args,一行一个'), enable_base_weight: Schema.boolean().default(false).description('启用基础权重(差异炼丹)'), + network_scale: Schema.number().step(0.05).default(1.0).description('网络缩放系数,应该小于等于1.0'), }).description("网络设置"), // lycoris 参数 @@ -62,7 +63,47 @@ Schema.intersect([ SHARED_SCHEMAS.NETWORK_OPTION_BASEWEIGHT, ]), - + Schema.intersect([ + // 对比学习主开关 + Schema.object({ + enable_contrastive: Schema.boolean() + .default(false) + .description('启用对比学习模块'), + }).description('对比学习配置'), + + // 当 enable_contrastive = true 时,才展示下面这些配置;否则只用空对象 + Schema.union([ + Schema.object({ + enable_contrastive: Schema.const(true).required(), + + negative_sampling_method: Schema.union([ + 'Random-Noise', 'Permutation', 'Random-index', 'Circular', 'Hard-Negative', + ]) + .default('Random-Noise') + .description('选择负样本生成策略'), + + noise_strength: Schema.number() + .step(0.5) + .min(0) + .max(10) + .default(1.0) + .description('噪音强度参数'), + + contrastive_weight: Schema.number() + .step(0.05) + .min(0) + .max(1) + .default(0.05) + .description('对比损失权重'), + + contrastive_warmup_steps: Schema.number() + .step(10) + .default(100) + .description('使用随机负样本的步数'), + }), + Schema.object({}), + ]), + ]), // 预览图设置 SHARED_SCHEMAS.PREVIEW_IMAGE, diff --git a/mikazuki/schema/lora-basic.ts b/mikazuki/schema/lora-basic.ts index 8ee03e62..28c02a89 100644 --- a/mikazuki/schema/lora-basic.ts +++ b/mikazuki/schema/lora-basic.ts @@ -46,7 +46,47 @@ Schema.intersect([ ]).default("AdamW8bit").description("优化器设置"), }) ]), - + Schema.intersect([ + // 对比学习主开关 + Schema.object({ + enable_contrastive: Schema.boolean() + .default(false) + .description('启用对比学习模块'), + }).description('对比学习配置'), + + // 当 enable_contrastive = true 时,才展示下面这些配置;否则只用空对象 + Schema.union([ + Schema.object({ + enable_contrastive: Schema.const(true).required(), + + negative_sampling_method: Schema.union([ + 'Random-Noise', 'Permutation', 'Random-index', 'Circular', 'Hard-Negative', + ]) + .default('Random-Noise') + .description('选择负样本生成策略'), + + noise_strength: Schema.number() + .step(0.5) + .min(0) + .max(10) + .default(1.0) + .description('噪音强度参数'), + + contrastive_weight: Schema.number() + .step(0.05) + .min(0) + .max(1) + .default(0.05) + .description('对比损失权重'), + + contrastive_warmup_steps: Schema.number() + .step(10) + .default(100) + .description('使用随机负样本的步数'), + }), + Schema.object({}), + ]), + ]), Schema.intersect([ Schema.object({ enable_preview: Schema.boolean().default(false).description('启用训练预览图'), diff --git a/mikazuki/schema/lora-master.ts b/mikazuki/schema/lora-master.ts index 50e731a8..10547622 100644 --- a/mikazuki/schema/lora-master.ts +++ b/mikazuki/schema/lora-master.ts @@ -55,6 +55,7 @@ Schema.intersect([ network_args_custom: Schema.array(String).role('table').description('自定义 network_args,一行一个'), enable_block_weights: Schema.boolean().default(false).description('启用分层学习率训练(只支持网络模块 networks.lora)'), enable_base_weight: Schema.boolean().default(false).description('启用基础权重(差异炼丹)'), + network_scale: Schema.number().step(0.05).default(1.0).description('网络缩放系数,应该小于等于1.0'), }).description("网络设置"), // lycoris 参数 @@ -69,7 +70,47 @@ Schema.intersect([ SHARED_SCHEMAS.NETWORK_OPTION_BASEWEIGHT, ]), - + Schema.intersect([ + // 对比学习主开关 + Schema.object({ + enable_contrastive: Schema.boolean() + .default(false) + .description('启用对比学习模块'), + }).description('对比学习配置'), + + // 当 enable_contrastive = true 时,才展示下面这些配置;否则只用空对象 + Schema.union([ + Schema.object({ + enable_contrastive: Schema.const(true).required(), + + negative_sampling_method: Schema.union([ + 'Random-Noise', 'Permutation', 'Random-index', 'Circular', 'Hard-Negative', + ]) + .default('Random-Noise') + .description('选择负样本生成策略'), + + noise_strength: Schema.number() + .step(0.5) + .min(0) + .max(10) + .default(1.0) + .description('噪音强度参数'), + + contrastive_weight: Schema.number() + .step(0.05) + .min(0) + .max(1) + .default(0.05) + .description('对比损失权重'), + + contrastive_warmup_steps: Schema.number() + .step(10) + .default(100) + .description('使用随机负样本的步数'), + }), + Schema.object({}), + ]), + ]), // 预览图设置 SHARED_SCHEMAS.PREVIEW_IMAGE, diff --git a/mikazuki/schema/lumina2-lora.ts b/mikazuki/schema/lumina2-lora.ts index 1233c987..86925dbc 100644 --- a/mikazuki/schema/lumina2-lora.ts +++ b/mikazuki/schema/lumina2-lora.ts @@ -56,6 +56,7 @@ Schema.intersect([ scale_weight_norms: Schema.number().step(0.01).min(0).default(1.0).description("最大范数正则化。如果使用,推荐为 1"), network_args_custom: Schema.array(String).role('table').description('自定义 network_args,一行一个'), enable_base_weight: Schema.boolean().default(false).description('启用基础权重(差异炼丹)'), + network_scale: Schema.number().step(0.05).default(1.0).description('网络缩放系数,应该小于等于1.0'), }).description("网络设置"), // lycoris 参数 @@ -64,7 +65,47 @@ Schema.intersect([ SHARED_SCHEMAS.NETWORK_OPTION_BASEWEIGHT, ]), - + Schema.intersect([ + // 对比学习主开关 + Schema.object({ + enable_contrastive: Schema.boolean() + .default(false) + .description('启用对比学习模块'), + }).description('对比学习配置'), + + // 当 enable_contrastive = true 时,才展示下面这些配置;否则只用空对象 + Schema.union([ + Schema.object({ + enable_contrastive: Schema.const(true).required(), + + negative_sampling_method: Schema.union([ + 'Random-Noise', 'Permutation', 'Random-index', 'Circular', 'Hard-Negative', + ]) + .default('Random-Noise') + .description('选择负样本生成策略'), + + noise_strength: Schema.number() + .step(0.5) + .min(0) + .max(10) + .default(1.0) + .description('噪音强度参数'), + + contrastive_weight: Schema.number() + .step(0.05) + .min(0) + .max(1) + .default(0.05) + .description('对比损失权重'), + + contrastive_warmup_steps: Schema.number() + .step(10) + .default(100) + .description('使用随机负样本的步数'), + }), + Schema.object({}), + ]), + ]), // 预览图设置 SHARED_SCHEMAS.PREVIEW_IMAGE, @@ -96,4 +137,4 @@ Schema.intersect([ // 分布式训练 SHARED_SCHEMAS.DISTRIBUTED_TRAINING -]); \ No newline at end of file +]); diff --git a/mikazuki/schema/sd3-lora.ts b/mikazuki/schema/sd3-lora.ts index aa033b9c..8d99e24a 100644 --- a/mikazuki/schema/sd3-lora.ts +++ b/mikazuki/schema/sd3-lora.ts @@ -25,7 +25,7 @@ Schema.intersect([ // 保存设置 SHARED_SCHEMAS.SAVE_SETTINGS, - + Schema.object({ max_train_epochs: Schema.number().min(1).default(20).description("最大训练 epoch(轮数)"), train_batch_size: Schema.number().min(1).default(1).description("批量大小, 越高显存占用越高"), @@ -46,6 +46,7 @@ Schema.intersect([ network_alpha: Schema.number().min(1).default(1).description("常用值:等于 network_dim 或 network_dim*1/2 或 1。使用较小的 alpha 需要提升学习率"), network_args_custom: Schema.array(String).role('table').description('自定义 network_args,一行一个'), enable_base_weight: Schema.boolean().default(false).description('启用基础权重(差异炼丹)'), + network_scale: Schema.number().step(0.05).default(1.0).description('网络缩放系数,应该小于等于1.0'), }).description("网络设置"), // lycoris 参数 @@ -54,7 +55,47 @@ Schema.intersect([ SHARED_SCHEMAS.NETWORK_OPTION_BASEWEIGHT, ]), - + Schema.intersect([ + // 对比学习主开关 + Schema.object({ + enable_contrastive: Schema.boolean() + .default(false) + .description('启用对比学习模块'), + }).description('对比学习配置'), + + // 当 enable_contrastive = true 时,才展示下面这些配置;否则只用空对象 + Schema.union([ + Schema.object({ + enable_contrastive: Schema.const(true).required(), + + negative_sampling_method: Schema.union([ + 'Random-Noise', 'Permutation', 'Random-index', 'Circular', 'Hard-Negative', + ]) + .default('Random-Noise') + .description('选择负样本生成策略'), + + noise_strength: Schema.number() + .step(0.5) + .min(0) + .max(10) + .default(1.0) + .description('噪音强度参数'), + + contrastive_weight: Schema.number() + .step(0.05) + .min(0) + .max(1) + .default(0.05) + .description('对比损失权重'), + + contrastive_warmup_steps: Schema.number() + .step(10) + .default(100) + .description('使用随机负样本的步数'), + }), + Schema.object({}), + ]), + ]), // 预览图设置 SHARED_SCHEMAS.PREVIEW_IMAGE, diff --git a/mikazuki/schema/shared.ts b/mikazuki/schema/shared.ts index 1ab52a6d..d2134a34 100644 --- a/mikazuki/schema/shared.ts +++ b/mikazuki/schema/shared.ts @@ -171,6 +171,7 @@ Schema.object({ optimizer_args_custom: Schema.array(String).role('table').description('自定义 optimizer_args,一行一个'), }) + ]), PREVIEW_IMAGE: Schema.intersect([ @@ -240,4 +241,4 @@ } return data -})() \ No newline at end of file +})() diff --git a/scripts/dev/contrastive.py b/scripts/dev/contrastive.py new file mode 100644 index 00000000..826f16f2 --- /dev/null +++ b/scripts/dev/contrastive.py @@ -0,0 +1,40 @@ +import torch + +def contrastive_target(latents, noise, method='Random-Noise', + noise_strength=0.1): + ''' + Generate Negative samples for contrastive learning. + Args: + latents: The input latent representations. + noise: The input noise. + method: The method used for generating negative samples. + noise_strength: The strength of the noise applied. + Returns: + latents_neg: The negative latent representations. + noise_neg: The negative noise. + ''' + if method == 'Random-Noise' or latents.shape[0] == 1: + latents_neg = latents + torch.randn_like(latents) * noise_strength + noise_neg = noise + torch.randn_like(noise) * noise_strength + elif method == 'Permutation': + perm = torch.randperm(latents.shape[0]) + latents_neg = latents[perm] + noise_neg = noise[perm] + elif method == 'Random-index': + idx = torch.randint(0, latents.shape[0], (latents.shape[0],)) + latents_neg = latents[idx] + noise_neg = noise[idx] + elif method == 'Circular': + perm = torch.arange(latents.shape[0]) + perm = torch.roll(perm, shifts=1) + latents_neg = latents[perm] + noise_neg = noise[perm] + elif method == 'Hard-Negative': + sim = torch.cdist(latents, latents, p=2) # 欧氏距离矩阵 + sim.fill_diagonal_(float('inf')) # 忽略自身 + idx_hard = sim.argmin(dim=1) + latents_neg = latents[idx_hard] + noise_neg = noise[idx_hard] + else: + raise ValueError(f'Unknown method: {method}') + return latents_neg, noise_neg diff --git a/scripts/dev/flux_train.py b/scripts/dev/flux_train.py index 6f98adea..0d42dfc6 100644 --- a/scripts/dev/flux_train.py +++ b/scripts/dev/flux_train.py @@ -19,7 +19,7 @@ import time from typing import List, Optional, Tuple, Union import toml - +from contrastive import contrastive_target from tqdm import tqdm import torch @@ -665,10 +665,17 @@ def grad_hook(parameter: torch.Tensor): # flow matching loss: this is different from SD3 target = noise - latents - + + model_pred = model_pred*args.network_scale # calculate loss huber_c = train_util.get_huber_threshold_if_needed(args, timesteps, noise_scheduler) loss = train_util.conditional_loss(model_pred.float(), target.float(), args.loss_type, "none", huber_c) + if args.enable_contrastive and epoch>= args.contrastive_warmup_steps: + latents_neg, noise_neg = contrastive_target(latents, noise, method=args.negative_sampling_method,noise_strength=args.noise_strength) + target_neg = noise_neg - latents_neg + loss_neg = train_util.conditional_loss( + noise_pred_neg.float(), target_neg.float(), args.loss_type, "none", huber_c) + loss = loss - args.contrastive_weight*loss_neg if weighting is not None: loss = loss * weighting if args.masked_loss or ("alpha_masks" in batch and batch["alpha_masks"] is not None): diff --git a/scripts/dev/train_network.py b/scripts/dev/train_network.py index 2d279b3b..c5bfd432 100644 --- a/scripts/dev/train_network.py +++ b/scripts/dev/train_network.py @@ -11,7 +11,7 @@ from multiprocessing import Value import numpy as np import toml - +from contrastive import contrastive_target from tqdm import tqdm import torch @@ -441,9 +441,10 @@ def process_batch( train_unet, is_train=is_train, ) - + noise_pred = noise_pred*args.network_scale huber_c = train_util.get_huber_threshold_if_needed(args, timesteps, noise_scheduler) loss = train_util.conditional_loss(noise_pred.float(), target.float(), args.loss_type, "none", huber_c) + if weighting is not None: loss = loss * weighting if args.masked_loss or ("alpha_masks" in batch and batch["alpha_masks"] is not None): diff --git a/scripts/stable/contrastive.py b/scripts/stable/contrastive.py new file mode 100644 index 00000000..826f16f2 --- /dev/null +++ b/scripts/stable/contrastive.py @@ -0,0 +1,40 @@ +import torch + +def contrastive_target(latents, noise, method='Random-Noise', + noise_strength=0.1): + ''' + Generate Negative samples for contrastive learning. + Args: + latents: The input latent representations. + noise: The input noise. + method: The method used for generating negative samples. + noise_strength: The strength of the noise applied. + Returns: + latents_neg: The negative latent representations. + noise_neg: The negative noise. + ''' + if method == 'Random-Noise' or latents.shape[0] == 1: + latents_neg = latents + torch.randn_like(latents) * noise_strength + noise_neg = noise + torch.randn_like(noise) * noise_strength + elif method == 'Permutation': + perm = torch.randperm(latents.shape[0]) + latents_neg = latents[perm] + noise_neg = noise[perm] + elif method == 'Random-index': + idx = torch.randint(0, latents.shape[0], (latents.shape[0],)) + latents_neg = latents[idx] + noise_neg = noise[idx] + elif method == 'Circular': + perm = torch.arange(latents.shape[0]) + perm = torch.roll(perm, shifts=1) + latents_neg = latents[perm] + noise_neg = noise[perm] + elif method == 'Hard-Negative': + sim = torch.cdist(latents, latents, p=2) # 欧氏距离矩阵 + sim.fill_diagonal_(float('inf')) # 忽略自身 + idx_hard = sim.argmin(dim=1) + latents_neg = latents[idx_hard] + noise_neg = noise[idx_hard] + else: + raise ValueError(f'Unknown method: {method}') + return latents_neg, noise_neg diff --git a/scripts/stable/train_network.py b/scripts/stable/train_network.py index 7bf125dc..cbaf787a 100644 --- a/scripts/stable/train_network.py +++ b/scripts/stable/train_network.py @@ -8,7 +8,7 @@ import json from multiprocessing import Value import toml - +from contrastive import contrastive_target from tqdm import tqdm import torch @@ -973,7 +973,7 @@ def remove_model(old_ckpt_name): text_encoder_conds, batch, weight_dtype, - ) + )*args.network_scale if args.v_parameterization: # v-parameterization training @@ -984,6 +984,19 @@ def remove_model(old_ckpt_name): loss = train_util.conditional_loss( noise_pred.float(), target.float(), reduction="none", loss_type=args.loss_type, huber_c=huber_c ) + if args.enable_contrastive and epoch>= args.contrastive_warmup_steps: + latents_neg, noise_neg = contrastive_target(latents, noise, method=args.negative_sampling_method,noise_strength=args.noise_strength) + if args.v_parameterization: + # v-parameterization training + target_neg = noise_scheduler.get_velocity(latents_neg, noise_neg, timesteps) + else: + target_neg = noise_neg + loss_neg = train_util.conditional_loss( + noise_pred_neg.float(), target_neg.float(), reduction="none", loss_type=args.loss_type, huber_c=huber_c + ) + loss = loss - args.contrastive_weight*loss_neg + + if args.masked_loss or ("alpha_masks" in batch and batch["alpha_masks"] is not None): loss = apply_masked_loss(loss, batch) loss = loss.mean([1, 2, 3])