Skip to content

Commit cfdca9b

Browse files
committed
Change to .NET 6 so that it can be used more easily.
1 parent 17a5b60 commit cfdca9b

14 files changed

Lines changed: 83 additions & 79 deletions

File tree

StableDiffusionDemo_Console/Program.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ internal class Program
66
{
77
static void Main(string[] args)
88
{
9-
string sdModelPath = @".\tPonynai3V4.safetensors";
10-
string vaeModelPath = @".\sdxl.vae.safetensors";
9+
string sdModelPath = @".\Chilloutmix.safetensors";
10+
string vaeModelPath = @".\vae.safetensors";
1111

1212
string esrganModelPath = @".\RealESRGAN_x4plus.pth";
1313
string i2iPrompt = "High quality, best quality, moon, grass, tree, boat.";

StableDiffusionDemo_Console/StableDiffusionDemo_Console.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
<PropertyGroup>
44
<OutputType>Exe</OutputType>
5-
<TargetFramework>net8.0</TargetFramework>
5+
<TargetFramework>net6.0</TargetFramework>
66
<ImplicitUsings>enable</ImplicitUsings>
77
<Nullable>enable</Nullable>
88
</PropertyGroup>

StableDiffusionDemo_Winform/FormMain.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ private void Button_ModelLoad_Click(object sender, EventArgs e)
3939
SDScalarType scalarType = ComboBox_Precition.SelectedIndex == 0 ? SDScalarType.Float16 : SDScalarType.Float32;
4040
Task.Run(() =>
4141
{
42-
base.Invoke(() => Button_ModelLoad.Enabled = false);
42+
base.Invoke(() =>
43+
{
44+
Button_ModelLoad.Enabled = false;
45+
Button_Generate.Enabled = false;
46+
});
4347
sd = new StableDiffusion(deviceType, scalarType);
4448
sd.StepProgress += Sd_StepProgress;
4549
sd.LoadModel(modelPath, vaeModelPath);

StableDiffusionDemo_Winform/StableDiffusionDemo_Winform.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
<PropertyGroup>
44
<OutputType>WinExe</OutputType>
5-
<TargetFramework>net8.0-windows</TargetFramework>
5+
<TargetFramework>net6.0-windows7.0</TargetFramework>
66
<Nullable>enable</Nullable>
77
<UseWindowsForms>true</UseWindowsForms>
88
<ImplicitUsings>enable</ImplicitUsings>

StableDiffusionSharp/Modules/Clip.cs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ private class CLIPTextEmbeddings : Module<Tensor, Tensor>
8080
private readonly Parameter position_ids;
8181
public CLIPTextEmbeddings(long n_vocab, long n_embd, long n_token, Device? device = null, ScalarType? dtype = null) : base(nameof(CLIPTextEmbeddings))
8282
{
83-
position_ids = Parameter(zeros(size: [1, n_token], device: device, dtype: dtype));
83+
position_ids = Parameter(zeros(size: new long[] { 1, n_token }, device: device, dtype: dtype));
8484
token_embedding = Embedding(n_vocab, n_embd, device: device, dtype: dtype);
8585
position_embedding = Embedding(n_token, n_embd, device: device, dtype: dtype);
8686
RegisterComponents();
@@ -269,8 +269,8 @@ private class ViT_bigG_Clip : Module<Tensor, int, bool, bool, Tensor>
269269
public ViT_bigG_Clip(long n_vocab = 49408, long n_token = 77, long num_layers = 32, long n_heads = 20, long embed_dim = 1280, long intermediate_size = 1280 * 4, Device? device = null, ScalarType? dtype = null) : base(nameof(ViT_bigG_Clip))
270270
{
271271
token_embedding = Embedding(n_vocab, embed_dim, device: device, dtype: dtype);
272-
positional_embedding = Parameter(zeros(size: [n_token, embed_dim], device: device, dtype: dtype));
273-
text_projection = Parameter(zeros(size: [embed_dim, embed_dim], device: device, dtype: dtype));
272+
positional_embedding = Parameter(zeros(size: new long[] { n_token, embed_dim }, device: device, dtype: dtype));
273+
text_projection = Parameter(zeros(size: new long[] { embed_dim, embed_dim }, device: device, dtype: dtype));
274274
transformer = new Transformer(num_layers, embed_dim, n_heads, intermediate_size, Activations.GELU, device: device, dtype: dtype);
275275
ln_final = LayerNorm(embed_dim, device: device, dtype: dtype);
276276
RegisterComponents();
@@ -394,8 +394,8 @@ private class MultiheadAttention : Module<Tensor, Tensor>
394394
public MultiheadAttention(long embed_dim, long heads, Device? device = null, ScalarType? dtype = null) : base(nameof(MultiheadAttention))
395395
{
396396
this.heads = heads;
397-
in_proj_weight = Parameter(zeros([3 * embed_dim, embed_dim], device: device, dtype: dtype));
398-
in_proj_bias = Parameter(zeros([3 * embed_dim], device: device, dtype: dtype));
397+
in_proj_weight = Parameter(zeros(new long[] { 3 * embed_dim, embed_dim }, device: device, dtype: dtype));
398+
in_proj_bias = Parameter(zeros(new long[] { 3 * embed_dim }, device: device, dtype: dtype));
399399
out_proj = Linear(embed_dim, embed_dim, hasBias: true, device: device, dtype: dtype);
400400

401401
RegisterComponents();
@@ -476,7 +476,7 @@ public override (Tensor, Tensor) forward(Tensor token, long num_skip)
476476
{
477477
Device device = cond_stage_model.parameters().First().device;
478478
long padLength = n_token - token.shape[1];
479-
Tensor token1 = functional.pad(token, [0, padLength, 0, 0], value: endToken);
479+
Tensor token1 = functional.pad(token, new long[] { 0, padLength, 0, 0 }, value: endToken);
480480
return (cond_stage_model.forward(token1, num_skip, true).MoveToOuterDisposeScope(), zeros(1).MoveToOuterDisposeScope());
481481
}
482482
}
@@ -516,13 +516,13 @@ public override (Tensor, Tensor) forward(Tensor token)
516516
using (NewDisposeScope())
517517
{
518518
long padLength = n_token - token.shape[1];
519-
Tensor token1 = functional.pad(token, [0, padLength, 0, 0], value: endToken);
520-
Tensor token2 = functional.pad(token, [0, padLength, 0, 0]);
519+
Tensor token1 = functional.pad(token, new long[] { 0, padLength, 0, 0 }, value: endToken);
520+
Tensor token2 = functional.pad(token, new long[] { 0, padLength, 0, 0 });
521521

522522
Tensor vit_l_result = ((ViT_L_Clip)embedders[0]).forward(token1, 1, false);
523523
Tensor vit_bigG_result = ((Model)embedders[1]).forward(token2, 1, false, false);
524524
Tensor vit_bigG_vec = ((Model)embedders[1]).forward(token2, 0, false, true);
525-
Tensor crossattn = cat([vit_l_result, vit_bigG_result], -1);
525+
Tensor crossattn = cat(new Tensor[] { vit_l_result, vit_bigG_result }, -1);
526526
return (crossattn.MoveToOuterDisposeScope(), vit_bigG_vec.MoveToOuterDisposeScope());
527527
}
528528
}

StableDiffusionSharp/Modules/Esrgan.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ public override Tensor forward(Tensor x)
5353
using (NewDisposeScope())
5454
{
5555
Tensor x1 = lrelu.forward(conv1.forward(x));
56-
Tensor x2 = lrelu.forward(conv2.forward(cat([x, x1], 1)));
57-
Tensor x3 = lrelu.forward(conv3.forward(cat([x, x1, x2], 1)));
58-
Tensor x4 = lrelu.forward(conv4.forward(cat([x, x1, x2, x3], 1)));
59-
Tensor x5 = conv5.forward(cat([x, x1, x2, x3, x4], 1));
56+
Tensor x2 = lrelu.forward(conv2.forward(cat(new Tensor[] { x, x1 }, 1)));
57+
Tensor x3 = lrelu.forward(conv3.forward(cat(new Tensor[] { x, x1, x2 }, 1)));
58+
Tensor x4 = lrelu.forward(conv4.forward(cat(new Tensor[] { x, x1, x2, x3 }, 1)));
59+
Tensor x5 = conv5.forward(cat(new Tensor[] { x, x1, x2, x3, x4 }, 1));
6060
// Empirically, we use 0.2 to scale the residual for better performance
6161
return (x5 * 0.2 + x).MoveToOuterDisposeScope();
6262
}
@@ -153,8 +153,8 @@ public override Tensor forward(Tensor x)
153153
Tensor body_feat = conv_body.forward(body.forward(feat));
154154
feat = feat + body_feat;
155155
// upsample
156-
feat = lrelu.forward(conv_up1.forward(functional.interpolate(feat, scale_factor: [2, 2], mode: InterpolationMode.Nearest)));
157-
feat = lrelu.forward(conv_up2.forward(functional.interpolate(feat, scale_factor: [2, 2], mode: InterpolationMode.Nearest)));
156+
feat = lrelu.forward(conv_up1.forward(functional.interpolate(feat, scale_factor: new double[] { 2, 2 }, mode: InterpolationMode.Nearest)));
157+
feat = lrelu.forward(conv_up2.forward(functional.interpolate(feat, scale_factor: new double[] { 2, 2 }, mode: InterpolationMode.Nearest)));
158158
Tensor @out = conv_last.forward(lrelu.forward(conv_hr.forward(feat)));
159159
return @out.MoveToOuterDisposeScope();
160160
}

StableDiffusionSharp/Modules/SDModel.cs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ public SDModel(Device? device = null, ScalarType? dtype = null)
7575

7676
public virtual void LoadModel(string modelPath, string vaeModelPath, string vocabPath = @".\models\clip\vocab.json", string mergesPath = @".\models\clip\merges.txt")
7777
{
78+
is_loaded = false;
7879
ModelType modelType = ModelLoader.ModelLoader.GetModelType(modelPath);
7980

8081
cliper = modelType switch
@@ -142,8 +143,8 @@ private static Tensor GetTimeEmbedding(Tensor timestep, int max_period = 10000,
142143
int half = dim / 2;
143144
var freqs = torch.pow(max_period, -torch.arange(0, half, dtype: torch.float32) / half);
144145
var x = timestep * freqs.unsqueeze(0);
145-
x = torch.cat([x, x]);
146-
return torch.cat([torch.cos(x), torch.sin(x)], dim: -1);
146+
x = torch.cat(new Tensor[] { x, x });
147+
return torch.cat(new Tensor[] { torch.cos(x), torch.sin(x) }, dim: -1);
147148
}
148149
}
149150

@@ -159,10 +160,10 @@ private static Tensor GetTimeEmbedding(Tensor timestep, int max_period = 10000,
159160
(Tensor cond_context, Tensor cond_pooled) = cliper.forward(cond_tokens, clip_skip);
160161
Tensor uncond_tokens = tokenizer.Tokenize(nprompt).to(device);
161162
(Tensor uncond_context, Tensor uncond_pooled) = cliper.forward(uncond_tokens, clip_skip);
162-
Tensor context = cat([cond_context, uncond_context]);
163+
Tensor context = cat(new Tensor[] { cond_context, uncond_context });
163164
tempPromptHash = (prompt + nprompt).GetHashCode();
164165
tempTextContext = context;
165-
tempPooled = cat([cond_pooled, uncond_pooled]);
166+
tempPooled = cat(new Tensor[] { cond_pooled, uncond_pooled });
166167
tempTextContext = tempTextContext.MoveToOuterDisposeScope();
167168
tempPooled = tempPooled.MoveToOuterDisposeScope();
168169
}
@@ -212,7 +213,7 @@ public virtual ImageMagick.MagickImage TextToImage(string prompt, string nprompt
212213
(Tensor context, Tensor vector) = Clip(prompt, nprompt, clip_skip);
213214
using var _ = NewDisposeScope();
214215
Console.WriteLine("Getting latents......");
215-
Tensor latents = randn([1, 4, height, width]).to(dtype, device);
216+
Tensor latents = randn(new long[] { 1, 4, height, width }).to(dtype, device);
216217

217218
BasicSampler sampler = samplerType switch
218219
{

StableDiffusionSharp/Modules/Tokenizer.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
using Microsoft.ML.Tokenizers;
22
using System.Reflection;
3-
using TorchSharp;
43
using static TorchSharp.torch;
54

65
namespace StableDiffusionSharp.Modules

StableDiffusionSharp/Modules/Unet.cs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public override Tensor forward(Tensor x, Tensor y)
3838
long batch_size = input_shape[0];
3939
long sequence_length = input_shape[1];
4040

41-
long[] interim_shape = [batch_size, -1, n_heads_, d_head];
41+
long[] interim_shape = new long[] { batch_size, -1, n_heads_, d_head };
4242
Tensor q = to_q.forward(x);
4343
Tensor k = to_k.forward(y);
4444
Tensor v = to_v.forward(y);
@@ -190,7 +190,7 @@ public override Tensor forward(Tensor x, Tensor context)
190190
x = proj_in.forward(x);
191191
}
192192

193-
x = x.view([n, c, h * w]);
193+
x = x.view(new long[] { n, c, h * w });
194194
x = x.transpose(-1, -2);
195195

196196
if (use_linear)
@@ -208,7 +208,7 @@ public override Tensor forward(Tensor x, Tensor context)
208208
x = proj_out.forward(x);
209209
}
210210
x = x.transpose(-1, -2);
211-
x = x.view([n, c, h, w]);
211+
x = x.view(new long[] { n, c, h, w });
212212
if (!use_linear)
213213
{
214214
x = proj_out.forward(x);
@@ -241,7 +241,7 @@ public Upsample(int in_channels, bool with_conv = true, Device? device = null, S
241241
}
242242
public override Tensor forward(Tensor x)
243243
{
244-
var output = functional.interpolate(x, scale_factor: [2.0, 2.0], mode: InterpolationMode.Nearest);
244+
var output = functional.interpolate(x, scale_factor: new double[] { 2.0, 2.0 }, mode: InterpolationMode.Nearest);
245245
if (with_conv && conv is not null)
246246
{
247247
output = conv.forward(output);
@@ -359,19 +359,19 @@ private class UNet : Module<Tensor, Tensor, Tensor, Tensor>
359359
public UNet(int model_channels, int in_channels, int[]? channel_mult = null, int num_res_blocks = 2, int num_atten_blocks = 1, int context_dim = 768, int num_heads = 8, float dropout = 0.0f, bool use_timestep = true, Device? device = null, ScalarType? dtype = null) : base(nameof(UNet))
360360
{
361361
bool mask = false;
362-
channel_mult = channel_mult ?? [1, 2, 4, 4];
362+
channel_mult = channel_mult ?? new int[] { 1, 2, 4, 4 };
363363

364364
ch = model_channels;
365365
time_embed_dim = model_channels * 4;
366366
this.in_channels = in_channels;
367367
this.use_timestep = use_timestep;
368368

369-
List<int> input_block_channels = [model_channels];
369+
List<int> input_block_channels = new List<int> { model_channels };
370370

371371
if (use_timestep)
372372
{
373373
// timestep embedding
374-
time_embed = Sequential([Linear(model_channels, time_embed_dim, device: device, dtype: dtype), SiLU(), Linear(time_embed_dim, time_embed_dim, device: device, dtype: dtype)]);
374+
time_embed = Sequential(new Module<Tensor, Tensor>[] { Linear(model_channels, time_embed_dim, device: device, dtype: dtype), SiLU(), Linear(time_embed_dim, time_embed_dim, device: device, dtype: dtype) });
375375
}
376376

377377
// downsampling
@@ -462,7 +462,7 @@ public override Tensor forward(Tensor x, Tensor context, Tensor time)
462462
foreach (TimestepEmbedSequential layers in output_blocks)
463463
{
464464
Tensor index = skip_connections.Last();
465-
x = cat([x, index], 1);
465+
x = cat(new Tensor[] { x, index }, 1);
466466
skip_connections.RemoveAt(skip_connections.Count - 1);
467467
x = layers.forward(x, context, time);
468468
}
@@ -528,7 +528,7 @@ private class UNet : Module<Tensor, Tensor, Tensor, Tensor, Tensor>
528528

529529
public UNet(int model_channels, int in_channels, int[]? channel_mult = null, int num_res_blocks = 2, int context_dim = 768, int adm_in_channels = 2816, int num_heads = 20, float dropout = 0.0f, bool use_timestep = true, Device? device = null, ScalarType? dtype = null) : base(nameof(SDUnet))
530530
{
531-
channel_mult = channel_mult ?? [1, 2, 4];
531+
channel_mult = channel_mult ?? new int[] { 1, 2, 4 };
532532

533533
ch = model_channels;
534534
time_embed_dim = model_channels * 4;
@@ -538,7 +538,7 @@ public UNet(int model_channels, int in_channels, int[]? channel_mult = null, int
538538
bool useLinear = true;
539539
bool mask = false;
540540

541-
List<int> input_block_channels = [model_channels];
541+
List<int> input_block_channels = new List<int> { model_channels };
542542

543543
if (use_timestep)
544544
{
@@ -590,10 +590,10 @@ public override Tensor forward(Tensor x, Tensor context, Tensor time, Tensor y)
590590
{
591591
int dim = 512;
592592
Tensor embed = time_embed.forward(time);
593-
Tensor time_ids = tensor(new float[] { dim, dim, 0, 0, dim, dim }, embed.dtype, embed.device).repeat([2, 1]);
593+
Tensor time_ids = tensor(new float[] { dim, dim, 0, 0, dim, dim }, embed.dtype, embed.device).repeat(new long[] { 2, 1 });
594594
Tensor time_embeds = get_timestep_embedding(time_ids.flatten(), dim / 2, true, 0, 1);
595-
time_embeds = time_embeds.reshape([2, -1]);
596-
y = cat([y, time_embeds], dim: -1);
595+
time_embeds = time_embeds.reshape(new long[] { 2, -1 });
596+
y = cat(new Tensor[] { y, time_embeds }, dim: -1);
597597
Tensor label_embed = label_emb.forward(y.to(embed.dtype, embed.device));
598598
embed = embed + label_embed;
599599

@@ -607,7 +607,7 @@ public override Tensor forward(Tensor x, Tensor context, Tensor time, Tensor y)
607607
foreach (TimestepEmbedSequential layers in output_blocks)
608608
{
609609
Tensor index = skip_connections.Last();
610-
x = cat([x, index], 1);
610+
x = cat(new Tensor[] { x, index }, 1);
611611
skip_connections.RemoveAt(skip_connections.Count - 1);
612612
x = layers.forward(x, context, embed);
613613
}
@@ -685,12 +685,12 @@ private static Tensor get_timestep_embedding(Tensor timesteps, int embedding_dim
685685
emb = scale * emb;
686686

687687
// concat sine and cosine embeddings
688-
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim: -1);
688+
emb = torch.cat(new Tensor[] { torch.sin(emb), torch.cos(emb) }, dim: -1);
689689

690690
// flip sine and cosine embeddings
691691
if (flip_sin_to_cos)
692692
{
693-
emb = torch.cat([emb[.., half_dim..], emb[.., ..half_dim]], dim: -1);
693+
emb = torch.cat(new Tensor[] { emb[.., half_dim..], emb[.., ..half_dim] }, dim: -1);
694694
}
695695

696696
// zero pad

StableDiffusionSharp/Modules/VAE.cs

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ public override Tensor forward(Tensor x)
126126
{
127127
if (with_conv && conv != null)
128128
{
129-
long[] pad = [0, 1, 0, 1];
129+
long[] pad = new long[] { 0, 1, 0, 1 };
130130
x = functional.pad(x, pad, mode: PaddingModes.Constant, value: 0);
131131
x = conv.forward(x);
132132
}
@@ -153,7 +153,7 @@ public Upsample(int in_channels, bool with_conv = true, Device? device = null, S
153153
}
154154
public override Tensor forward(Tensor x)
155155
{
156-
var output = functional.interpolate(x, scale_factor: [2.0, 2.0], mode: InterpolationMode.Nearest);
156+
var output = functional.interpolate(x, scale_factor: new double[] { 2.0, 2.0 }, mode: InterpolationMode.Nearest);
157157
if (with_conv && conv != null)
158158
{
159159
output = conv.forward(output);
@@ -180,15 +180,16 @@ private class VAEEncoder : Module<Tensor, Tensor>
180180
public VAEEncoder(int ch = 128, int[]? ch_mult = null, int num_res_blocks = 2, int in_channels = 3, int z_channels = 16, bool double_z = true, Device? device = null, ScalarType? dtype = null) : base(nameof(VAEEncoder))
181181
{
182182
this.double_z = double_z;
183-
ch_mult ??= [1, 2, 4, 4];
183+
ch_mult ??= new int[] { 1, 2, 4, 4 };
184184
num_resolutions = ch_mult.Length;
185185
this.num_res_blocks = num_res_blocks;
186186

187187
// Input convolution
188188
conv_in = Conv2d(in_channels, ch, kernel_size: 3, stride: 1, padding: 1, device: device, dtype: dtype);
189189

190190
// Downsampling layers
191-
in_ch_mult = [1, .. ch_mult];
191+
in_ch_mult = new List<int> { 1 };
192+
in_ch_mult.AddRange(ch_mult);
192193
down = Sequential();
193194

194195
block_in = ch * in_ch_mult[0];
@@ -267,7 +268,7 @@ private class VAEDecoder : Module<Tensor, Tensor>
267268

268269
public VAEDecoder(int ch = 128, int out_ch = 3, int[]? ch_mult = null, int num_res_blocks = 2, int resolution = 256, int z_channels = 16, Device? device = null, ScalarType? dtype = null) : base(nameof(VAEDecoder))
269270
{
270-
ch_mult ??= [1, 2, 4, 4];
271+
ch_mult ??= new int[] { 1, 2, 4, 4 };
271272
num_resolutions = ch_mult.Length;
272273
this.num_res_blocks = num_res_blocks;
273274
int block_in = ch * ch_mult[num_resolutions - 1];
@@ -378,33 +379,5 @@ public override Tensor forward(Tensor input)
378379
return first_stage_model.forward(input);
379380
}
380381
}
381-
382-
private static long GetVideoCardMemory()
383-
{
384-
if (!cuda.is_available())
385-
{
386-
return 0;
387-
}
388-
else
389-
{
390-
using (var factory = new SharpDX.DXGI.Factory1())
391-
{
392-
var adapter = factory.Adapters[0];
393-
using (var adapter3 = adapter.QueryInterface<SharpDX.DXGI.Adapter3>())
394-
{
395-
if (adapter3 == null)
396-
{
397-
throw new ArgumentException($"Adapter {adapter.Description.Description} not support");
398-
}
399-
var memoryInfo = adapter3.QueryVideoMemoryInfo(0, SharpDX.DXGI.MemorySegmentGroup.Local);
400-
long totalVRAM = adapter.Description.DedicatedVideoMemory;
401-
long usedVRAM = memoryInfo.CurrentUsage;
402-
long freeVRAM = memoryInfo.Budget - usedVRAM;
403-
return freeVRAM;
404-
}
405-
}
406-
}
407-
}
408-
409382
}
410383
}

0 commit comments

Comments (0)