# LAMSC/neural_nets.py at master · jiangfeibo/LAMSC · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchsummary import summary
import numpy as np
from torch.autograd import Variable

# definition of SC model with ASC
class SCNet(nn.Module):
    """Convolutional encoder/decoder ("SC model") with optional masking.

    A single ``forward`` method serves both directions:

    * ``forward(x)`` encodes ``x`` into a flattened latent tensor. As a side
      effect it caches the two max-pooling index tensors and the shape of the
      feature map before flattening on the instance.
    * ``forward(latent=z)`` decodes ``z`` back to image space using the state
      cached by the most recent encode call.

    Decoding therefore only works after an encode call whose batch size and
    spatial size match the latent being decoded; on a fresh instance the
    cached attributes do not exist yet and decoding fails.

    Args:
        input_dim: Number of channels of the input and of the reconstruction.
        ASC: When true, the encoded feature map is passed through ``MaskNet``
            before it is flattened.
    """

    def __init__(self, input_dim=3, ASC=False):
        super(SCNet, self).__init__()
        # Encoder: two 5x5 convolutions, each followed by 2x2 max pooling.
        self.conv1 = nn.Conv2d(input_dim, 128, kernel_size=5, bias=False)
        # return_indices=True because the decoder needs them for unpooling.
        self.pool = nn.MaxPool2d((2, 2), return_indices=True)
        self.conv2 = nn.Conv2d(128, 32, kernel_size=5, bias=False)
        self.use_ASC = ASC
        self.Mask = MaskNet(32)  # mask network (only applied when ASC is set)
        # Decoder: mirrors the encoder with transposed convs and unpooling.
        self.convt1 = nn.ConvTranspose2d(32, 128, kernel_size=5)
        self.convt2 = nn.ConvTranspose2d(128, input_dim, kernel_size=5)
        self.uppool = nn.MaxUnpool2d(2, 2)

    def forward(self, x=None, latent=None):
        """Encode ``x`` or, when ``latent`` is given, decode ``latent``.

        Args:
            x: Image batch to encode. Ignored when ``latent`` is supplied.
            latent: Flattened latent produced by a previous encode call.

        Returns:
            The flattened latent of shape ``(batch, -1)`` when encoding, or
            the ``tanh``-activated reconstruction when decoding.
        """
        # Identity test: ``latent == None`` would invoke tensor comparison.
        if latent is None:
            x = F.leaky_relu(self.conv1(x))
            x, self.indices1 = self.pool(x)
            x = F.leaky_relu(self.conv2(x))
            x, self.indices2 = self.pool(x)
            # Remember the un-flattened shape so the decoder can restore it.
            self.x_shape = x.shape
            if self.use_ASC:  # using masknet to mask semantics
                x = self.Mask(x)
            return x.view(x.size(0), -1)

        # Decode path: undo the flatten, then mirror the encoder in reverse.
        x = latent.view(self.x_shape)
        x = self.uppool(x, self.indices2)
        x = F.leaky_relu(self.convt1(x))
        x = self.uppool(x, self.indices1)
        # torch.tanh replaces the deprecated F.tanh; the result is identical.
        return torch.tanh(self.convt2(x))

# definition of the mask network
class MaskNet(nn.Module):
    """Zero out elements of a feature map according to a computed binary mask.

    Two 3x3 convolutions (padding 1, so the spatial size is kept) score every
    element of the input; ``|x|`` is added to that score and the element is
    kept only where the sum is strictly positive.

    Args:
        input_dim: Channel count of the incoming (and returned) feature map.
    """

    def __init__(self, input_dim=32):
        super().__init__()
        self.conv1 = nn.Conv2d(input_dim, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(128, input_dim, kernel_size=3, padding=1)

    def forward(self, x):
        """Return ``x`` with the masked-out elements set to zero."""
        hidden = F.relu(self.conv1(x))
        score = self.conv2(hidden) + x.abs()
        # Hard threshold to a 0/1 tensor.
        # NOTE: the comparison yields a constant tensor, so autograd does not
        # propagate gradients into conv1/conv2 through this mask.
        keep = score.gt(0).float()
        return x * keep

# definition of the channel network in ASI
class ChannelAttention(nn.Module):
    """Per-channel attention weights from globally pooled descriptors.

    The input is reduced to one value per channel twice (global average and
    global max). Both descriptors go through the same 1x1-conv bottleneck
    (``in_dims -> 128 -> in_dims``, no biases); the two results are summed
    and squashed with a sigmoid.

    Args:
        in_dims: Number of channels of the input feature map.
    """

    def __init__(self, in_dims):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.conv1 = nn.Conv2d(in_dims, 128, 1, bias=False)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(128, in_dims, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _bottleneck(self, pooled):
        """Apply the shared conv1 -> ReLU -> conv2 stack to a pooled tensor."""
        squeezed = self.conv1(pooled)
        return self.conv2(self.relu(squeezed))

    def forward(self, x):
        """Return weights in (0, 1) with one 1x1 spatial entry per channel."""
        from_avg = self._bottleneck(self.avg_pool(x))
        from_max = self._bottleneck(self.max_pool(x))
        return self.sigmoid(from_avg + from_max)

# definition of the spatial network in ASI
class SpatialAttention(nn.Module):
    """Per-location attention weights from channel-wise statistics.

    The channel axis (dim 1) is collapsed twice, by mean and by max; the two
    single-channel maps are stacked and reduced to one channel by a bias-free
    convolution followed by a sigmoid.

    Args:
        kernel_size: Size of the convolution kernel. The padding is
            ``(kernel_size - 1) // 2``, which keeps the spatial size
            unchanged for odd kernel sizes.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        pad = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=pad, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return a single-channel map of weights in (0, 1)."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        stacked = torch.cat((channel_mean, channel_max), dim=1)
        return self.sigmoid(self.conv(stacked))

# definition of the attention network in ASI
class AttentionNet(nn.Module):
    """Channel + spatial attention followed by a two-layer scoring head.

    The input is re-weighted by ``ChannelAttention`` and then by
    ``SpatialAttention``, flattened, and mapped through
    ``Linear -> ReLU -> Linear -> sigmoid`` to ``in_dims // 3`` values in
    (0, 1) per sample.

    Args:
        in_dims: Number of input channels. The head emits ``in_dims // 3``
            outputs (presumably one per 3-channel image stacked along the
            channel axis; confirm against the caller).
        kernel_size: Kernel size forwarded to ``SpatialAttention``. It should
            be odd so the spatial attention map matches the input size.
        img_size: Spatial size of the input, either a single int for square
            inputs or a ``(height, width)`` pair. It sizes the first linear
            layer; the defaults reproduce the previously hard-coded
            ``15 * 64 * 64 = 61440`` input features.
    """

    def __init__(self, in_dims=5*3, kernel_size=7, img_size=64):
        super(AttentionNet, self).__init__()
        if isinstance(img_size, int):
            height = width = img_size
        else:
            height, width = img_size
        self.ca = ChannelAttention(in_dims)
        self.sa = SpatialAttention(kernel_size)
        # Both attention stages keep the tensor shape, so the flattened
        # feature count is channels * height * width.
        self.out1 = nn.Linear(in_dims * height * width, 128)
        self.out2 = nn.Linear(128, in_dims // 3)

    def forward(self, x):
        """Return a ``(batch, in_dims // 3)`` tensor of scores in (0, 1)."""
        x = x * self.ca(x)  # broadcast the per-channel weights over H x W
        x = x * self.sa(x)  # broadcast the per-location weights over channels
        x = x.view(x.size(0), -1)
        x = F.relu(self.out1(x))
        # torch.sigmoid replaces the deprecated F.sigmoid; same result.
        return torch.sigmoid(self.out2(x))


if __name__ == '__main__':
    # Smoke test: print a layer-by-layer summary of the attention network.
    # Use the GPU when one is present and fall back to the CPU otherwise, so
    # the script also runs on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Alternative: summarise the SC model instead.
    # net = SCNet()
    # net.to(device)
    # summary(net, (3, 64, 64), device=device)

    net = AttentionNet(in_dims=5*3)
    net.to(device)
    summary(net, (15, 64, 64), device=device)