
Commit 2725103: "add comment"
1 parent 58d1634

File tree

3 files changed (+95, -21 lines)

main.py

Lines changed: 3 additions & 1 deletion

@@ -3,4 +3,6 @@
 @Time : 2019/2/19 16:06
 @Author : Wang Xin
 @Email : wangxin_buaa@163.com
-"""
+"""
+
+# TODO: handwritten digit recognition

network/deform_conv/deform_conv.py

Lines changed: 78 additions & 3 deletions

@@ -11,7 +11,6 @@
 import torch.nn as nn
 from torch.autograd import Variable
 
-
 """
 https://github.com/ChunhuanLin/deform_conv_pytorch/blob/master/deform_conv.py
 """
@@ -45,47 +44,119 @@ def forward(self, x, offset):
         # (b, 2N, h, w)
         p = self._get_p(offset, dtype)
 
+        print('p size:', p.size())
+        print('p = ', p)
+
         # (b, h, w, 2N)
         p = p.contiguous().permute(0, 2, 3, 1)
+
+        print('p size:', p.size())
+
+        """
+        If q is fractional, bilinear interpolation is used; it involves four integer
+        positions: left-top (q_lt), right-bottom (q_rb), left-bottom (q_lb) and right-top (q_rt).
+        """
         q_lt = Variable(p.data, requires_grad=False).floor()
+
+        """
+        *┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄→ x
+        ┊   .(x, y)     .(x+1, y)
+        ┊
+        ┊   .(x, y+1)   .(x+1, y+1)
+        ↓
+        y
+
+        For the right-bottom point: its x = left-top's x + 1, its y = left-top's y + 1.
+        """
         q_rb = q_lt + 1
 
+        """
+        x.size(2) is h and x.size(3) is w; clamp so that 0 <= p_y <= h - 1 and 0 <= p_x <= w - 1.
+        """
         q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2) - 1), torch.clamp(q_lt[..., N:], 0, x.size(3) - 1)],
                          dim=-1).long()
+
+        """
+        Clamp q_rb to the same valid range.
+        """
         q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2) - 1), torch.clamp(q_rb[..., N:], 0, x.size(3) - 1)],
                          dim=-1).long()
+
+        """
+        For the left-bottom point, its x equals left-top's x and its y equals right-bottom's y.
+        """
         q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], -1)
+
+        """
+        For the right-top point, its x comes from q_rb and its y from q_lt.
+        """
         q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], -1)
 
+        print('q_lt size:', q_lt.size())
+        print('q_rb size:', q_rb.size())
+        print('q_lb size:', q_lb.size())
+        print('q_rt size:', q_rt.size())
+        print('N = ', N)
+        print('q_lt[..., :N] size:', q_lt[..., :N].size())
+
+        """
+        Find p_y <= padding or p_y >= h - 1 - padding, and p_x <= padding or p_x >= w - 1 - padding:
+        these points fall into the zero-padded border, outside the area where the pixel
+        value is meaningful.
+        """
         # (b, h, w, N)
         mask = torch.cat([p[..., :N].lt(self.padding) + p[..., :N].gt(x.size(2) - 1 - self.padding),
                           p[..., N:].lt(self.padding) + p[..., N:].gt(x.size(3) - 1 - self.padding)], dim=-1).type_as(p)
         mask = mask.detach()
-        floor_p = p - (p - torch.floor(p))
+        print('mask:', mask)
+
+        floor_p = torch.floor(p)
+        print('floor_p = ', floor_p)
+
+        """
+        Where mask is 1, take floor_p; where mask is 0, keep the original p.
+        """
         p = p * (1 - mask) + floor_p * mask
         p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2) - 1), torch.clamp(p[..., N:], 0, x.size(3) - 1)], dim=-1)
 
+        """
+        In the paper, G(q, p) = g(q_x, p_x) * g(q_y, p_y) with g(a, b) = max(0, 1 - |a - b|).
+        """
         # bilinear kernel (b, h, w, N)
         g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
         g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
         g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
         g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))
 
+        print('g_lt = ', g_lt)
+
         # (b, c, h, w, N)
         x_q_lt = self._get_x_q(x, q_lt, N)
         x_q_rb = self._get_x_q(x, q_rb, N)
         x_q_lb = self._get_x_q(x, q_lb, N)
         x_q_rt = self._get_x_q(x, q_rt, N)
 
+        """
+        In the paper, x(p) = Σ G(p, q) * x(q), where G is the bilinear kernel.
+        """
         # (b, c, h, w, N)
         x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
                    g_rb.unsqueeze(dim=1) * x_q_rb + \
                    g_lb.unsqueeze(dim=1) * x_q_lb + \
                    g_rt.unsqueeze(dim=1) * x_q_rt
 
+        print('#01 x_offset size:', x_offset.size())
+
         x_offset = self._reshape_x_offset(x_offset, ks)
-        out = self.conv_kernel(x_offset)
+        print('#02 x_offset size:', x_offset.size())
 
+        out = self.conv_kernel(x_offset)
         return out
 
     def _get_p_n(self, N, dtype):
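
The comments added in this hunk describe plain bilinear interpolation. As a sanity check, here is a minimal standalone sketch of the same kernel, separate from the module above (the helper name bilinear_sample is local to this example, not part of the commit):

import torch

def bilinear_sample(x, py, px):
    """Sample a 2-D tensor x at a fractional point (py, px) via its four corners."""
    h, w = x.size(0), x.size(1)
    y0, x0 = int(py), int(px)                           # left-top corner (floor, assuming py, px >= 0)
    y1, x1 = min(y0 + 1, h - 1), min(x0 + 1, w - 1)     # right-bottom corner, clamped into the image
    corners = [(y0, x0), (y0, x1), (y1, x0), (y1, x1)]  # lt, rt, lb, rb
    g = lambda a, b: max(0.0, 1.0 - abs(a - b))         # g(a, b) = max(0, 1 - |a - b|)
    # x(p) = sum over the four corners q of G(q, p) * x(q), with G(q, p) = g(q_y, p_y) * g(q_x, p_x)
    return sum(g(qy, py) * g(qx, px) * x[qy, qx].item() for qy, qx in corners)

x = torch.arange(16.0).view(4, 4)
print(bilinear_sample(x, 1.5, 2.5))  # 8.5, the mean of the four surrounding pixels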
@@ -113,8 +184,12 @@ def _get_p(self, offset, dtype):
 
         # (1, 2N, 1, 1)
         p_n = self._get_p_n(N, dtype)
+        print('p_n:', p_n)
+        print('p_n size:', p_n.size())
         # (1, 2N, h, w)
         p_0 = self._get_p_0(h, w, N, dtype)
+        print('p_0:', p_0)
+        print('p_0 size:', p_0.size())
         p = p_0 + p_n + offset
         return p
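
For reference, a standalone sketch of the sampling grid that _get_p assembles as p = p_0 + p_n + offset, here for a 3x3 kernel and stride 1; the shapes mirror the comments in the hunk above, and the zero offset stands in for the learned one produced by the offset branch:

import torch

kernel_size, h, w = 3, 2, 2
N = kernel_size * kernel_size  # N = 9 sampling points per output pixel

# p_n: the fixed kernel offsets (-1..1) x (-1..1), shaped (1, 2N, 1, 1)
r = torch.arange(-(kernel_size - 1) // 2, (kernel_size - 1) // 2 + 1)
p_n_x, p_n_y = torch.meshgrid(r, r)
p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)]).view(1, 2 * N, 1, 1).float()

# p_0: the base coordinate of every output pixel, shaped (1, 2N, h, w)
p_0_x, p_0_y = torch.meshgrid(torch.arange(1, h + 1), torch.arange(1, w + 1))
p_0 = torch.cat([torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1),
                 torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)], dim=1).float()

# offset would come from the offset conv in the module; zeros here for illustration only
offset = torch.zeros(1, 2 * N, h, w)
p = p_0 + p_n + offset  # (1, 2N, h, w) sampling locations
print(p.size())         # torch.Size([1, 18, 2, 2])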

network/deform_conv/deform_conv_v2.py

Lines changed: 14 additions & 17 deletions

@@ -27,13 +27,13 @@ def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, bias=None, mod
         self.zero_padding = nn.ZeroPad2d(padding)
         self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, stride=kernel_size, bias=bias)
 
-        self.p_conv = nn.Conv2d(inc, 2 * kernel_size * kernel_size, kernel_size=3, padding=1, stride=stride)
+        self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
         nn.init.constant_(self.p_conv.weight, 0)
         self.p_conv.register_backward_hook(self._set_lr)
 
         self.modulation = modulation
         if modulation:
-            self.m_conv = nn.Conv2d(inc, kernel_size * kernel_size, kernel_size=3, padding=1, stride=stride)
+            self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=3, padding=1, stride=stride)
             nn.init.constant_(self.m_conv.weight, 0.5)
             self.m_conv.register_backward_hook(self._set_lr)
 
@@ -62,15 +62,13 @@ def forward(self, x):
         q_lt = p.detach().floor()
         q_rb = q_lt + 1
 
-        q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2) - 1), torch.clamp(q_lt[..., N:], 0, x.size(3) - 1)],
-                         dim=-1).long()
-        q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2) - 1), torch.clamp(q_rb[..., N:], 0, x.size(3) - 1)],
-                         dim=-1).long()
+        q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2)-1), torch.clamp(q_lt[..., N:], 0, x.size(3)-1)], dim=-1).long()
+        q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2)-1), torch.clamp(q_rb[..., N:], 0, x.size(3)-1)], dim=-1).long()
         q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1)
         q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1)
 
         # clip p
-        p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2) - 1), torch.clamp(p[..., N:], 0, x.size(3) - 1)], dim=-1)
+        p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2)-1), torch.clamp(p[..., N:], 0, x.size(3)-1)], dim=-1)
 
         # bilinear kernel (b, h, w, N)
         g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
@@ -104,26 +102,26 @@ def forward(self, x):
 
     def _get_p_n(self, N, dtype):
         p_n_x, p_n_y = torch.meshgrid(
-            torch.arange(-(self.kernel_size - 1) // 2, (self.kernel_size - 1) // 2 + 1),
-            torch.arange(-(self.kernel_size - 1) // 2, (self.kernel_size - 1) // 2 + 1))
+            torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1),
+            torch.arange(-(self.kernel_size-1)//2, (self.kernel_size-1)//2+1))
         # (2N, 1)
         p_n = torch.cat([torch.flatten(p_n_x), torch.flatten(p_n_y)], 0)
-        p_n = p_n.view(1, 2 * N, 1, 1).type(dtype)
+        p_n = p_n.view(1, 2*N, 1, 1).type(dtype)
 
         return p_n
 
     def _get_p_0(self, h, w, N, dtype):
         p_0_x, p_0_y = torch.meshgrid(
-            torch.arange(1, h * self.stride + 1, self.stride),
-            torch.arange(1, w * self.stride + 1, self.stride))
+            torch.arange(1, h*self.stride+1, self.stride),
+            torch.arange(1, w*self.stride+1, self.stride))
         p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1)
         p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)
         p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype)
 
         return p_0
 
     def _get_p(self, offset, dtype):
-        N, h, w = offset.size(1) // 2, offset.size(2), offset.size(3)
+        N, h, w = offset.size(1)//2, offset.size(2), offset.size(3)
 
         # (1, 2N, 1, 1)
         p_n = self._get_p_n(N, dtype)
@@ -140,7 +138,7 @@ def _get_x_q(self, x, q, N):
         x = x.contiguous().view(b, c, -1)
 
         # (b, h, w, N)
-        index = q[..., :N] * padded_w + q[..., N:]  # offset_x*w + offset_y
+        index = q[..., :N]*padded_w + q[..., N:]  # offset_x*w + offset_y
         # (b, c, h*w*N)
         index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)
 
@@ -151,9 +149,8 @@ def _get_x_q(self, x, q, N):
     @staticmethod
     def _reshape_x_offset(x_offset, ks):
         b, c, h, w, N = x_offset.size()
-        x_offset = torch.cat([x_offset[..., s:s + ks].contiguous().view(b, c, h, w * ks) for s in range(0, N, ks)],
-                             dim=-1)
-        x_offset = x_offset.contiguous().view(b, c, h * ks, w * ks)
+        x_offset = torch.cat([x_offset[..., s:s+ks].contiguous().view(b, c, h, w*ks) for s in range(0, N, ks)], dim=-1)
+        x_offset = x_offset.contiguous().view(b, c, h*ks, w*ks)
 
         return x_offset
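
A hedged usage sketch for the module defined in this file: the class name DeformConv2d does not appear in this diff and is an assumption, while the constructor arguments (inc, outc, kernel_size, padding, stride, modulation) are taken from the __init__ hunk above:

# Usage sketch only; DeformConv2d is an assumed class name for the module in
# deform_conv_v2.py (the name itself is not shown in this diff).
import torch
from network.deform_conv.deform_conv_v2 import DeformConv2d

layer = DeformConv2d(inc=16, outc=32, kernel_size=3, padding=1, stride=1,
                     modulation=True)  # modulation=True enables the m_conv branch
x = torch.randn(2, 16, 24, 24)         # (b, c, h, w)
y = layer(x)                           # offsets come from p_conv internally
print(y.size())                        # expected torch.Size([2, 32, 24, 24]) for stride=1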
