ResNet-18 모델을 구현하기 위해 단순화된 코드이다.
해당 코드와 설명은 아래의 게시물을 참고하였다.
ResNet-18 Implementation Code¶
code & description reference : https://yhkim4504.tistory.com/3
In [23]:
from typing import Any, Callable, List, Optional, Type, Union

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
from einops import rearrange, reduce, repeat
from einops.layers.torch import Rearrange, Reduce
from PIL import Image
from torch import Tensor
from torch import nn
from torchsummary import summary
from torchvision.transforms import Compose, Resize, ToTensor
In [6]:
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """Build a bias-free 3x3 convolution layer.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.
        groups: Number of blocked connections between input and output channels.
        dilation: Spacing between kernel elements; also used as the padding.

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    # Padding equal to the dilation keeps the spatial size unchanged for a
    # 3x3 kernel when stride is 1.
    layer = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return layer
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Build a bias-free 1x1 (pointwise) convolution layer.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    layer = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return layer
- in_planes : 입력 필터 개수
- out_planes : 출력 필터 개수
- groups : input, output의 connection 제어, default : 1=> 모든 입력이 모든 출력과 convolution 연산
- dilation : 커널 원소 간의 거리(늘릴수록 같은 파라미터 수로 더 넓은 범위를 파악), default : 1
Residual Block¶
- init
- 처음에 normalization layer가 주어지지 않은 경우, nn.BatchNorm2d로 지정
- conv1, bn1, relu, conv2, bn2순서로 forward에 필요한 layer를 정의
- downsample은 forward 시 residual을 구현할 경우 (f(x)+x), f(x)와 x의 텐서사이즈가 다른 경우 사용
- forward
- identity 입력 변수에 입력텐서 x를 저장
- 신경망을 거친 뒤 out+identity 한 결과에 relu를 거침
- downsampling이 필요한 경우 down sample
- downsample layer는 Resnet class에서 정의하여 입력
In [95]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    The main branch is ``conv1 -> bn1 -> relu -> conv2 -> bn2``. Its output is
    added to the shortcut (the input itself, or ``downsample(input)`` when a
    downsample module is supplied) and passed through a final ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Number of output channels of both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input so that the shortcut
            matches the main branch's shape. Required whenever the main
            branch changes the tensor shape.
        groups: Must be 1; grouped convolutions are not supported here.
        dilation: Must be 1; dilated convolutions are not supported here.
        norm_layer: Callable that builds a normalization layer from a channel
            count. Defaults to ``nn.BatchNorm2d``.

    Raises:
        ValueError: If ``groups`` is not 1.
        NotImplementedError: If ``dilation`` is greater than 1.
    """

    # Ratio between the block's output channels and ``planes``. Declared so
    # that this block exposes the same attribute as ``Bottleneck``.
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        # The convolutions below are always built with groups=1 and
        # dilation=1, so reject other values instead of silently ignoring them.
        if groups != 1:
            raise ValueError("BasicBlock only supports groups=1")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the residual block to ``x`` and return the activated sum."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # When the main branch changes the shape, the caller must have passed
        # a downsample module so the shortcut can be added to it.
        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity  # residual connection
        out = self.relu(out)
        return out
- Optional : None이 허용되는 함수의 매개변수에 대한 타입을 명시할 때 사용
Callable : 함수에 대한 타입 어노테이션
ex) Callable[[str], str] : str타입의 인자를 받고, 결과값을 str로 반환
Bottleneck¶
In [96]:
class Bottleneck(nn.Module):
    """Three-layer residual block: 1x1 reduce, 3x3, then 1x1 expand.

    The block outputs ``planes * expansion`` channels. The result of the main
    branch is added to the shortcut (the input, or ``downsample(input)`` when
    a downsample module is given) and passed through a final ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Base channel count; the output has ``planes * expansion``.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the shortcut.
        groups: Groups of the 3x3 convolution; also scales the inner width.
        base_width: Scales the inner width as ``planes * base_width / 64``.
        dilation: Dilation of the 3x3 convolution.
        norm_layer: Callable that builds a normalization layer from a channel
            count. Defaults to ``nn.BatchNorm2d``.
    """

    # Bottleneck in torchvision places the stride for downsampling at the 3x3
    # convolution (self.conv2), while the original implementation places it at
    # the first 1x1 convolution (self.conv1), according to
    # "Deep residual learning for image recognition"
    # https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according
    # to https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        mid_channels = int(planes * (base_width / 64.0)) * groups
        out_channels = planes * self.expansion

        # Both self.conv2 and self.downsample reduce the input when stride != 1.
        self.conv1 = conv1x1(inplanes, mid_channels)
        self.bn1 = make_norm(mid_channels)
        self.conv2 = conv3x3(mid_channels, mid_channels, stride, groups, dilation)
        self.bn2 = make_norm(mid_channels)
        self.conv3 = conv1x1(mid_channels, out_channels)
        self.bn3 = make_norm(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Apply the bottleneck block to ``x`` and return the activated sum."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        # Project the input only when a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        y += shortcut
        return self.relu(y)
ResNet class¶
In [97]:
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head.

    Layout: a 7x7 stride-2 convolution stem with max pooling, four residual
    stages with base widths 64/128/256/512, adaptive average pooling to 1x1,
    and a fully connected layer producing ``num_classes`` outputs.
    ``ResNet(BasicBlock, [2, 2, 2, 2])`` gives the ResNet-18 configuration.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``).
        layers: Number of blocks in each of the four stages.
        num_classes: Size of the output of the final linear layer.
        zero_init_residual: If True, zero the weight of the last norm layer in
            every residual block so each block starts out as an identity.
        norm_layer: Callable that builds a normalization layer from a channel
            count. Defaults to ``nn.BatchNorm2d``.
        verbose: If True (the default), ``forward`` prints the tensor shape
            after every step. Pass False to silence the output, e.g. during
            training.
    """

    def __init__(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        layers: List[int],
        num_classes: int = 1000,
        zero_init_residual: bool = False,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        verbose: bool = True,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer  # factory reused by _make_layer
        self.verbose = verbose

        self.inplanes = 64  # channel count fed into the next block
        self.dilation = 1
        self.groups = 1

        # Input stem
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=False)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=False)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=False)

        # Classification head. The last stage outputs 512 * expansion
        # channels (512 for BasicBlock, 2048 for Bottleneck).
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * self._expansion(block), num_classes)

        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last norm layer in each residual branch so that
        # the branch starts at zero and each block behaves like an identity.
        # This improves the model by 0.2~0.3%.
        if zero_init_residual:
            for m in self.modules():
                # A norm layer built without affine parameters has weight None.
                if isinstance(m, Bottleneck) and m.bn3.weight is not None:
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
                    nn.init.constant_(m.bn2.weight, 0)

    @staticmethod
    def _expansion(block: type) -> int:
        """Return the block's output-channel multiplier (1 if undeclared)."""
        return getattr(block, "expansion", 1)

    def _make_layer(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        planes: int,
        blocks: int,
        stride: int = 1,
        dilate: bool = False,
    ) -> nn.Sequential:
        """Build one residual stage of ``blocks`` consecutive blocks.

        Args:
            block: Residual block class to instantiate.
            planes: Base channel count of the stage; the stage outputs
                ``planes * block.expansion`` channels.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block; later blocks use stride 1.
            dilate: Accepted for signature compatibility; currently unused.

        Returns:
            The blocks wrapped in an ``nn.Sequential``.
        """
        norm_layer = self._norm_layer
        out_planes = planes * self._expansion(block)

        # The first block needs a projection shortcut whenever it changes the
        # spatial size (stride != 1) or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                norm_layer(out_planes),
            )

        # Keyword arguments keep the call valid for both block classes, whose
        # positional parameter orders differ (Bottleneck has base_width).
        layers = [
            block(
                self.inplanes,
                planes,
                stride=stride,
                downsample=downsample,
                groups=self.groups,
                dilation=self.dilation,
                norm_layer=norm_layer,
            )
        ]
        # Every following block consumes the first block's output channels.
        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    dilation=self.dilation,
                    norm_layer=norm_layer,
                )
            )
        return nn.Sequential(*layers)

    def _log_shape(self, name: str, x: Tensor) -> None:
        """Print ``x``'s shape labelled with ``name`` when verbose is set."""
        if self.verbose:
            print(f'{name} shape:', x.shape)

    def forward(self, x: Tensor) -> Tensor:
        """Run the network on ``x`` and return the output of the final layer.

        When ``self.verbose`` is True the tensor shape is printed after each
        step.
        """
        self._log_shape('input', x)
        x = self.conv1(x)
        self._log_shape('conv1', x)
        x = self.bn1(x)
        self._log_shape('bn1', x)
        x = self.relu(x)
        self._log_shape('relu', x)
        x = self.maxpool(x)
        self._log_shape('maxpool', x)

        x = self.layer1(x)
        self._log_shape('layer1', x)
        x = self.layer2(x)
        self._log_shape('layer2', x)
        x = self.layer3(x)
        self._log_shape('layer3', x)
        x = self.layer4(x)
        self._log_shape('layer4', x)

        x = self.avgpool(x)
        self._log_shape('avgpool', x)
        x = torch.flatten(x, 1)
        self._log_shape('flatten', x)
        x = self.fc(x)
        self._log_shape('fc', x)
        return x
init
# Fall back to batch normalization when no norm-layer factory was supplied.
norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
self._norm_layer = norm_layer  # reused later when building the stages
# inplanes is the channel count fed into the next block; dilation and groups
# are fixed to 1.
self.inplanes, self.dilation, self.groups = 64, 1, 1
# Input stem: 7x7 stride-2 convolution, norm, ReLU, 3x3 stride-2 max pooling.
self.conv1 = nn.Conv2d(
    3,
    self.inplanes,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
- normalization layer가 없다면 생성
- inplanes, dilation, groups는 각각 64,1,1로 고정
# Residual stages: the base width doubles at every stage (64 -> 128 -> 256 -> 512).
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=False)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=False)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=False)
# Pool every channel down to a single value.
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
# The last stage outputs 512 * expansion channels (expansion is 4 for
# Bottleneck; blocks that do not declare it are treated as 1).
self.fc = nn.Linear(512 * getattr(block, "expansion", 1), num_classes)
- Residual block들을 쌓는다. 필터의 개수는 각 block들을 거치면서 2배씩 늘어나게 됨 (64->128->256->512)
- 모든 block들을 거친 후에는 AdaptiveAvgPool2d를 적용하여 (n,512,1,1)의 텐서로 만듦
- fc layer 연결
_make_layer
def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int,
                stride: int = 1, dilate: bool = False) -> nn.Sequential:
    """Build one residual stage of ``blocks`` consecutive blocks.

    Args:
        block: Residual block class to instantiate.
        planes: Base channel count of the stage; the stage outputs
            ``planes * block.expansion`` channels.
        blocks: Number of blocks in the stage.
        stride: Stride of the first block; later blocks use stride 1.
        dilate: Accepted for signature compatibility; currently unused.

    Returns:
        The blocks wrapped in an ``nn.Sequential``.
    """
    norm_layer = self._norm_layer
    # Blocks that do not declare an expansion factor are treated as 1.
    out_planes = planes * getattr(block, "expansion", 1)

    # The first block needs a projection shortcut whenever it changes the
    # spatial size (stride != 1) or the channel count.
    downsample = None
    if stride != 1 or self.inplanes != out_planes:
        downsample = nn.Sequential(
            conv1x1(self.inplanes, out_planes, stride),
            norm_layer(out_planes),
        )

    # Keyword arguments keep the call valid for both block classes, whose
    # positional parameter orders differ (Bottleneck has base_width).
    layers = [
        block(
            self.inplanes,
            planes,
            stride=stride,
            downsample=downsample,
            groups=self.groups,
            dilation=self.dilation,
            norm_layer=norm_layer,
        )
    ]
    # Every following block consumes the first block's output channels.
    self.inplanes = out_planes
    for _ in range(1, blocks):
        layers.append(
            block(
                self.inplanes,
                planes,
                groups=self.groups,
                dilation=self.dilation,
                norm_layer=norm_layer,
            )
        )
    return nn.Sequential(*layers)
- Residual block 생성
- block : BasicBlock 구조 사용
- planes : 블록의 출력 채널(필터) 개수
- blocks : layer 반복해서 쌓는 개수
- stride, dilate : default value
- stride가 1이 아니라서 줄어들거나, self.inplanes와 planes의 크기가 맞지 않을 때 downsampling
- 처음에 한번 따로 쌓아주는 이유는 첫 block을 쌓고 self.inplanes와 planes를 맞추기 위함
Summary¶
In [100]:
# Build the ResNet-18 configuration: BasicBlock with two blocks in each of the
# four stages.
model = ResNet(BasicBlock, [2,2,2,2])
# Bare expression: in a notebook cell this displays the module structure.
model
Out[100]:
ResNet( (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (layer1): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) (1): BasicBlock( (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer2): Sequential( (0): BasicBlock( (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), 
padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer3): Sequential( (0): BasicBlock( (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (layer4): Sequential( (0): BasicBlock( (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): BasicBlock( (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), 
stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (avgpool): AdaptiveAvgPool2d(output_size=(1, 1)) (fc): Linear(in_features=512, out_features=1000, bias=True) )
In [101]:
# Random input: a batch of one 3-channel 112x112 tensor.
x = torch.randn(1,3,112,112)
# Forward pass; the recorded output shape is (1, 1000), one value per class.
model(x).shape
input shape: torch.Size([1, 3, 112, 112]) conv1 shape: torch.Size([1, 64, 56, 56]) bn1 shape: torch.Size([1, 64, 56, 56]) relu shape: torch.Size([1, 64, 56, 56]) maxpool shape: torch.Size([1, 64, 28, 28]) layer1 shape: torch.Size([1, 64, 28, 28]) layer2 shape: torch.Size([1, 128, 14, 14]) layer3 shape: torch.Size([1, 256, 7, 7]) layer4 shape: torch.Size([1, 512, 4, 4]) avgpool shape: torch.Size([1, 512, 1, 1]) flatten shape: torch.Size([1, 512]) fc shape: torch.Size([1, 1000])
Out[101]:
torch.Size([1, 1000])