Boostcamp AI Tech / Week 4
Day 14 - CNN, LSTM
소갱
2022. 2. 8. 19:23
class ConvolutionalNeuralNetworkClass(nn.Module):
    """
    Convolutional Neural Network (CNN) Class
    """
    def __init__(self, name='cnn', xdim=[1,28,28],
                 ksize=3, cdims=[32,64], hdims=[1024,128], ydim=10,
                 USE_BATCHNORM=False):
        super(ConvolutionalNeuralNetworkClass, self).__init__()
        self.name = name
        self.xdim = xdim
        self.ksize = ksize
        self.cdims = cdims
        self.hdims = hdims
        self.ydim = ydim
        self.USE_BATCHNORM = USE_BATCHNORM  # whether to use batch normalization

        # Convolutional layers
        self.layers = []
        prev_cdim = self.xdim[0]
        for cdim in self.cdims:  # for each conv block
            self.layers.append(
                nn.Conv2d(in_channels=prev_cdim,
                          out_channels=cdim,
                          kernel_size=self.ksize,
                          stride=(1,1),
                          padding=self.ksize//2))
            if self.USE_BATCHNORM:
                self.layers.append(nn.BatchNorm2d(cdim))  # batch-norm
            self.layers.append(nn.ReLU(True))  # activation
            self.layers.append(nn.MaxPool2d(kernel_size=(2,2), stride=(2,2)))  # max-pooling
            self.layers.append(nn.Dropout2d(p=0.5))  # dropout
            prev_cdim = cdim

        # Dense layers
        self.layers.append(nn.Flatten())  # flatten the feature map into a single vector
        prev_hdim = prev_cdim*(self.xdim[1]//(2**len(self.cdims)))*(self.xdim[2]//(2**len(self.cdims)))  # dimension after flattening
        for hdim in self.hdims:
            self.layers.append(nn.Linear(prev_hdim, hdim, bias=True))
            self.layers.append(nn.ReLU(True))  # activation
            prev_hdim = hdim
        # Final layer (without activation)
        self.layers.append(nn.Linear(prev_hdim, self.ydim, bias=True))

        # Concatenate all layers
        self.net = nn.Sequential()
        for l_idx, layer in enumerate(self.layers):
            layer_name = "%s_%02d"%(type(layer).__name__.lower(), l_idx)
            self.net.add_module(layer_name, layer)  # with nn.Sequential, each layer is registered via add_module(name, module)
        self.init_param()  # initialize parameters

    def init_param(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):  # init conv
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):  # init BN
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):  # init dense
                nn.init.kaiming_normal_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        return self.net(x)
C = ConvolutionalNeuralNetworkClass(
    name='cnn', xdim=[1,28,28], ksize=3, cdims=[32,64],
    hdims=[32], ydim=10).to(device)
loss = nn.CrossEntropyLoss()
optm = optim.Adam(C.parameters(), lr=1e-3)
print("Done.")
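The prev_hdim line above can be sanity-checked by hand: each MaxPool2d(2,2) halves the spatial size, so a 28x28 input shrinks to 7x7 after the two conv blocks. A minimal sketch of that arithmetic, assuming the MNIST xdim and cdims used in this post:

xdim, cdims = [1, 28, 28], [32, 64]
flat_dim = cdims[-1] * (xdim[1] // 2**len(cdims)) * (xdim[2] // 2**len(cdims))
print(flat_dim)  # 64 * 7 * 7 = 3136 features going into the first Linear layer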
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
%matplotlib inline
%config InlineBackend.figure_format='retina'
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
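With the device set, a forward pass on a random tensor is a quick way to confirm the CNN's output shape. A minimal sketch, assuming the CNN cell above has already been run:

x_numpy = np.random.rand(2, 1, 28, 28)                 # dummy batch of two 1x28x28 images
x_torch = torch.from_numpy(x_numpy).float().to(device)
y_torch = C.forward(x_torch)                            # [2, 10] class scores
print("x_torch:", x_torch.shape, "y_torch:", y_torch.shape)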
---------------------------------------------------------------------------
from torchvision import datasets,transforms
mnist_train = datasets.MNIST(root='./data/',train=True,transform=transforms.ToTensor(),download=True)
mnist_test = datasets.MNIST(root='./data/',train=False,transform=transforms.ToTensor(),download=True)
BATCH_SIZE = 256
train_iter = torch.utils.data.DataLoader(mnist_train,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
test_iter = torch.utils.data.DataLoader(mnist_test,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
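The post defines the model, loss, and optimizer but not the optimization step itself, so here is a minimal training-loop sketch for the CNN, assuming C, loss, optm, and train_iter from the cells above (EPOCHS is a hypothetical setting):

EPOCHS = 3  # hypothetical number of epochs
C.train()   # training mode (enables dropout)
for epoch in range(EPOCHS):
    loss_sum = 0.0
    for batch_in, batch_out in train_iter:
        y_pred = C(batch_in.to(device))                  # forward: [B,1,28,28] -> [B,10]
        batch_loss = loss(y_pred, batch_out.to(device))  # cross-entropy loss
        optm.zero_grad()       # reset gradients
        batch_loss.backward()  # backpropagate
        optm.step()            # update parameters
        loss_sum += batch_loss.item()
    print("epoch:[%d] loss:[%.4f]" % (epoch, loss_sum / len(train_iter)))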
---------------------------------------------------------------------------
class RecurrentNeuralNetworkClass(nn.Module):
    def __init__(self, name='rnn', xdim=28, hdim=256, ydim=10, n_layer=3):
        super(RecurrentNeuralNetworkClass, self).__init__()
        self.name = name
        self.xdim = xdim
        self.hdim = hdim
        self.ydim = ydim
        self.n_layer = n_layer
        self.rnn = nn.LSTM(
            input_size=self.xdim, hidden_size=self.hdim, num_layers=self.n_layer, batch_first=True)
        self.lin = nn.Linear(self.hdim, self.ydim)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.n_layer, x.size(0), self.hdim).to(device)  # (num_layers, batch_size, hidden_dim)
        # x.size(0) is the value at index 0 of x's shape, i.e. the batch size.
        # hdim(=256) is the number of features in each hidden state vector.
        c0 = torch.zeros(self.n_layer, x.size(0), self.hdim).to(device)
        # c0 (the initial cell state) has the same shape as h0.
        # RNN
        rnn_out, (hn, cn) = self.rnn(x, (h0, c0))
        # self.rnn is the nn.LSTM; it takes the input x together with the initial states (h0, c0).
        print(x.shape)  # debug: check the input shape
        # x:[N x L x Q] => rnn_out:[N x L x D]
        # N = batch size
        # L = sequence length
        # Q = input dim
        # D = LSTM feature dimension
        # Linear
        out = self.lin(rnn_out[:, -1, :]).view([-1, self.ydim])
        # rnn_out[:,-1,:] keeps every batch and feature but only the last time step
        # (the last value along the second dimension); only the output at the final
        # time step is needed to classify the whole sequence.
        return out
R = RecurrentNeuralNetworkClass(
    name='rnn', xdim=28, hdim=256, ydim=10, n_layer=2).to(device)
loss = nn.CrossEntropyLoss()
optm = optim.Adam(R.parameters(), lr=1e-3)
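For the LSTM, each 28x28 MNIST image is treated as a sequence of L=28 rows with Q=28 features per step, matching the [N x L x Q] comment in forward. A minimal evaluation sketch, assuming R and test_iter from the cells above (note the debug print inside forward will fire once per batch):

R.eval()  # evaluation mode
with torch.no_grad():
    n_correct, n_total = 0, 0
    for batch_in, batch_out in test_iter:
        x = batch_in.view(-1, 28, 28).to(device)  # [B,1,28,28] -> [B,28,28]: 28 steps x 28 features
        y_pred = R(x)                              # [B,10] class scores
        n_correct += (y_pred.argmax(dim=1) == batch_out.to(device)).sum().item()
        n_total += batch_out.size(0)
    print("test accuracy: %.4f" % (n_correct / n_total))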