07-01 Live Session
GAP (Global Average Pooling)
import torch
import torch.nn as nn
import torch.nn.functional as F
# Global Average Pooling (GAP) demo.
# x must exist before it is pooled; the GAP result gets its own name so the
# max-pooling comparison below still operates on the original feature map.
x = torch.randn(16, 14, 14)  # (channels, height, width)
gap = F.adaptive_avg_pool2d(x, (1, 1))  # one mean per channel -> (16, 1, 1)

# Global max pooling per channel, then average over groups of 4 channels.
out = F.adaptive_max_pool2d(x.unsqueeze(0), output_size=1)  # -> (1, 16, 1, 1)
# Calculate result manually to compare results
out_manual = torch.stack([out[:, i:i + 4].mean() for i in range(0, 16, 4)])  # -> (4,)
out = out.view(out.size(0), out.size(1) // 4, -1)  # -> (1, 4, 4): 4 groups of 4 channels
out = out.mean(2)  # -> (1, 4)
print(torch.allclose(out_manual, out))
class Net(nn.Module):
    """VGG16-style convolutional network with a global-average-pooling head.

    The convolutional stack halves the spatial size five times (one
    ``MaxPool2d(2, 2)`` per stage), so an ``H x W`` input produces a
    ``512 x H/32 x W/32`` feature map. That map is reduced to one value per
    channel by global average pooling and fed to a single linear classifier,
    replacing the three large fully connected layers of the classic VGG head.

    Global pooling is done with ``AdaptiveAvgPool2d(1)`` rather than a fixed
    ``AvgPool2d(7)``: the fixed kernel is only correct for 224x224 inputs
    (7x7 features) and fails for e.g. 128x128 inputs (4x4 features).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Size comments read: channels, spatial size for a 224 input,
        # spatial size for a 128 input.
        self.conv = nn.Sequential(
            # 3 224 128
            nn.Conv2d(3, 64, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(64, 64, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            # 64 112 64
            nn.Conv2d(64, 128, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(128, 128, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            # 128 56 32
            nn.Conv2d(128, 256, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(256, 256, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(256, 256, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            # 256 28 16
            nn.Conv2d(256, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
            # 512 14 8
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.Conv2d(512, 512, 3, padding=1), nn.LeakyReLU(0.2),
            nn.MaxPool2d(2, 2),
        )
        # 512 7 4 -> 512 1 1, independent of the input resolution.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Image batch of shape ``(N, 3, H, W)``; ``H`` and ``W`` must be
                at least 32 so the five pooling stages leave a non-empty map.

        Returns:
            A tuple ``(logits, features)``: ``logits`` has shape
            ``(N, num_classes)`` with no softmax applied, and ``features`` is
            the last convolutional feature map of shape
            ``(N, 512, H/32, W/32)`` (presumably returned for CAM-style
            visualisation).
        """
        features = self.conv(x)
        pooled = self.avg_pool(features)
        # Flatten (N, 512, 1, 1) -> (N, 512) for the linear classifier.
        pooled = pooled.view(features.size(0), -1)
        logits = self.classifier(pooled)
        return logits, features
CNN History
Alexnet
VGG
Inception
Resnet
resnet 이후에는 깊이에 대한 경쟁은 줄어듦
Efficientnet
https://hoya012.github.io/blog/EfficientNet-review/
GAN
VAE
RNN
LSTM
RNN의 한계를 극복하고자 해서 나온 것.
Transfer Learning
Domain Adaptation
Generalization
mlp mixer
Q & A
- 데이터를 보고 전이학습을 쓰면 좋겠다 / 쓰면 안되겠다 하는 판단의 기준이 있을까요?
- inductive: 도메인이 똑같고 태스크만 달라지는, 쉽게 해결 가능
- 도메인이 달라지는 경우 어렵다.
- fc층에서 avg pooling을 써서 1대 1 대칭이 이루어진다고 하셨는데, fc층에는 linear층을 쓰지 않는다는 것일까요
- 마지막 레이어층만 fc쓰고 그전에 gap으로 추출한 것을 1:1로 대응.
- 예전에는 연산량을 줄이려고 애썼다면 요즘은 하드웨어의 능력치를 믿고 연산량을 줄이기보다는 성능위주로 가고 있다고 생각하면 되나요? 아니면 지금도 연산량을 최대한 줄이려는 노력을 하나요?
- deeper, lighter
http://cs231n.stanford.edu/slides/2021/lecture_4.pdf
http://cs231n.stanford.edu/slides/2021/lecture_5.pdf
http://cs231n.stanford.edu/slides/2021/lecture_6.pdf
Reference
gap: https://discuss.pytorch.org/t/tensor-global-max-pooling-and-average/38988
cam with gap: https://ctkim.tistory.com/117
CNN Development History: https://hoya012.github.io/blog/deeplearning-classification-guidebook-1/, https://hoya012.github.io/blog/deeplearning-classification-guidebook-2/, https://hoya012.github.io/blog/deeplearning-classification-guidebook-3/, https://hoya012.github.io/blog/deeplearning-classification-guidebook-4/ :http://cs231n.stanford.edu/slides/2021/lecture_5.pdf
Leave a comment