
Model Weight Initialization

상솜공방 2023. 10. 3. 16:29

You can initialize each layer's weights with functions such as torch.nn.init.uniform_.

import torch
import torch.nn as nn

layer = torch.nn.Conv2d(1, 1, 2) # a simple example layer

print(layer, end = '\n\n')
print(layer.__class__.__name__, end = '\n\n') # print the object's class name
print(dir(layer), end = '\n\n') # list all attributes of the object
print(hasattr(layer, 'weight'), end = '\n\n') # does the object have an attribute named 'weight'?

print(layer.weight, end = '\n\n') # inspect the layer's weights
# PyTorch's built-in reset_parameters() initializes them with kaiming_uniform by default

torch.nn.init.uniform_(layer.weight.data, 0.5, 0.5) # to initialize the weights yourself; U(0.5, 0.5) fills every entry with exactly 0.5
print(layer.weight)

Output:

Conv2d(1, 1, kernel_size=(2, 2), stride=(1, 1))

Conv2d

['T_destination', '__annotations__', '__call__', '__class__', '__constants__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_buffers', '_call_impl', '_conv_forward', '_forward_hooks', '_forward_pre_hooks', '_get_backward_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_post_hooks', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_parallel', '_reversed_padding_repeated_twice', '_save_to_state_dict', '_slow_forward', '_state_dict_hooks', '_version', 'add_module', 'apply', 'bfloat16', 'bias', 'buffers', 'children', 'cpu', 'cuda', 'dilation', 'double', 'dump_patches', 'eval', 'extra_repr', 'float', 'forward', 'get_buffer', 'get_extra_state', 'get_parameter', 'get_submodule', 'groups', 'half', 'in_channels', 'ipu', 'kernel_size', 'load_state_dict', 'modules', 'named_buffers', 'named_children', 'named_modules', 'named_parameters', 'out_channels', 'output_padding', 'padding', 'padding_mode', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_full_backward_hook', 'register_load_state_dict_post_hook', 'register_module', 'register_parameter', 'requires_grad_', 'reset_parameters', 'set_extra_state', 'share_memory', 'state_dict', 'stride', 'to', 'to_empty', 'train', 'training', 'transposed', 'type', 'weight', 'xpu', 'zero_grad']

True

Parameter containing:
tensor([[[[-0.0813, -0.3744],
          [ 0.3922,  0.1265]]]], requires_grad=True)

Parameter containing:
tensor([[[[0.5000, 0.5000],
          [0.5000, 0.5000]]]], requires_grad=True)
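As the comment above notes, PyTorch initializes Conv2d weights with kaiming_uniform by default, and that default can be restored at any time: layers with parameters, such as Conv2d and Linear, expose a reset_parameters() method. A minimal sketch, reusing the layer defined above:

layer.reset_parameters() # re-runs the default kaiming_uniform initialization
print(layer.weight)      # the all-0.5 weights are replaced by fresh random values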

 

Initializing weights conditionally using an inner function (also called a nested function):

def weights_init(init_type = 'normal', gain = 0.02):
    # init_func closes over init_type and gain, so one factory call
    # fixes the initialization scheme
    def init_func(m):
        if init_type == 'normal':
            nn.init.normal_(m.weight.data, 0.0, gain)
        elif init_type == 'xavier':
            nn.init.xavier_normal_(m.weight.data, gain = gain)
        elif init_type == 'uniform':
            nn.init.uniform_(m.weight.data, 0.5, 0.5)
    return init_func
    
weights_init('xavier', gain = 0.02)(layer)
print(layer.weight)
weights_init('normal', gain = 0.02)(layer)
print(layer.weight)
weights_init('uniform', gain = 0.02)(layer)
print(layer.weight)

Output:

Parameter containing:
tensor([[[[-0.0078, -0.0042],
          [ 0.0049,  0.0035]]]], requires_grad=True)
Parameter containing:
tensor([[[[-0.0112,  0.0418],
          [ 0.0128,  0.0040]]]], requires_grad=True)
Parameter containing:
tensor([[[[0.5000, 0.5000],
          [0.5000, 0.5000]]]], requires_grad=True)
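In practice, an init_func like this is rarely called on a single layer by hand; it is usually passed to model.apply(), which calls it on every submodule recursively. Below is a minimal sketch of that pattern. The name guarded_weights_init, the hasattr guard, and the bias zeroing are my additions for illustration, not part of the original code:

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(1, 4, 3), nn.ReLU(), nn.Conv2d(4, 1, 3))

def guarded_weights_init(init_type = 'normal', gain = 0.02): # hypothetical variant of weights_init
    def init_func(m):
        if hasattr(m, 'weight') and m.weight is not None: # skip modules without weights, e.g. nn.ReLU
            if init_type == 'normal':
                nn.init.normal_(m.weight.data, 0.0, gain)
            elif init_type == 'xavier':
                nn.init.xavier_normal_(m.weight.data, gain = gain)
        if hasattr(m, 'bias') and m.bias is not None:
            nn.init.constant_(m.bias.data, 0.0) # common companion convention: zero the biases
    return init_func

model.apply(guarded_weights_init('xavier', gain = 0.02)) # apply() visits every submodule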

Source: https://jh-bk.tistory.com/10