Drawing CAM

CAM을 그리는 코드인데 backward 함수를 쓰지 않아서 내 문제를 해결할 수 있을 것 같음

(test시에 gradient를 계산해야 하는 상황.. torch.no_grad를 써야 gpu메모리 문제없이 학습가능해서)

cam

 def cam(model, cam_topk=1, images=None, labels=None, truncate=False, shift=0.0):
     """Return the class activation maps for one class per sample.

     Args:
         model: Object exposing ``forward``, ``pred``, ``feature_map``,
             ``avgpool``, ``last_layer`` and the ``fc``/``conv`` layers.
             ``pred`` and ``feature_map`` are read after the optional
             forward pass (presumably cached by ``forward`` -- confirm
             against the model definition).
         cam_topk: 1-based rank of the predicted class to visualise when
             ``labels`` is not given (1 = highest score in ``model.pred``).
         images: Optional input batch; when given, ``model.forward`` is
             run on it first.
         labels: Optional class indices. When ``None`` the ``cam_topk``-th
             ranked prediction of each sample is used.
         truncate: Forwarded to ``model.avgpool``.
         shift: Forwarded to ``model.avgpool``.

     Returns:
         Tuple ``(cams, labels)`` where ``cams`` is the result of
         ``batched_index_select(score_map, 1, labels)`` and ``labels`` is
         the squeezed label tensor (kept at least 1-D).
     """
     if images is not None:
         _ = model.forward(images)

     if labels is None:
         _, labels = torch.topk(
             model.pred, k=cam_topk, dim=1, largest=True, sorted=True)
         # Keep only the k-th ranked class, as a (batch, 1) column.
         labels = labels[:, [cam_topk - 1]]

     labels = labels.squeeze()
     if labels.dim() == 0:
         # A bare squeeze() also removes the batch axis when the batch size
         # is 1; restore it so the index tensor always has a batch dimension.
         labels = labels.unsqueeze(0)

     last_layer = model.fc if model.last_layer == 'fc' else model.conv
     _, score_map = model.avgpool(
         model.feature_map, last_layer, truncate=truncate, shift=shift)
     # Pick, for every sample, the score-map channel of its selected class.
     cams = batched_index_select(score_map, 1, labels)
     return cams, labels

다행히 코드 길이는 굉장히 짧다.

그런데 이게 swin transformer feature map에 대해서도 효과가 있을지는 모르겠다.

일단 출력해보고 bounding box를 쳐보자 (좌표 값을 가져오자)

def cam(model, cam_topk=1, images=None, labels=None, truncate=False, shift=0.0):

model : CAM을 계산할 신경망 모델

cam_topk : labels가 주어지지 않았을 때, 예측 점수 기준으로 몇 번째 순위(top-k 중 k번째)의 클래스에 대해 CAM을 계산할 것인지 (default = 1)

images : 모델에 들어가는 입력 이미지.

labels : gt label. top-k 예측 클래스가 아니라, gt라벨에 대해 CAM을 구하고 싶을 때 입력 (default = None)

truncate : 마지막 층의 가중치(weight) 중 0 이하인 값을 0으로 세팅할 건지에 대한 여부 (이 경우 CAM시각화 향상, default=False)

shift : 가중치를 shift하기 위한 float value (default = 0.0). 단, 아래 코드에서는 shift를 적용하는 줄이 주석 처리되어 있어 실제로는 사용되지 않는다.

if images is not None:

이미지가 주어지면

    _ = model.forward(images)

모델에 이미지를 넣어 forward pass

if labels is None:

라벨이 주어지지 않았다면

    _, labels = torch.topk(model.pred, k=cam_topk, dim=1, largest=True, sorted=True)

모델의 예측값으로 label을 세팅

    labels = labels[:, [cam_topk-1]]

top-k 예측 중 cam_topk번째(k번째) 순위의 label만 추출

labels = labels.squeeze()

dimension 축소

last_layer = model.fc if model.last_layer == 'fc' else model.conv

model.last_layer 설정에 따라 마지막 층(fc 또는 conv)을 가져옴

_, score_map = model.avgpool(model.feature_map, last_layer, truncate=truncate, shift=shift)

avgpool 모듈에 feature map과 마지막 층을 넘겨 score map을 얻음 (score map은 feature map에 마지막 층의 가중치를 conv로 적용한 결과)

cams = batched_index_select(score_map, 1, labels)

score map을 인덱싱하여 cam을 얻는다.

결국 이 score map을 얻는게 관건...

score map을 계산하는 모듈은 avgpool 에 있다

ThresholdedAvgPool2D

 class ThresholdedAvgPool2d(nn.Module):
     """Thresholded average pooling that also returns a per-class score map.

     For every channel, activations that are not greater than
     ``threshold * channel_max`` are zeroed, and the remainder is averaged
     over the number of strictly positive activations instead of over all
     spatial positions.
     """

     def __init__(self, threshold=0.0):
         super().__init__()
         self.threshold = threshold

     def forward(self, feature_map, layer, truncate=False, shift=0.0, bias=True):
         """Compute class predictions and the class score map.

         Args:
             feature_map: 4-D tensor, unpacked as
                 (batch, channel, height, width).
             layer: Object exposing ``weight`` and, optionally, ``bias``.
                 A weight with fewer than 4 dimensions gets two trailing
                 singleton dimensions so it can be used as a conv kernel.
             truncate: When true, weights that are not greater than 0 are
                 replaced by 0 before the convolution.
             shift: Accepted for interface compatibility; currently unused.
             bias: When true and the layer has a bias, it is added to the
                 predictions.

         Returns:
             Tuple ``(pred, score_map)``: ``pred`` is the spatial sum of the
             convolved thresholded-average map (plus bias, if applied);
             ``score_map`` is the raw ``feature_map`` convolved with the
             same weight, without bias.
         """
         batch_size, channel, _, _ = feature_map.shape

         # Per-channel threshold = channel max * ratio. reshape (not view)
         # so that non-contiguous feature maps are accepted as well.
         max_vals, _ = torch.max(
             feature_map.reshape(batch_size, channel, -1), dim=2)
         thr_vals = (max_vals * self.threshold).view(batch_size, channel, 1, 1)
         thr_feature_map = torch.where(
             torch.gt(feature_map, thr_vals),
             feature_map,
             torch.zeros_like(feature_map))

         # Divide by the number of positives; a count of 0 is replaced by 1
         # so an all-zero channel does not divide by zero.
         num_positives = torch.sum(
             torch.gt(thr_feature_map, 0.), dim=(2, 3), keepdim=True)
         num_positives = torch.where(
             torch.eq(num_positives, 0),
             torch.ones_like(num_positives),
             num_positives)
         avg_feature_map = torch.div(thr_feature_map, num_positives.float())

         # NOTE: `shift` is intentionally not applied to the weight here.
         weight = layer.weight
         if truncate:
             weight = torch.where(
                 torch.gt(weight, 0.), weight, torch.zeros_like(weight))
         if weight.dim() < 4:
             weight = weight.unsqueeze(-1).unsqueeze(-1)

         avgpooled_map = nn.functional.conv2d(
             avg_feature_map, weight=weight, bias=None)
         pred = torch.sum(avgpooled_map, dim=(2, 3))
         score_map = nn.functional.conv2d(
             feature_map, weight=weight, bias=None)

         # Layers created without a bias expose `bias = None`; skip the
         # addition instead of failing on `None.view`.
         layer_bias = getattr(layer, 'bias', None)
         if bias and layer_bias is not None:
             pred = pred + layer_bias.view(1, -1)

         return pred, score_map

forward 부분만 보면 될듯

    def forward(self, feature_map, layer, truncate=False, shift=0.0, bias=True):

feature_map : input feature map, 특정 층의 활성화를 표현하는 텐서, 주로 최종 컨볼루션층 중 하나

layer : 컨볼루션 연산에 사용될 가중치 (주로 fc나 컨볼루션 층)

truncate : 위와 같음

shift : 위와 같음

bias : 최종 예측에서 레이어에 bias를 추가할건지 여부 (default : True)

         batch_size, channel, height, width = feature_map.shape
        max_vals, _ = torch.max(feature_map.view(batch_size, channel, -1), dim=2)
        thr_vals = (max_vals * self.threshold).view(batch_size, channel, 1, 1).expand_as(feature_map)

feature map의 최대값에 따라 threshold 값을 계산한다.

최댓값에 self.threshold(비율)만큼 곱한 값을 feature map 차원에 맞게 확장

         thr_feature_map = torch.where(
            torch.gt(feature_map, thr_vals), feature_map, torch.zeros_like(feature_map))

이제 위에서 계산한 threshold를 적용한 feature map을 생성

        num_positives = torch.sum(torch.gt(thr_feature_map, 0.), dim=(2,3))

threshold를 적용한 feature map에서 양수값의 수를 얻는다.

         num_positives = torch.where(torch.eq(num_positives, 0),
                                    torch.ones_like(num_positives),
                                    num_positives).view(batch_size, channel, 1, 1).expand_as(feature_map)
        avg_feature_map = torch.div(thr_feature_map, num_positives.float())

threshold를 적용한 feature map을 양수의 개수로 나눠 average feature map을 계산한다 (양수가 하나도 없으면 0으로 나누지 않도록 개수를 1로 바꾼다)

        weight = layer.weight
 
       if truncate:
           weight = torch.where(torch.gt(layer.weight, 0.),
                                layer.weight, torch.zeros_like(layer.weight))
 
       if len(weight.shape) < 4:
           weight = weight.unsqueeze(-1).unsqueeze(-1)

truncate 파라미터에 따라 가중치를 수정하고 (shift는 주석 처리되어 실제로는 적용되지 않음), 가중치가 4차원 미만이면 conv2d에 쓸 수 있도록 뒤에 차원 두 개를 추가

         avgpooled_map = nn.functional.conv2d(
            avg_feature_map, weight=weight, bias=None)

수정된 가중치를 이용하여 average feature map에 대한 컨볼루션 적용

       pred = torch.sum(avgpooled_map, dim=(2,3))

average pooled map에 대해 sum을 적용하여 예측을 계산한다.

         score_map = nn.functional.conv2d(
            feature_map, weight=weight, bias=None)

수정된 가중치로 기존의 feature map에 대해 convolution 연산 적용

nn.functional.conv2d

그럼 이 conv2d는 기존의 nn.Conv2d와 뭐가 다를까?

가장 큰 차이는 weight다.

nn.functional.conv2d는 입력으로 input과 weight를 직접 받는다. 즉, 다른 곳에서 만든 filter(weight)를 그대로 넘겨서 적용할 수 있다.

그러니까 nn.Conv2d는 모듈 내부에 weight를 파라미터로 가지고 있어서 호출할 때 weight를 넘기지 않는데, nn.functional.conv2d는 호출할 때 weight를 직접 넘겨준다 (즉, 외부에서 만든 filter를 사용)

저작자표시

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

Drawing CAM

티스토리툴바

개인정보

단축키

내 블로그

블로그 게시글

모든 영역

	def cam(model, cam_topk=1, images=None, labels=None, truncate=False, shift=0.0):
	    """Return the class activation maps for one class per sample.

	    Args:
	        model: Object exposing ``forward``, ``pred``, ``feature_map``,
	            ``avgpool``, ``last_layer`` and the ``fc``/``conv`` layers.
	            ``pred`` and ``feature_map`` are read after the optional
	            forward pass (presumably cached by ``forward`` -- confirm
	            against the model definition).
	        cam_topk: 1-based rank of the predicted class to visualise when
	            ``labels`` is not given (1 = highest score in ``model.pred``).
	        images: Optional input batch; when given, ``model.forward`` is
	            run on it first.
	        labels: Optional class indices. When ``None`` the ``cam_topk``-th
	            ranked prediction of each sample is used.
	        truncate: Forwarded to ``model.avgpool``.
	        shift: Forwarded to ``model.avgpool``.

	    Returns:
	        Tuple ``(cams, labels)`` where ``cams`` is the result of
	        ``batched_index_select(score_map, 1, labels)`` and ``labels`` is
	        the squeezed label tensor (kept at least 1-D).
	    """
	    if images is not None:
	        _ = model.forward(images)

	    if labels is None:
	        _, labels = torch.topk(
	            model.pred, k=cam_topk, dim=1, largest=True, sorted=True)
	        # Keep only the k-th ranked class, as a (batch, 1) column.
	        labels = labels[:, [cam_topk - 1]]

	    labels = labels.squeeze()
	    if labels.dim() == 0:
	        # A bare squeeze() also removes the batch axis when the batch size
	        # is 1; restore it so the index tensor always has a batch dimension.
	        labels = labels.unsqueeze(0)

	    last_layer = model.fc if model.last_layer == 'fc' else model.conv
	    _, score_map = model.avgpool(
	        model.feature_map, last_layer, truncate=truncate, shift=shift)
	    # Pick, for every sample, the score-map channel of its selected class.
	    cams = batched_index_select(score_map, 1, labels)
	    return cams, labels

	class ThresholdedAvgPool2d(nn.Module):
	    """Thresholded average pooling that also returns a per-class score map.

	    For every channel, activations that are not greater than
	    ``threshold * channel_max`` are zeroed, and the remainder is averaged
	    over the number of strictly positive activations instead of over all
	    spatial positions.
	    """

	    def __init__(self, threshold=0.0):
	        super().__init__()
	        self.threshold = threshold

	    def forward(self, feature_map, layer, truncate=False, shift=0.0, bias=True):
	        """Compute class predictions and the class score map.

	        Args:
	            feature_map: 4-D tensor, unpacked as
	                (batch, channel, height, width).
	            layer: Object exposing ``weight`` and, optionally, ``bias``.
	                A weight with fewer than 4 dimensions gets two trailing
	                singleton dimensions so it can be used as a conv kernel.
	            truncate: When true, weights that are not greater than 0 are
	                replaced by 0 before the convolution.
	            shift: Accepted for interface compatibility; currently unused.
	            bias: When true and the layer has a bias, it is added to the
	                predictions.

	        Returns:
	            Tuple ``(pred, score_map)``: ``pred`` is the spatial sum of the
	            convolved thresholded-average map (plus bias, if applied);
	            ``score_map`` is the raw ``feature_map`` convolved with the
	            same weight, without bias.
	        """
	        batch_size, channel, _, _ = feature_map.shape

	        # Per-channel threshold = channel max * ratio. reshape (not view)
	        # so that non-contiguous feature maps are accepted as well.
	        max_vals, _ = torch.max(
	            feature_map.reshape(batch_size, channel, -1), dim=2)
	        thr_vals = (max_vals * self.threshold).view(batch_size, channel, 1, 1)
	        thr_feature_map = torch.where(
	            torch.gt(feature_map, thr_vals),
	            feature_map,
	            torch.zeros_like(feature_map))

	        # Divide by the number of positives; a count of 0 is replaced by 1
	        # so an all-zero channel does not divide by zero.
	        num_positives = torch.sum(
	            torch.gt(thr_feature_map, 0.), dim=(2, 3), keepdim=True)
	        num_positives = torch.where(
	            torch.eq(num_positives, 0),
	            torch.ones_like(num_positives),
	            num_positives)
	        avg_feature_map = torch.div(thr_feature_map, num_positives.float())

	        # NOTE: `shift` is intentionally not applied to the weight here.
	        weight = layer.weight
	        if truncate:
	            weight = torch.where(
	                torch.gt(weight, 0.), weight, torch.zeros_like(weight))
	        if weight.dim() < 4:
	            weight = weight.unsqueeze(-1).unsqueeze(-1)

	        avgpooled_map = nn.functional.conv2d(
	            avg_feature_map, weight=weight, bias=None)
	        pred = torch.sum(avgpooled_map, dim=(2, 3))
	        score_map = nn.functional.conv2d(
	            feature_map, weight=weight, bias=None)

	        # Layers created without a bias expose `bias = None`; skip the
	        # addition instead of failing on `None.view`.
	        layer_bias = getattr(layer, 'bias', None)
	        if bias and layer_bias is not None:
	            pred = pred + layer_bias.view(1, -1)

	        return pred, score_map