strict=False 但还是size mismatch 的解决办法

2023-08-25 12:52:29 来源：博客园

(资料图片)

问题描述：

# RuntimeError: Error(s) in loading state_dict for Fusion_Generator: size mismatch for fg_decoder.0.weight: copying a param with shape torch.Size([4096, 1024]) ...; size mismatch for bg_decoder.0.weight: copying a param with shape torch.Size([4096, 1024]) ...

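出现两个参数的形状不匹配。需要注意：strict=False 只会忽略“缺失的键”和“多余的键”（missing / unexpected keys），并不会忽略键名相同但形状不一致的参数，所以即使传入 strict=False，仍然会报 size mismatch。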

具体内容如下：

model = GAN(opt)
loaded = torch.load(model_path)
assert (opt.epoch == loaded["epoch"])
model.load_state_dict(loaded["model"], strict=False)   # 这里爆出上述Error，定位到下面的函数

def load_state_dict(self, pretrained_dict, strict=False):
    for k in pretrained_dict:
        if k ...
            ...
            ...
        elif k == "fusion_generator":
            self.fusion_generator.load_state_dict(pretrained_dict[k], strict=strict)  # 这里虽然strict传入的是False，忽略不匹配参数，仍有上述问题
        elif k ...
            ...

参考网上的做法后，尝试直接 pop() 掉 fg_decoder.0.weight 和 bg_decoder.0.weight（一般情况下，把形状不匹配的键 pop 掉就能解决问题），但这里会出现新的问题，即

KeyError: "fg_decoder.0.weight，bg_decoder.0.weight"

即在字典中找不到上述两个键，这时可以通过打印模型参数的具体内容来查看：

def load_state_dict(self, pretrained_dict, strict=False):
    for k in pretrained_dict:
        if k ...
            ...
            ...
        elif k == "fusion_generator":
            for u in pretrained_dict[k].keys():
                print(u," ",pretrained_dict[k][u])
            self.fusion_generator.load_state_dict(pretrained_dict[k], strict=strict)  #
        elif k ...
            ...

打印结果

fg_decoder.0.weight xxxxxx tensor([0., 0., 0.,  ..., 0., 0., 0.], device="cuda:0")

fg_decoder.0.bias xxxxxx tensor([0., 0., 0.,  ..., 0., 0., 0.], device="cuda:0")
fg_decoder.1.weight xxxxxx tensor([1.0362, 0.9969, 0.9892,  ..., 0.9939, 1.0122, 1.0190], device="cuda:0")
fg_decoder.1.bias xxxxxx tensor([0., 0., 0.,  ..., 0., 0., 0.], device="cuda:0")
fg_decoder.1.running_mean xxxxxx tensor([ 0.1915, -0.5510,  0.5370,  ..., -0.1265,  0.8344,  1.4391],
       device="cuda:0")
fg_decoder.1.running_var xxxxxx tensor([0.9402, 0.7382, 0.0167,  ..., 0.3988, 0.1081, 0.4470], device="cuda:0")
fg_decoder.1.num_batches_tracked xxxxxx tensor(3880, device="cuda:0")
fg_decoder.3.weight xxxxxx tensor([[ 0.0211, -0.0072,  0.0030,  ...,  0.0090,  0.0120,  0.0043],
        [ 0.0221, -0.0320, -0.0050,  ...,  0.0239,  0.0035,  0.0438],
        [ 0.0246, -0.0091,  0.0146,  ..., -0.0003,  0.0257, -0.0025],
        ...,
        [ 0.0077, -0.0209, -0.0017,  ...,  0.0135,  0.0418,  0.0052],
        [ 0.0109,  0.0066, -0.0093,  ...,  0.0048, -0.0019, -0.0381],
        [ 0.0145, -0.0165,  0.0095,  ...,  0.0252, -0.0184,  0.0178]],
       device="cuda:0")
....

bg_decoder.0.weight xxxxxx tensor([0., 0., 0.,  ..., 0., 0., 0.], device="cuda:0")....

可以发现 fg_decoder.0.weight 和 bg_decoder.0.weight 都在里面，但它们是内层有序字典 pretrained_dict[k] 的键（取值方式为 pretrained_dict[k][u]），而不是最外层字典的键——这很可能就是之前直接 pop 时报 KeyError 的原因。

所以！！！在内层有序字典 pretrained_dict[k] 中将报错对应的键删除后，就能解决 size mismatch 问题（注意：被删除的这两个参数不会再从 checkpoint 中加载，而是保持模型当前的初始化值）：

def load_state_dict(self, pretrained_dict, strict=False):
    for k in pretrained_dict:
        if k ...
            ...
            ...
        elif k == "fusion_generator":
            for u in list(pretrained_dict[k].keys()):  # (小坑)加list防止同时读写报错
                if u == "fg_decoder.0.weight" or u == "bg_decoder.0.weight":
                    pretrained_dict[k].pop(u)
            self.fusion_generator.load_state_dict(pretrained_dict[k], strict=strict)  #
        elif k ...
            ...

成功解决问题~

关键词：