点击上方,选择星标或置顶,每天给你送干货!
阅读大概需要10分钟
跟随小博主,每天进步一丢丢
作者 | 郁振波
地址 | https://zhuanlan.zhihu.com/p/7792356
编辑 | 深度学习这件小事公众号
  
  
    
   
   
     写:
    # Store each image path as bytes (relative to 'images/'), the form in
    # which it is written to the HDF5 file below.
    imagenametotal_.append(os.path.join('images', imagenametotal).encode())
    # Open explicitly for writing. h5py >= 3.0 defaults to read-only when no
    # mode is given, so the dataset writes below would fail without 'w'.
    # NOTE: 'w' truncates an existing file at `outfile`.
    with h5py.File(outfile, 'w') as f:
        f.create_dataset('imagename', data=imagenametotal_)
        f['part'] = parts_
        f['S'] = Ss_
        f['image'] = cvimgs
读:
# Open read-only on purpose: relying on the default mode is version-dependent
# (h5py 2.x opened in append mode and would create an empty file when
# `outfile` was missing, hiding a wrong path).
with h5py.File(outfile, 'r') as f:
    # Names were stored as bytes, so decode them back to str.
    imagename = [x.decode() for x in f['imagename']]
    # np.array(...) copies each dataset into memory, so the arrays stay
    # usable after the file is closed.
    kp2ds = np.array(f['part'])
    kp3ds = np.array(f['S'])
    cvimgs = np.array(f['image'])
  
  
      
      
   
     
    
    
      # NOTE(review): `balanced_parallel` is not part of PyTorch; presumably a
      # load-balanced DataParallel helper vendored into the project - confirm.
      # DataParallelCriterion is imported but not used in this snippet.
      from balanced_parallel import DataParallelModel, DataParallelCriterion
    
    
      # Wrap the model for the GPUs listed in `gpus`, then move it to CUDA.
      model = DataParallelModel(model, device_ids=gpus).cuda()
    
    
      # The loss is a plain `loss_fn` instance moved to CUDA (not wrapped).
      criterion = loss_fn().cuda()
   
   
       
      
   
     
    
    
      from torch.nn.parallel.scatter_gather import gather
    
    
      # Collect `preds` onto device 0 (second argument is the target device).
      # NOTE(review): presumably `preds` holds the per-GPU outputs returned by
      # the parallel model - confirm against the caller.
      preds = gather(preds, 0)
   
   
       
      
   
     
    
    
      import os

      from torch.utils.data.distributed import DistributedSampler
      from torch.nn.parallel import DistributedDataParallel

      # Join the process group; NCCL is the backend used for CUDA tensors.
      torch.distributed.init_process_group(backend="nccl")

      # Choose the GPU for this process.
      # The global rank is NOT a valid device index on multi-node jobs (rank 8
      # on the second 8-GPU node must use cuda:0), so prefer the LOCAL_RANK
      # variable exported by the launcher and fall back to rank modulo the
      # number of visible GPUs. On a single node this equals the global rank,
      # so the previous behaviour is preserved.
      local_rank = int(
          os.environ.get(
              "LOCAL_RANK",
              torch.distributed.get_rank() % torch.cuda.device_count(),
          )
      )
      torch.cuda.set_device(local_rank)
      device = torch.device("cuda", local_rank)

      # The model has to live on its GPU before it is wrapped.
      model.to(device)
      model = DistributedDataParallel(
          model,
          device_ids=[local_rank],
          output_device=local_rank,
      )

      # Add a DistributedSampler on top of the existing DataLoader so every
      # process iterates over its own shard of the dataset.
      # NOTE: call `train_loader.sampler.set_epoch(epoch)` at the start of each
      # epoch if a different shuffle order per epoch is wanted.
      train_loader = torch.utils.data.DataLoader(
          train_dataset,
          sampler=DistributedSampler(train_dataset),
      )
   
   
       
      
   
     
    
    
      # Use the fully qualified module for all three flags; the bare `cudnn`
      # alias is never imported in this snippet.
      # benchmark=True lets cuDNN time several convolution algorithms and keep
      # the fastest one (pays off when input shapes do not change).
      torch.backends.cudnn.benchmark = True
      # Allow non-deterministic algorithms; speed is preferred over
      # bit-for-bit reproducibility here.
      torch.backends.cudnn.deterministic = False
      torch.backends.cudnn.enabled = True
   
   
       
      
   
     
    
    
      # Hand cached-but-unused GPU memory blocks held by PyTorch's caching
      # allocator back to the driver; memory of live tensors is not freed.
      torch.cuda.empty_cache()
   
   
       
      
   
     
    
    
      del xxx  # replace xxx with the name of the variable to free (变量名)
   
   
       
      
   
     
    
    
      def __len__(self):
          """Return the number of samples: the size of ``self.images`` along axis 0."""
          sample_count = self.images.shape[0]
          return sample_count
   
   
       
      
   
     
    
    
      # Build the training loader with pinned (page-locked) host memory.
      train_loader = torch.utils.data.DataLoader(
    
    
              train_dataset,
    
    
              # pin_memory=True makes the loader place batches in page-locked
              # memory, which speeds up host-to-GPU copies.
              pin_memory=True,
    
    
          )
   
   
       
   在深度学习中喂饱gpu
https://zhuanlan.zhihu.com/p/77633542