16-825 Assignment 2: Single View to 3D¶

Late days: 2

two.png

Sayan Mondal¶

Goals: In this assignment, you will explore the types of loss functions and decoder architectures used for regressing to voxel, point cloud, and mesh representations from single-view RGB input.

1. Exploring loss functions¶

1.1. Fitting a voxel grid (5 points)¶

Optimized voxel grid:

Optimized vg

Ground truth voxel grid:

Optimized vg
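
The voxel grid above is fit by minimizing a per-voxel occupancy loss between the optimized grid and the ground truth. A minimal sketch of a binary cross-entropy version, assuming the optimized grid stores raw logits (the exact implementation may differ):

In [ ]:
import torch.nn.functional as F

def voxel_loss(voxel_logits, voxel_gt):
    # voxel_logits: (b, 32, 32, 32) raw scores; voxel_gt: binary occupancies of the same shape
    return F.binary_cross_entropy_with_logits(voxel_logits, voxel_gt)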

1.2. Fitting a point cloud (5 points)¶

Optimized point cloud:

Optimized pc_1

Ground truth point cloud:

Optimized pc_1
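
The point cloud above is fit by minimizing a chamfer distance between the optimized and ground-truth point sets. A minimal sketch using PyTorch3D's knn_points (the nearest-neighbor search may be implemented differently in the actual code):

In [ ]:
from pytorch3d.ops import knn_points

def chamfer_loss(src, tgt):
    # src: (b, N, 3), tgt: (b, M, 3)
    d_src = knn_points(src, tgt, K=1).dists  # squared distance from each src point to its nearest tgt point
    d_tgt = knn_points(tgt, src, K=1).dists  # squared distance from each tgt point to its nearest src point
    return d_src.mean() + d_tgt.mean()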

1.3. Fitting a mesh (5 points)¶

Optimized mesh:

Optimized mesh

Ground truth mesh:

Optimized mesh
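
The mesh above is fit with a chamfer term on points sampled from both meshes plus a Laplacian smoothness regularizer. A minimal sketch assuming the chamfer_loss above and PyTorch3D's helpers; the sample count and w_smooth value are illustrative:

In [ ]:
from pytorch3d.ops import sample_points_from_meshes
from pytorch3d.loss import mesh_laplacian_smoothing

def mesh_fitting_loss(mesh_src, mesh_tgt, n_samples=5000, w_smooth=0.1):
    pts_src = sample_points_from_meshes(mesh_src, n_samples)  # (b, n_samples, 3)
    pts_tgt = sample_points_from_meshes(mesh_tgt, n_samples)
    return chamfer_loss(pts_src, pts_tgt) + w_smooth * mesh_laplacian_smoothing(mesh_src)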

2. Reconstructing 3D from single view¶

2.1. Image to voxel grid (20 points)¶

Decoder Model:

In [ ]:
self.decoder = nn.Sequential(
                nn.Linear(512,512),
                nn.ReLU(True),
                nn.Unflatten(1, torch.Size([1, 8, 8, 8])),
                # torch.Size([b, 512]) -> torch.Size([b, 1, 8, 8, 8])

                nn.ConvTranspose3d(in_channels=1, out_channels=4, kernel_size=3, stride=1),
                nn.ReLU(True),   
                # torch.Size([b, 1, 8, 8, 8]) -> torch.Size([b, 4, 10, 10, 10])

                nn.ConvTranspose3d(in_channels=4, out_channels=8, kernel_size=3, stride=1),
                nn.ReLU(True),
                # torch.Size([b, 4, 10, 10, 10]) -> torch.Size([b, 8, 12, 12, 12])

                nn.ConvTranspose3d(in_channels=8, out_channels=16, kernel_size=5, stride=1),
                nn.ReLU(True),
                # torch.Size([b, 8, 12, 12, 12]) -> torch.Size([b, 16, 16, 16, 16])
    
                nn.ConvTranspose3d(in_channels=16, out_channels=8, kernel_size=7, stride=1),
                nn.ReLU(True),
                # torch.Size([b, 16, 16, 16, 16]) -> torch.Size([b, 8, 22, 22, 22])

                nn.ConvTranspose3d(in_channels=8, out_channels=4, kernel_size=9, stride=1),
                nn.ReLU(True),
                # torch.Size([b, 8, 22, 22, 22]) -> torch.Size([b, 4, 30, 30, 30])

                nn.ConvTranspose3d(in_channels=4, out_channels=1, kernel_size=3, stride=1)
                # torch.Size([b, 4, 30, 30, 30]) -> torch.Size([b, 1, 32, 32, 32])
            )
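
In the forward pass (a minimal sketch; variable names are illustrative), the (b, 512) image feature from the encoder is passed through this decoder to produce a (b, 1, 32, 32, 32) grid of occupancy logits, which is squeezed to (b, 32, 32, 32) before the binary cross-entropy loss:

In [ ]:
voxels_pred = self.decoder(encoded_feat).squeeze(1)  # (b, 512) -> (b, 32, 32, 32) occupancy logits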

Run:

python train_model.py --type 'vox' --batch_size 64 --num_workers 4 --save_freq 100

Example 1 in the test set:

Input RGB:

vox_0.png

Render of the predicted 3D voxel grid:

vox_pred_0.gif

Render of the ground truth mesh:

vox_gt_0.gif

Example 2 in the test set:

Input RGB:

vox_300.png

Render of the predicted 3D voxel grid:

vox_pred_300.gif

Render of the ground truth mesh:

vox_gt_300.gif

Example 3 in the test set:

Input RGB:

vox_600.png

Render of the predicted 3D voxel grid:

vox_pred_600.gif

Render of the ground truth mesh:

vox_gt_600.gif

2.2. Image to point cloud (20 points)¶

Decoder Model:

In [ ]:
self.decoder =  nn.Sequential(
                nn.Linear(512, 512),
                nn.ReLU(True),
                nn.Unflatten(dim=1, unflattened_size= (512, 1)),
                # torch.Size([b, 512])  ->  torch.Size([b, 512, 1])

                nn.Conv1d(in_channels= 512, out_channels= 1024, kernel_size=1),
                nn.BatchNorm1d(num_features=1024),
                nn.ReLU(True),
                # torch.Size([b, 512, 1]) ->  torch.Size([b, 1024, 1])

                nn.Conv1d(in_channels= 1024, out_channels= 2048, kernel_size=1),
                nn.BatchNorm1d(num_features=2048),
                nn.ReLU(True),
                # torch.Size([b, 1024, 1]) ->  torch.Size([b, 2048, 1]) 

                nn.Conv1d(in_channels= 2048, out_channels= self.n_point*3, kernel_size=1),
                # torch.Size([b, 2048, 1]) ->  torch.Size([b, self.n_point*3, 1])

            )
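
In the forward pass (a minimal sketch; variable names are illustrative), the decoder output of shape (b, n_point*3, 1) is reshaped into the predicted point cloud:

In [ ]:
out = self.decoder(encoded_feat)                  # (b, n_point*3, 1)
pointclouds_pred = out.view(-1, self.n_point, 3)  # (b, n_point, 3)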

Run:

python train_model.py --type 'point' --batch_size 64 --num_workers 4 --save_freq 100 --lr 1e-3

Example 1 in the test set:

Input RGB:

p1_rgb

Render of the predicted 3D point cloud:

p1_pred

Render of the ground truth mesh:

point_gt_30.gif

Example 2 in the test set:

Input RGB:

p2_rgb

Render of the predicted 3D point cloud:

p2_pred

Render of the ground truth mesh:

point_gt_600.gif

Example 3 in the test set:

Input RGB:

p3_rgb

Render of the predicted 3D point cloud:

point_pred_660.gif

Render of the ground truth mesh:

point_gt_660.gif

2.3. Image to mesh (20 points)¶

Decoder Model:

In [ ]:
self.decoder =  nn.Sequential(
                nn.Linear(in_features=512, out_features=1024),
                nn.ReLU(True),
                # torch.Size([b, 512]) ->  torch.Size([b, 1024])

                nn.Linear(in_features=1024, out_features= 2048),
                nn.ReLU(True),
                 # torch.Size([b, 1024]) ->  torch.Size([b, 2048])

                nn.Linear(in_features=2048, out_features= 4096),
                nn.ReLU(True),
                 # torch.Size([b, 2048]) ->  torch.Size([b, 4096])

                nn.Linear(in_features=4096, out_features= mesh_pred.verts_packed().shape[0] * 3),
                 # torch.Size([b, 4096]) ->  torch.Size([b, mesh_pred.verts_packed().shape[0] * 3])
            )
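
The decoder predicts a flattened vector of per-vertex offsets; in the forward pass (a minimal sketch; variable names are illustrative) these offsets are applied to the initial template mesh:

In [ ]:
deform_vertices_pred = self.decoder(encoded_feat)                             # (b, n_verts*3)
mesh_pred = self.mesh_pred.offset_verts(deform_vertices_pred.reshape(-1, 3))  # deform the template mesh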

Run:

python train_model.py --type 'mesh' --batch_size 32 --num_workers 4 --save_freq 50 --w_smooth 0.2

Example 1 in the test set:

Input RGB:

m1_rgb

Render of the predicted mesh:

m1_pred

Render of the ground truth mesh:

m1_gt

Example 2 in the test set:

Input RGB:

m2_rgb

Render of the predicted mesh:

m2_pred

Render of the ground truth mesh:

m2_gt

Example 3 in the test set:

Input RGB:

m3_rgb

Render of the predicted mesh:

m3_pred

Render of the ground truth mesh:

m3_gt

2.4. Quantitative comparisons (10 points)¶

Avg F1 @ 0.05: 87.042 - voxel
Avg F1 @ 0.05: 93.437 - point
Avg F1 @ 0.05: 95.536 - mesh

We find that the F-score of mesh >= point cloud > voxel. There are two likely reasons for this (the F1 metric itself is sketched after this list):

  • Why voxel < point cloud and mesh: the point cloud and mesh are optimized with a chamfer loss, while the voxel grid is optimized with a binary cross-entropy loss, and the F1 score measures how closely predicted points match ground-truth points. It is therefore a less natural indicator for voxels; in addition, the points sampled from the voxel grid are limited by its coarse resolution (32x32x32), so they may not be accurate enough.
  • Why mesh > point cloud: the difference is small, but in my case the mesh scores higher because it was trained with a larger smoothness-loss weight, and a higher smoothness weight typically increases the F-score.
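
For reference, a minimal sketch of the F1 score at distance threshold t between predicted and ground-truth point sets; this mirrors the standard definition rather than the exact evaluation code:

In [ ]:
from pytorch3d.ops import knn_points

def f1_score(pred, gt, t=0.05):
    # pred, gt: (1, N, 3) point sets; knn_points returns squared distances, hence the sqrt
    d_pred = knn_points(pred, gt, K=1).dists.sqrt()  # distance of each predicted point to its nearest gt point
    d_gt = knn_points(gt, pred, K=1).dists.sqrt()    # distance of each gt point to its nearest predicted point
    precision = 100.0 * (d_pred < t).float().mean()
    recall = 100.0 * (d_gt < t).float().mean()
    return 2 * precision * recall / (precision + recall + 1e-8)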

F1-score curve at different thresholds for the voxel grid:

eval_vox.png

F1-score curve at different thresholds for the point cloud:

eval_point.png

F1-score curve at different thresholds for the mesh:

eval_mesh.png

2.5. Analyze the effects of hyperparameter variations (10 points)¶

I analyzed the effect of w_chamfer (the weight of the chamfer loss) on the mesh model.

Run:

python train_model.py --type 'mesh' --batch_size 32 --num_workers 4 --save_freq 50 --w_chamfer 0.1|3.0|50 --max_iter 2500

For consistency, each model was trained for 2500 iterations with the same hyperparameters, except for w_chamfer.

Keeping w_smooth at 0.1, increasing w_chamfer from 0.1 to 3 makes the predicted mesh less spiky (i.e., smoother). Increasing w_chamfer further to 50, however, makes the prediction spikier again, possibly because the overall loss magnitude grows so much that 2500 iterations are no longer sufficient for the model to converge. Surprisingly, the F1-score is quite high (around 92) in all three cases.
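
For context, w_chamfer and w_smooth simply weight the two terms of the mesh training objective; a minimal sketch (variable names are illustrative):

In [ ]:
loss = args.w_chamfer * loss_chamfer + args.w_smooth * loss_smooth  # data term + smoothness regularizer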

Test RGB image:

mesh_300.png

Test render of the ground truth mesh:

mesh_gt_300.gif

w_chamfer = 0.1 ---> render of the predicted mesh:

mesh_pred_300.gif

w_chamfer = 3.0 ---> render of the predicted mesh:

mesh_pred_300.gif

w_chamfer = 50 ---> render of the predicted mesh:

mesh_pred_300.gif

w_chamfer = 0.1 ---> F1-score curve at different thresholds for the mesh:

eval_mesh.png

w_chamfer = 3.0 ---> F1-score curve at different thresholds for the mesh:

eval_mesh.png

w_chamfer = 50 ---> F1-score curve at different thresholds for the mesh:

eval_mesh.png

Clearly, the F1 score @ 0.05 is almost the same for all three cases (close to 92).

2.6. Interpret your model (15 points)¶

As an added visual feature, I visualize the likelihood of the predicted voxels, so that we can see the probability distribution and use it to improve the model. To implement this, voxels that fall in the same likelihood range are grouped together, each group is given a texture of a different color, and all groups are rendered together. The color ranges from red to blue as the likelihood goes from high to low; a minimal sketch of this coloring is shown below.
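
A minimal sketch of the grouping-and-coloring step (not the exact implementation; the function and variable names are illustrative), which turns the predicted probability grid into a colored point cloud for rendering:

In [ ]:
import torch
from pytorch3d.structures import Pointclouds

def likelihood_pointcloud(voxel_probs, n_bins=5, threshold=0.1):
    # voxel_probs: (D, H, W) occupancy probabilities in [0, 1]
    mask = voxel_probs > threshold
    coords = torch.nonzero(mask).float()          # (N, 3) indices of the occupied voxels
    points = coords / voxel_probs.shape[0] - 0.5  # normalize to roughly [-0.5, 0.5]
    bins = torch.clamp((voxel_probs[mask] * n_bins).long(), max=n_bins - 1)  # likelihood bin per voxel
    level = bins.float() / (n_bins - 1)           # 0 = lowest bin, 1 = highest
    colors = torch.stack([level, torch.zeros_like(level), 1.0 - level], dim=1)  # red = high, blue = low
    return Pointclouds(points=[points], features=[colors])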

Visualizations from my previously trained model for one test case are shown below:

Input RGB:

vox_90.png

Render of the ground truth mesh:

vox_gt_90.gif

Likelihood voxel representation:

vox_90_lk.png