Assignment 3¶

Sayan Mondal¶

Number of late days used¶

Late days

1. Differentiable Volume Rendering¶

1.3. Ray sampling (10 points)¶

Implementation
1. get_pixels_from_image in ray_utils.py
2. get_rays_from_pixels in ray_utils.py
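For context, a minimal sketch of how these two helpers compose (hypothetical usage; the actual call sites are in main.py):

# Build a full-image ray bundle for one camera.
xy_grid = get_pixels_from_image(image_size, camera)              # (H*W, 2) NDC pixel coordinates
ray_bundle = get_rays_from_pixels(xy_grid, image_size, camera)   # per-pixel origins and unit directions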

In [ ]:
# Generate pixel coordinates in NDC space (in [-1, 1])
def get_pixels_from_image(image_size, camera):
    W, H = image_size[0], image_size[1]

    # TODO (1.3): Generate pixel coordinates from [0, W] in x and [0, H] in y
    x = torch.arange(0, W)
    y = torch.arange(0, H)

    # TODO (1.3): Convert to the range [-1, 1] in both x and y
    x = (2.0 * x / W) - 1.0
    y = (2.0 * y / H) - 1.0

    # Create grid of coordinates
    xy_grid = torch.stack(
        tuple(reversed(torch.meshgrid(y, x))),
        dim=-1,
    ).view(W * H, 2)

    # Negate to match PyTorch3D's NDC convention (+X left, +Y up)
    return -xy_grid
In [ ]:
# Get rays from pixel values
def get_rays_from_pixels(xy_grid, image_size, camera):
    W, H = image_size[0], image_size[1]

    # TODO (1.3): Map pixels to points on the image plane at Z=1
    device = get_device()
    ndc_points = xy_grid.to(device)

    # Append Z=1 so every NDC pixel becomes a point on the image plane
    ndc_points = torch.cat(
        [
            ndc_points,
            torch.ones_like(ndc_points[..., -1:])
        ],
        dim=-1
    )

    # TODO (1.3): Use camera.unproject to get world space points on the image plane from NDC space points
    ndc_to_world = camera.unproject_points(ndc_points, world_coordinates=True)

    # TODO (1.3): Get ray origins from camera center
    rays_o = camera.get_camera_center() * torch.ones_like(ndc_points)

    # TODO (1.3): Get normalized ray directions
    rays_d = torch.nn.functional.normalize(ndc_to_world - rays_o, dim=-1)

    # Create and return RayBundle
    return RayBundle(
        rays_o,
        rays_d,
        torch.zeros_like(rays_o).unsqueeze(1),
        torch.zeros_like(rays_o).unsqueeze(1),
    )

Visualization

Code in main.py:

In [ ]:
# TODO (1.3): Visualize xy grid using vis_grid
    W, H = image_size[0], image_size[1]

    if cam_idx == 0 and file_prefix == '':
        # Map the NDC grid from [-1, 1] to [0, 1] and pad a zero blue channel
        image = xy_grid.cpu().numpy().reshape(H, W, 2)
        image = (image + 1) / 2.0
        image = np.absolute(image)
        image = np.concatenate((image, np.zeros((H, W, 1))), axis=-1)
        plt.figure(figsize=(10, 10))
        plt.imshow(image)
        plt.imsave('vis_grid.png', image)
        plt.axis("off")

    # TODO (1.3): Visualize rays using vis_rays
    if cam_idx == 0 and file_prefix == '':
        # Visualize the absolute value of the ray directions as RGB
        rays_d = ray_bundle.directions
        image = rays_d.cpu().numpy().reshape(H, W, 3)
        image = np.absolute(image)
        plt.figure(figsize=(10, 10))
        plt.imshow(image)
        plt.imsave('vis_rays.png', image)
        plt.axis("off")

xy_grid visualization:

vis_grid.png

rays visualization:

vis_rays.png

1.4. Point sampling (10 points)¶

Implementation
Filled out the StratifiedRaysampler class in sampler.py.

In [ ]:
# Sampler which implements stratified (uniform) point sampling along rays
class StratifiedRaysampler(torch.nn.Module):
    def __init__(
        self,
        cfg
    ):
        super().__init__()

        self.n_pts_per_ray = cfg.n_pts_per_ray
        self.min_depth = cfg.min_depth
        self.max_depth = cfg.max_depth

    def forward(
        self,
        ray_bundle,
    ):
        # TODO (1.4): Compute z values for self.n_pts_per_ray points uniformly sampled between [near, far]
        z_vals = torch.linspace(self.min_depth, self.max_depth, self.n_pts_per_ray, device=get_device())

        # TODO (1.4): Sample points from z values
        # p = o + z * d, broadcast over all rays and all samples
        num_rays = ray_bundle.origins.shape[0]
        z_vals = z_vals.view(1, -1, 1).expand(num_rays, -1, -1)                 # (num_rays, n_pts_per_ray, 1)
        sample_points = (
            ray_bundle.origins.unsqueeze(1)                                     # (num_rays, 1, 3)
            + z_vals * ray_bundle.directions.unsqueeze(1)                       # (num_rays, n_pts_per_ray, 3)
        )

        # Return
        return ray_bundle._replace(
            sample_points=sample_points,
            sample_lengths=z_vals * torch.ones_like(sample_points[..., :1]),
        )
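The sampler above uses fixed, evenly spaced z values along each ray. True stratified sampling would additionally jitter each sample within its depth bin; a minimal standalone sketch of that variant (not part of the submitted code) is:

import torch

def stratified_z_vals(num_rays, n_pts_per_ray, min_depth, max_depth, device):
    # Jittered stratified sampling: one uniform random offset per bin and per ray.
    bins = torch.linspace(min_depth, max_depth, n_pts_per_ray + 1, device=device)
    lower, upper = bins[:-1], bins[1:]                         # bin edges, shape (n_pts_per_ray,)
    t = torch.rand(num_rays, n_pts_per_ray, device=device)     # jitter in [0, 1)
    return (lower + (upper - lower) * t).unsqueeze(-1)         # (num_rays, n_pts_per_ray, 1)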

Visualization

Code in main.py:

In [ ]:
# TODO (1.4): Implement point sampling along rays in sampler.py
        ray_bundle_modified = model.sampler(ray_bundle)

        # TODO (1.4): Visualize sample points as point cloud
        if cam_idx == 0 and file_prefix == '':
            render_points('render_points.png', ray_bundle_modified.sample_points.view(-1,3)[None],
                          image_size=256, color=[0.7, 0.7, 1], device=device)

Point samples from the first camera:

render_points.png

1.5. Volume rendering (30 points)¶

Implementation
1. VolumeRenderer._compute_weights
2. VolumeRenderer._aggregate
3. Modify the VolumeRenderer.forward method to render a depth map in addition to color from a volume.
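For reference, _compute_weights implements the standard emission-absorption weights referenced in the README: with per-sample densities $\sigma_i$ and segment lengths $\delta_i$,

$$T_i = \exp\Big(-\sum_{j<i} \sigma_j \delta_j\Big), \qquad w_i = T_i \big(1 - \exp(-\sigma_i \delta_i)\big),$$

and _aggregate then renders color and depth as the weighted sums $\sum_i w_i \mathbf{c}_i$ and $\sum_i w_i t_i$.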

In [ ]:
def _compute_weights(
        self,
        deltas,
        rays_density: torch.Tensor,
        eps: float = 1e-10
    ):
        # TODO (1.5): Compute transmittance using the equation described in the README
        num_rays = deltas.shape[0]

        # Per-segment attenuation exp(-sigma_i * delta_i); T_i is the product of all
        # earlier attenuations, with T_0 = 1 (a cumulative product shifted by one).
        factor = torch.exp(-(deltas * rays_density))
        T = torch.cat(
            [
                torch.ones(num_rays, 1, 1, device=get_device()),
                torch.cumprod(factor, dim=1)[:, :-1, :],
            ],
            dim=1,
        )

        # TODO (1.5): Compute weight used for rendering from transmittance and density
        weights = T * (1 - factor)

        return weights
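As a quick standalone sanity check of this recurrence (a hypothetical snippet, not part of the submission), the weights along a ray should sum to $1 - \exp(-\sum_i \sigma_i \delta_i)$:

import torch

# Toy example: one ray, four segments with constant density.
deltas = torch.full((1, 4, 1), 0.5)
sigma = torch.full((1, 4, 1), 2.0)
factor = torch.exp(-deltas * sigma)                                   # per-segment attenuation
T = torch.cat([torch.ones(1, 1, 1), torch.cumprod(factor, dim=1)[:, :-1]], dim=1)
weights = T * (1 - factor)
print(weights.sum().item())   # ~0.982 == 1 - exp(-4 * 0.5 * 2.0)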
In [ ]:
def _aggregate(
        self,
        weights: torch.Tensor,
        rays_feature: torch.Tensor
    ):
        # TODO (1.5): Aggregate (weighted sum of) features using weights
        # pass
        feature = torch.sum(weights * rays_feature, dim=1)

        return feature
In [ ]:
def forward(
        self,
        sampler,
        implicit_fn,
        ray_bundle,
    ):
        B = ray_bundle.shape[0]

        # Process the chunks of rays.
        chunk_outputs = []

        for chunk_start in range(0, B, self._chunk_size):
            cur_ray_bundle = ray_bundle[chunk_start:chunk_start+self._chunk_size]

            # Sample points along the ray
            cur_ray_bundle = sampler(cur_ray_bundle)
            n_pts = cur_ray_bundle.sample_shape[1]

            # Call implicit function with sample points
            implicit_output = implicit_fn(cur_ray_bundle)
            density = implicit_output['density']
            feature = implicit_output['feature']

            # Compute length of each ray segment
            depth_values = cur_ray_bundle.sample_lengths[..., 0]
            deltas = torch.cat(
                (
                    depth_values[..., 1:] - depth_values[..., :-1],
                    1e10 * torch.ones_like(depth_values[..., :1]),
                ),
                dim=-1,
            )[..., None]

            # Compute aggregation weights
            weights = self._compute_weights(
                deltas.view(-1, n_pts, 1),
                density.view(-1, n_pts, 1)
            ) 

            # TODO (1.5): Render (color) features using weights
            feature = self._aggregate(weights, feature.view(-1, n_pts, 3))

            # TODO (1.5): Render depth map
            depth = self._aggregate(weights, depth_values.view(-1, n_pts, 1))

            # Return
            cur_out = {
                'feature': feature,
                'depth': depth,
            }

            chunk_outputs.append(cur_out)

        # Concatenate chunk outputs
        out = {
            k: torch.cat(
              [chunk_out[k] for chunk_out in chunk_outputs],
              dim=0
            ) for k in chunk_outputs[0].keys()
        }

        return out

Visualization

In [ ]:
# TODO (1.5): Implement rendering in renderer.py
out = model(ray_bundle)

# Return rendered features (colors)
image = np.array(out['feature'].view(image_size[1], image_size[0], 3).detach().cpu())
all_images.append(image)

# TODO (1.5): Visualize depth
if cam_idx == 2 and file_prefix == '':
    depth = np.array(out['depth'].view(image_size[1], image_size[0]).detach().cpu())
    plt.imsave(f'{file_prefix}_{cam_idx}_depth.png', depth, cmap='plasma')

part_1.gif

Depth Map:

_2_depth.png

2. Optimizing a basic implicit volume¶

2.1. Random ray sampling (5 points)¶

Implement the get_random_pixels_from_image method in ray_utils.py

In [ ]:
def get_random_pixels_from_image(n_pixels, image_size, camera):
    xy_grid = get_pixels_from_image(image_size, camera)
    
    # TODO (2.1): Random subsampling of pixel coordinates
    x_sampled = torch.rand(n_pixels)*2.0 - 1.0
    y_sampled = torch.rand(n_pixels)*2.0 - 1.0
    xy_grid_sub = torch.stack((x_sampled,y_sampled),dim = 1).to(get_device())

    # Return
    return xy_grid_sub.reshape(-1, 2)[:n_pixels]
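An equivalent approach (a sketch, not the submitted version) would be to subsample the already-computed xy_grid by index instead of drawing fresh NDC coordinates:

# Hypothetical alternative: index-based subsampling of the full pixel grid.
idx = torch.randperm(xy_grid.shape[0])[:n_pixels]
xy_grid_sub = xy_grid[idx].to(get_device())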

2.2. Loss and training (5 points)¶

Replace the loss in train with mean squared error between the predicted colors and ground truth colors rgb_gt

In [ ]:
# TODO (2.2): Calculate loss

predicted = out['feature']
mse_loss = torch.nn.MSELoss()
loss = mse_loss(predicted, rgb_gt)

Report the center of the box and the side lengths of the box after training, rounded to the nearest 1/100.

Box center: [0.25, 0.25, 0.00].
Box side lengths: [2.00, 1.50, 1.50]

Visualization

Render of a spiral sequence of the optimized volume:

part_2.gif

3. Optimizing a Neural Radiance Field (NeRF) (30 points)¶

Implementation
1. An implicit volume as a Multi-Layer Perceptron (MLP) in the NeuralRadianceField class in implicit.py.
2. The loss in train_nerf in main.py.
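For reference, the harmonic embedding is a positional encoding in the spirit of NeRF: each input coordinate p is mapped to sines and cosines at geometrically increasing frequencies,

$$\gamma(p) = \big(\sin(2^0 p), \cos(2^0 p), \ldots, \sin(2^{L-1} p), \cos(2^{L-1} p)\big),$$

where L is n_harmonic_functions; the exact frequency scaling and whether the raw input is appended depend on the HarmonicEmbedding implementation in the starter code.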

In [ ]:
# TODO (3.1): Implement NeRF MLP
class NeuralRadianceField(torch.nn.Module):
    def __init__(
        self,
        cfg,
    ):
        super().__init__()

        self.harmonic_embedding_xyz = HarmonicEmbedding(3, cfg.n_harmonic_functions_xyz)
        self.harmonic_embedding_dir = HarmonicEmbedding(3, cfg.n_harmonic_functions_dir)

        embedding_dim_xyz = self.harmonic_embedding_xyz.output_dim
        embedding_dim_dir = self.harmonic_embedding_dir.output_dim

        self.cfg = cfg
       
        embedding_dim = embedding_dim_xyz                      # 39
        hidden_dim = cfg.n_hidden_neurons_xyz                  # 128

        self.in_layer = torch.nn.Linear(embedding_dim, hidden_dim)
        self.hidden = torch.nn.Linear(hidden_dim, hidden_dim)
        
        # separate output heads for density and color
        self.out_density = torch.nn.Linear(hidden_dim, 1)
        self.out_color = torch.nn.Linear(hidden_dim, 3)

        self.relu = torch.nn.functional.relu
        self.sigmoid = torch.sigmoid

    def forward(self, ray_bundle):
        sample_points = ray_bundle.sample_points.view(-1, 3)
        embedded_xyz = self.harmonic_embedding_xyz(sample_points)
        
        x = embedded_xyz                                   # (N, embedding_dim_xyz)

        x = self.in_layer(x)                               # (N, hidden_dim)

        # Note: the same hidden layer is applied repeatedly (shared weights across depth)
        for _ in range(self.cfg.n_layers_xyz - 2):
            x = self.relu(self.hidden(x))

        # Density must be non-negative (ReLU); color lies in [0, 1] (sigmoid)
        density = self.relu(self.out_density(x))
        color = self.sigmoid(self.out_color(x))

        out = {
            'density': density,
            'feature': color,
        }

        return out
In [ ]:
# TODO (3.1): Calculate loss

predicted = out['feature']
mse_loss = torch.nn.MSELoss()
loss = mse_loss(predicted, rgb_gt)

Visualization

part_3.gif

4. NeRF Extras (Choose at least one! More than one is extra credit)¶

4.1 View Dependence (10 pts)¶

Code of the network:

In [ ]:
class NeuralRadianceField(torch.nn.Module):
    def __init__(
        self,
        cfg,
    ):
        super().__init__()

        self.harmonic_embedding_xyz = HarmonicEmbedding(3, cfg.n_harmonic_functions_xyz)
        self.harmonic_embedding_dir = HarmonicEmbedding(3, cfg.n_harmonic_functions_dir)

        embedding_dim_xyz = self.harmonic_embedding_xyz.output_dim
        embedding_dim_dir = self.harmonic_embedding_dir.output_dim

        self.cfg = cfg

        embedding_dim = embedding_dim_dir + embedding_dim_xyz       # 54
        hidden_dim = cfg.n_hidden_neurons_xyz                       # 128

        self.in_layer = torch.nn.Linear(embedding_dim, hidden_dim)
        self.hidden = torch.nn.Linear(hidden_dim, hidden_dim)
        
        self.out_density = torch.nn.Linear(hidden_dim, 1)
        self.out_color = torch.nn.Linear(hidden_dim, 3)

        self.relu = torch.nn.functional.relu
        self.sigmoid = torch.sigmoid

    def forward(self, ray_bundle):
        sample_points = ray_bundle.sample_points.view(-1, 3)
        embedded_xyz = self.harmonic_embedding_xyz(sample_points)
        
        directions = ray_bundle.directions.view(-1, 3)             # 4.1
        embedded_dir = self.harmonic_embedding_dir(directions)     # 4.1: (n_rays, embedding_dim_dir)

        # Each ray direction is shared by every sample along that ray, so repeat the
        # direction embedding once per sample before concatenating with the xyz embedding.
        n_pts = ray_bundle.sample_points.shape[1]
        dir_dim = embedded_dir.shape[-1]
        embedded_dir = embedded_dir.unsqueeze(1).repeat(1, n_pts, 1).reshape(-1, dir_dim)
        x = torch.cat((embedded_xyz, embedded_dir), dim=-1)        # 4.1

        x = self.in_layer(x)                                       # (N, hidden_dim)

        for _ in range(self.cfg.n_layers_xyz-2):
            x = self.relu(self.hidden(x))

        density = self.relu(self.out_density(x))
        color = self.sigmoid(self.out_color(x))

        out = {
            'density': density,
            'feature': color,
        }

        return out

Visualization

part_4_1.gif

With added view dependence (the ray directions), the results improved slightly, both visually and in terms of loss, under the same training parameters and number of epochs. View dependence can lead to overfitting to particular views in the training data. However, for a volumetric rendering task the predicted density should not be view-dependent, so we can use separate prediction heads for density and color: the density head takes only the point position as input, while the color head takes both the position and the viewing direction.
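A minimal sketch of that separation (hypothetical, not the submitted network; layer sizes are placeholders): density is predicted from the position features alone, and the direction embedding is only concatenated for the color head.

import torch

class ViewDependentHeads(torch.nn.Module):
    # Hypothetical module illustrating the density/color separation described above.
    def __init__(self, xyz_dim, dir_dim, hidden_dim=128):
        super().__init__()
        self.trunk = torch.nn.Sequential(
            torch.nn.Linear(xyz_dim, hidden_dim), torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim), torch.nn.ReLU(),
        )
        self.out_density = torch.nn.Linear(hidden_dim, 1)             # position only
        self.color_head = torch.nn.Sequential(                        # position + view direction
            torch.nn.Linear(hidden_dim + dir_dim, hidden_dim // 2), torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim // 2, 3), torch.nn.Sigmoid(),
        )

    def forward(self, embedded_xyz, embedded_dir):
        feat = self.trunk(embedded_xyz)
        density = torch.relu(self.out_density(feat))                  # view-independent density
        color = self.color_head(torch.cat([feat, embedded_dir], dim=-1))
        return {'density': density, 'feature': color}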