Implementation
1. get_pixels_from_image in ray_utils.py
2. get_rays_from_pixels in ray_utils.py
# Generate pixel coordinates in NDC space (in [-1, 1])
def get_pixels_from_image(image_size, camera):
    W, H = image_size[0], image_size[1]

    # TODO (1.3): Generate pixel coordinates from [0, W] in x and [0, H] in y
    x = torch.arange(0, W)
    y = torch.arange(0, H)

    # TODO (1.3): Convert to the range [-1, 1] in both x and y
    x = (2.0 * x / W) - 1.0
    y = (2.0 * y / H) - 1.0

    # Create grid of coordinates
    xy_grid = torch.stack(
        tuple(reversed(torch.meshgrid(y, x))),
        dim=-1,
    ).view(W * H, 2)

    # Flip signs to match the PyTorch3D NDC convention (+X left, +Y up)
    return -xy_grid
# Get rays from pixel values
def get_rays_from_pixels(xy_grid, image_size, camera):
    W, H = image_size[0], image_size[1]

    # TODO (1.3): Map pixels to points on the image plane at Z=1
    device = get_device()
    ndc_points = xy_grid.to(device)
    ndc_points = torch.cat(
        [
            ndc_points,
            torch.ones_like(ndc_points[..., -1:])
        ],
        dim=-1
    )

    # TODO (1.3): Use camera.unproject to get world space points on the image plane from NDC space points
    ndc_to_world = camera.unproject_points(ndc_points, world_coordinates=True)

    # TODO (1.3): Get ray origins from camera center
    # Broadcast the (1, 3) camera center to one origin per ray
    rays_o = camera.get_camera_center() * torch.ones_like(ndc_points)

    # TODO (1.3): Get normalized ray directions
    rays_d = torch.nn.functional.normalize(ndc_to_world - rays_o, dim=-1)

    # Create and return RayBundle
    return RayBundle(
        rays_o,
        rays_d,
        torch.zeros_like(rays_o).unsqueeze(1),
        torch.zeros_like(rays_o).unsqueeze(1),
    )
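A minimal sanity check for these two helpers (a sketch, not part of the assignment code; it assumes a PyTorch3D FoVPerspectiveCameras and the get_device() helper from the starter code):

import torch
from pytorch3d.renderer import FoVPerspectiveCameras

# Hypothetical setup: a single camera with identity rotation, translated 3 units along z
camera = FoVPerspectiveCameras(
    R=torch.eye(3)[None], T=torch.tensor([[0.0, 0.0, 3.0]])
).to(get_device())
image_size = (256, 256)

xy_grid = get_pixels_from_image(image_size, camera)             # (W*H, 2) NDC coordinates
ray_bundle = get_rays_from_pixels(xy_grid, image_size, camera)

print(ray_bundle.origins.shape, ray_bundle.directions.shape)    # (W*H, 3), (W*H, 3)
print(ray_bundle.directions.norm(dim=-1).mean())                # ~1.0 (unit-length directions)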
Visualization
Code in main.py:
# TODO (1.3): Visualize xy grid using vis_grid
W, H = image_size[0], image_size[1]
if cam_idx == 0 and file_prefix == '':
    # The flattened grid is laid out row-major, so reshape to (H, W, 2)
    image = xy_grid.cpu().numpy().reshape(H, W, 2)
    image = (image + 1) / 2.0
    image = np.absolute(image)
    image = np.concatenate((image, np.zeros((H, W, 1))), axis=-1)
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.imsave('vis_grid.png', image)
    plt.axis("off")

# TODO (1.3): Visualize rays using vis_rays
if cam_idx == 0 and file_prefix == '':
    rays_d = ray_bundle.directions
    image = rays_d.cpu().numpy().reshape(H, W, 3)
    image = np.absolute(image)
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.imsave('vis_rays.png', image)
    plt.axis("off")
xy_grid visualization:
rays visualization:
Implementation
Fill out the StratifiedRaysampler class in sampler.py.
# Sampler which implements stratified (uniform) point sampling along rays
class StratifiedRaysampler(torch.nn.Module):
    def __init__(
        self,
        cfg
    ):
        super().__init__()

        self.n_pts_per_ray = cfg.n_pts_per_ray
        self.min_depth = cfg.min_depth
        self.max_depth = cfg.max_depth

    def forward(
        self,
        ray_bundle,
    ):
        # TODO (1.4): Compute z values for self.n_pts_per_ray points uniformly sampled between [near, far]
        z_vals = torch.linspace(self.min_depth, self.max_depth, self.n_pts_per_ray, device=get_device())

        # TODO (1.4): Sample points from z values
        num_rays = ray_bundle.origins.shape[0]  # e.g. 65536
        sample_points = torch.zeros((num_rays, self.n_pts_per_ray, 3), device=get_device())  # (num_rays, n_pts, 3)

        # p = z * dir + p0
        z_vals = torch.ones((num_rays, 1)).to(get_device()) @ z_vals.reshape(1, -1)  # (num_rays, n_pts)
        z_vals = z_vals.unsqueeze(2)  # (num_rays, n_pts, 1)
        for i in range(self.n_pts_per_ray):
            sample_points[:, i, :] = ray_bundle.directions
        sample_points = z_vals * sample_points
        for i in range(self.n_pts_per_ray):
            sample_points[:, i, :] = sample_points[:, i, :] + ray_bundle.origins

        # Return
        return ray_bundle._replace(
            sample_points=sample_points,
            sample_lengths=z_vals * torch.ones_like(sample_points[..., :1]),
        )
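For reference, the two loops above collapse to a single broadcast. A loop-free sketch (the function name is mine, not part of the starter code):

import torch

def sample_points_along_rays(origins, directions, min_depth, max_depth, n_pts_per_ray):
    # origins, directions: (num_rays, 3); returns (num_rays, n_pts_per_ray, 3)
    z_vals = torch.linspace(min_depth, max_depth, n_pts_per_ray, device=origins.device)
    z_vals = z_vals.view(1, -1, 1)                                  # (1, n_pts, 1)
    # p = o + z * d, broadcast over rays and samples
    return origins.unsqueeze(1) + z_vals * directions.unsqueeze(1)  # (num_rays, n_pts, 3)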
Visualization
Code in main.py:
# TODO (1.4): Implement point sampling along rays in sampler.py
ray_bundle_modified = model.sampler.forward(ray_bundle)

# TODO (1.4): Visualize sample points as point cloud
if cam_idx == 0 and file_prefix == '':
    render_points(
        'render_points.png', ray_bundle_modified.sample_points.view(-1, 3)[None],
        image_size=256, color=[0.7, 0.7, 1], device=device
    )
Point samples from the first camera:
Implementation
1. VolumeRenderer._compute_weights
2. VolumeRenderer._aggregate
3. Modify the VolumeRenderer.forward method to render a depth map in addition to color from a volume.
def _compute_weights(
    self,
    deltas,
    rays_density: torch.Tensor,
    eps: float = 1e-10
):
    # TODO (1.5): Compute transmittance using the equation described in the README
    num_rays, n_sample_per_ray = deltas.shape[0], deltas.shape[1]
    T = torch.ones(num_rays, n_sample_per_ray, 1).to(get_device())
    factor = torch.exp(-(deltas * rays_density))
    # Transmittance: T[i] = prod_{j<i} exp(-sigma_j * delta_j)
    for i in range(1, n_sample_per_ray):
        T[:, i, :] = T[:, i - 1, :].clone() * factor[:, i - 1, :]

    # TODO (1.5): Compute weight used for rendering from transmittance and density
    weights = T * (1 - factor)

    return weights

def _aggregate(
    self,
    weights: torch.Tensor,
    rays_feature: torch.Tensor
):
    # TODO (1.5): Aggregate (weighted sum of) features using weights
    feature = torch.sum(weights * rays_feature, dim=1)

    return feature

def forward(
    self,
    sampler,
    implicit_fn,
    ray_bundle,
):
    B = ray_bundle.shape[0]

    # Process the chunks of rays.
    chunk_outputs = []

    for chunk_start in range(0, B, self._chunk_size):
        cur_ray_bundle = ray_bundle[chunk_start:chunk_start + self._chunk_size]

        # Sample points along the ray
        cur_ray_bundle = sampler(cur_ray_bundle)
        n_pts = cur_ray_bundle.sample_shape[1]

        # Call implicit function with sample points
        implicit_output = implicit_fn(cur_ray_bundle)
        density = implicit_output['density']
        feature = implicit_output['feature']

        # Compute length of each ray segment
        depth_values = cur_ray_bundle.sample_lengths[..., 0]
        deltas = torch.cat(
            (
                depth_values[..., 1:] - depth_values[..., :-1],
                1e10 * torch.ones_like(depth_values[..., :1]),
            ),
            dim=-1,
        )[..., None]

        # Compute aggregation weights
        weights = self._compute_weights(
            deltas.view(-1, n_pts, 1),
            density.view(-1, n_pts, 1)
        )

        # TODO (1.5): Render (color) features using weights
        feature = self._aggregate(weights, feature.view(-1, n_pts, 3))

        # TODO (1.5): Render depth map
        depth = self._aggregate(weights, depth_values.view(-1, n_pts, 1))

        # Return
        cur_out = {
            'feature': feature,
            'depth': depth,
        }

        chunk_outputs.append(cur_out)

    # Concatenate chunk outputs
    out = {
        k: torch.cat(
            [chunk_out[k] for chunk_out in chunk_outputs],
            dim=0
        ) for k in chunk_outputs[0].keys()
    }

    return out
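For reference, the loop in _compute_weights implements the standard volume-rendering quantities T_i = exp(-sum_{j<i} sigma_j * delta_j) and w_i = T_i * (1 - exp(-sigma_i * delta_i)). An equivalent vectorized formulation using cumprod (a sketch; the function name is mine):

import torch

def compute_weights_vectorized(deltas, rays_density, eps: float = 1e-10):
    # deltas, rays_density: (num_rays, n_pts, 1); returns weights of the same shape
    alpha = 1.0 - torch.exp(-deltas * rays_density)        # per-sample opacity
    # Transmittance: cumulative product of (1 - alpha), shifted so that T_0 = 1
    T = torch.cumprod(1.0 - alpha + eps, dim=1)
    T = torch.cat([torch.ones_like(T[:, :1]), T[:, :-1]], dim=1)
    return T * alpha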
Visualization
# TODO (1.5): Implement rendering in renderer.py
out = model(ray_bundle)

# Return rendered features (colors)
image = np.array(out['feature'].view(image_size[1], image_size[0], 3).detach().cpu())
all_images.append(image)

# TODO (1.5): Visualize depth
if cam_idx == 2 and file_prefix == '':
    depth = np.array(out['depth'].view(image_size[1], image_size[0]).detach().cpu())
    plt.imsave(f'{file_prefix}_{cam_idx}_depth.png', depth, cmap='plasma')
Depth Map:
Implement the get_random_pixels_from_image method in ray_utils.py.
def get_random_pixels_from_image(n_pixels, image_size, camera):
    xy_grid = get_pixels_from_image(image_size, camera)

    # TODO (2.1): Random subsampling of pixel coordinates
    # Sample NDC coordinates uniformly at random in [-1, 1] for both x and y
    x_sampled = torch.rand(n_pixels) * 2.0 - 1.0
    y_sampled = torch.rand(n_pixels) * 2.0 - 1.0
    xy_grid_sub = torch.stack((x_sampled, y_sampled), dim=1).to(get_device())

    # Return
    return xy_grid_sub.reshape(-1, 2)[:n_pixels]
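A sketch of how these random pixels feed one training iteration (illustrative only; n_pixels_per_batch and the rgb_gt gathering are placeholders, not the exact code in main.py):

# Hypothetical usage inside one training step
xy_grid_sub = get_random_pixels_from_image(n_pixels_per_batch, image_size, camera)
ray_bundle = get_rays_from_pixels(xy_grid_sub, image_size, camera)
out = model(ray_bundle)    # predicted colors for the sampled pixels
# rgb_gt: ground-truth colors sampled from the training image at the same NDC coordinates

Note that this implementation draws continuous NDC coordinates directly rather than subsampling the precomputed xy_grid; since the range [-1, 1] is symmetric, the sign flip applied in get_pixels_from_image does not matter here.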
Replace the loss in train with the mean squared error between the predicted colors and the ground-truth colors rgb_gt.
# TODO (2.2): Calculate loss
predicted = out['feature']
mse_loss = torch.nn.MSELoss()
loss = mse_loss(predicted, rgb_gt)
Report the center of the box and the side lengths of the box after training, rounded to the nearest hundredth.
Box center: [0.25, 0.25, 0.00].
Box side lengths: [2.00, 1.50, 1.50].
Visualization
Render of a spiral sequence of the optimized volume:
Implementation
Implement an implicit volume as a multi-layer perceptron (MLP) in the NeuralRadianceField class in implicit.py.
Fill out the loss in train_nerf in the main.py file.
# TODO (3.1): Implement NeRF MLP
class NeuralRadianceField(torch.nn.Module):
    def __init__(
        self,
        cfg,
    ):
        super().__init__()

        self.harmonic_embedding_xyz = HarmonicEmbedding(3, cfg.n_harmonic_functions_xyz)
        self.harmonic_embedding_dir = HarmonicEmbedding(3, cfg.n_harmonic_functions_dir)

        embedding_dim_xyz = self.harmonic_embedding_xyz.output_dim
        embedding_dim_dir = self.harmonic_embedding_dir.output_dim

        self.cfg = cfg
        embedding_dim = embedding_dim_xyz  # 39
        hidden_dim = cfg.n_hidden_neurons_xyz  # 128

        self.in_layer = torch.nn.Linear(embedding_dim, hidden_dim)
        self.hidden = torch.nn.Linear(hidden_dim, hidden_dim)

        # Separate output heads for density and color
        self.out_density = torch.nn.Linear(hidden_dim, 1)
        self.out_color = torch.nn.Linear(hidden_dim, 3)

        self.relu = torch.nn.functional.relu
        self.sigmoid = torch.sigmoid

    def forward(self, ray_bundle):
        sample_points = ray_bundle.sample_points.view(-1, 3)
        embedded_xyz = self.harmonic_embedding_xyz(sample_points)

        x = embedded_xyz      # (N, 39)
        x = self.in_layer(x)  # (N, 128)
        # Note: the same hidden layer is applied repeatedly (shared weights)
        for _ in range(self.cfg.n_layers_xyz - 2):
            x = self.relu(self.hidden(x))

        density = self.relu(self.out_density(x))
        color = self.sigmoid(self.out_color(x))

        out = {
            'density': density,
            'feature': color,
        }

        return out

# TODO (3.1): Calculate loss
predicted = out['feature']
mse_loss = torch.nn.MSELoss()
loss = mse_loss(predicted, rgb_gt)
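Where the commented embedding sizes come from (a sketch; the harmonic-function counts below are inferred from the shapes noted in the comments, assuming the harmonic embedding appends the raw 3-D input to its sin/cos features):

# Assumed config values, inferred from the 39-dim xyz embedding above and the
# 15-dim direction embedding in the view-dependent variant below
n_harmonic_functions_xyz = 6
n_harmonic_functions_dir = 2
in_channels = 3

embedding_dim_xyz = 2 * n_harmonic_functions_xyz * in_channels + in_channels  # 39
embedding_dim_dir = 2 * n_harmonic_functions_dir * in_channels + in_channels  # 15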
Visualization
Code of the network with view dependence:
class NeuralRadianceField(torch.nn.Module):
    def __init__(
        self,
        cfg,
    ):
        super().__init__()

        self.harmonic_embedding_xyz = HarmonicEmbedding(3, cfg.n_harmonic_functions_xyz)
        self.harmonic_embedding_dir = HarmonicEmbedding(3, cfg.n_harmonic_functions_dir)

        embedding_dim_xyz = self.harmonic_embedding_xyz.output_dim
        embedding_dim_dir = self.harmonic_embedding_dir.output_dim

        self.cfg = cfg
        embedding_dim = embedding_dim_dir + embedding_dim_xyz  # 54
        hidden_dim = cfg.n_hidden_neurons_xyz  # 128

        self.in_layer = torch.nn.Linear(embedding_dim, hidden_dim)
        self.hidden = torch.nn.Linear(hidden_dim, hidden_dim)
        self.out_density = torch.nn.Linear(hidden_dim, 1)
        self.out_color = torch.nn.Linear(hidden_dim, 3)

        self.relu = torch.nn.functional.relu
        self.sigmoid = torch.sigmoid

    def forward(self, ray_bundle):
        sample_points = ray_bundle.sample_points.view(-1, 3)
        embedded_xyz = self.harmonic_embedding_xyz(sample_points)

        directions = ray_bundle.directions.view(-1, 3)          # 4.1
        embedded_dir = self.harmonic_embedding_dir(directions)  # 4.1: (n_rays, 15)
        # Repeat the per-ray direction embedding for every sample along the ray
        n_pts_per_ray = ray_bundle.sample_points.shape[1]
        embedded_dir = embedded_dir.unsqueeze(1).repeat(1, n_pts_per_ray, 1).reshape(-1, embedded_dir.shape[-1])

        x = torch.cat((embedded_xyz, embedded_dir), dim=-1)     # 4.1
        x = self.in_layer(x)
        for _ in range(self.cfg.n_layers_xyz - 2):
            x = self.relu(self.hidden(x))

        density = self.relu(self.out_density(x))
        color = self.sigmoid(self.out_color(x))

        out = {
            'density': density,
            'feature': color,
        }

        return out
Visualization
With added view dependence (the ray directions), the results improved slightly, both visually and in terms of the loss, with the same training parameters and number of epochs. View dependence can lead to overfitting to the views seen during training. However, for a volumetric rendering task the predicted density should not be view-dependent. We can therefore use separate prediction heads for density and color: the density head takes only the position of the sample point, while the color head takes both the position and the viewing direction, as sketched below.
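A minimal sketch of that separate-head design (hypothetical; this is not the network used for the results above): the positional embedding alone drives the density head, and the direction embedding is only concatenated before the color head.

import torch

class SeparateHeadNeRF(torch.nn.Module):
    # Hypothetical module illustrating the design described above
    def __init__(self, xyz_dim, dir_dim, hidden_dim):
        super().__init__()
        self.trunk = torch.nn.Sequential(
            torch.nn.Linear(xyz_dim, hidden_dim), torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, hidden_dim), torch.nn.ReLU(),
        )
        self.out_density = torch.nn.Linear(hidden_dim, 1)          # position only
        self.color_head = torch.nn.Sequential(
            torch.nn.Linear(hidden_dim + dir_dim, hidden_dim), torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, 3), torch.nn.Sigmoid(),    # position + direction
        )

    def forward(self, embedded_xyz, embedded_dir):
        features = self.trunk(embedded_xyz)
        density = torch.relu(self.out_density(features))
        color = self.color_head(torch.cat([features, embedded_dir], dim=-1))
        return {'density': density, 'feature': color}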