Compare commits

..

2 Commits

Author SHA1 Message Date
Anton Ljungdahl
3e41274f45 diffuse skylight 2025-04-25 23:47:57 +02:00
Anton Ljungdahl
10aa24acbe antialiasing with proper pointer passing of local rand state 2025-04-25 23:28:59 +02:00
2 changed files with 145 additions and 54 deletions

View File

@ -42,8 +42,10 @@ typedef float F32;
#define CURAND_SEED 1984 #define CURAND_SEED 1984
#define MAX_RANDOM_UNIT_VECTOR_ITERATIONS 64
#define MAX_NUM_ENTITIES 64 #define MAX_NUM_ENTITIES 64
#define SAMPLES_PER_PIXEL 32 #define SAMPLES_PER_PIXEL 64
#define MAX_DIFFUSE_DEPTH 8
//------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------
//~ structs //~ structs
@ -242,6 +244,66 @@ __device__ function F32 surrounds_RngF32(RngF32 rng, F32 val)
//} //}
// //
// Build a vector whose three components are independent uniform random samples.
//
// local_rand_state: the calling thread's curand state. It is passed by pointer so that
//                   each draw advances the caller's state rather than a temporary copy.
// Returns a Vec3F32 with x, y and z each taken directly from curand_uniform.
__device__ function Vec3F32
rand_uniform_V3F32(curandState *local_rand_state)
{
  // The draws are sequenced as separate statements, in x, y, z order, so the mapping
  // from generator output to component is fixed.
  F32 sample_x = curand_uniform(local_rand_state);
  F32 sample_y = curand_uniform(local_rand_state);
  F32 sample_z = curand_uniform(local_rand_state);

  Vec3F32 result = {0};
  result.x = sample_x;
  result.y = sample_y;
  result.z = sample_z;
  return result;
}
// Build a vector whose three components are independent uniform random samples,
// each remapped from curand_uniform's output onto the interval described by rng.
//
// rng:              target interval; a sample u is mapped to rng.min + (rng.max - rng.min)*u.
// local_rand_state: the calling thread's curand state, advanced by three draws.
// Returns the remapped samples as a Vec3F32 (drawn in x, y, z order).
__device__ function Vec3F32
rand_uniform_rng_V3F32(RngF32 rng, curandState *local_rand_state)
{
  // Width of the target interval; identical for all three components.
  F32 span = rng.max - rng.min;

  Vec3F32 result = {0};
  result.x = rng.min + span * curand_uniform(local_rand_state);
  result.y = rng.min + span * curand_uniform(local_rand_state);
  result.z = rng.min + span * curand_uniform(local_rand_state);
  return result;
}
// Pick a random direction of unit length using rejection sampling.
//
// Candidates are drawn from the cube [-1, 1]^3 and accepted only if they fall inside the
// unit sphere (and are not so short that normalising them would be numerically unsafe).
// The accepted candidate is then scaled to unit length.
//
// local_rand_state: the calling thread's curand state; advanced by three draws per attempt.
// Returns a unit-length Vec3F32. At most MAX_RANDOM_UNIT_VECTOR_ITERATIONS attempts are
// made; if every attempt is rejected, the fixed unit vector (0, 1, 0) is returned so that
// callers never receive a non-normalised (or near-zero) direction.
__device__ function Vec3F32
rand_unit_vector_on_sphere_F32(curandState *local_rand_state)
{
  // Fallback result for the case where every candidate is rejected. Previously the last
  // rejected candidate was returned as-is, which is by construction NOT unit length.
  Vec3F32 out = {0};
  out.y = 1.0f;

  RngF32 range = {-1.0f, 1.0f}; // Cube bounding the unit sphere
  F32 inner_bound = 1e-8f;      // Don't want too small vectors
  for(U32 i = 0; i < MAX_RANDOM_UNIT_VECTOR_ITERATIONS; i += 1)
  {
    Vec3F32 candidate = rand_uniform_rng_V3F32(range, local_rand_state);
    F32 normsqrd = dot_V3F32(candidate, candidate);
    // Accept only candidates inside the unit sphere but away from the origin.
    if(inner_bound < normsqrd && normsqrd <= 1.0f)
    {
      F32 norm = __fsqrt_rn(normsqrd);
      out = scale_V3F32(1.0f/norm, candidate);
      break;
    }
  }
  return out;
}
// Pick a random direction from rand_unit_vector_on_sphere_F32 that lies on the same side
// of the surface as the given normal.
//
// local_rand_state: the calling thread's curand state, forwarded to the sphere sampler.
// normal:           the vector that defines which hemisphere is wanted.
// Returns the sampled direction if its dot product with normal is strictly positive,
// otherwise the sampled direction negated.
__device__ function Vec3F32
rand_unit_vector_on_hemisphere_F32(curandState *local_rand_state, Vec3F32 normal)
{
  Vec3F32 candidate = rand_unit_vector_on_sphere_F32(local_rand_state);
  F32 alignment = dot_V3F32(candidate, normal);

  // Anything that is not strictly in the normal's hemisphere gets mirrored through the
  // origin. Written as a negated '>' so the comparison treats every input exactly as
  // the plain 'alignment > 0' / else split does.
  if(!(alignment > 0.0f))
  {
    return scale_V3F32(-1.0f, candidate);
  }

  // same hemisphere
  return candidate;
}
__host__ function void write_buffer_to_ppm(Vec3F32 *buffer, __host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
U32 image_width, U32 image_width,
U32 image_height) U32 image_height)
@ -388,7 +450,7 @@ hit_sphere(Vec3F32 center, F32 radius, RayF32 ray, RngF32 range)
} }
__device__ function RayF32 __device__ function RayF32
ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state) ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState *local_rand_state)
{ {
RayF32 out = {0}; RayF32 out = {0};
@ -399,8 +461,8 @@ ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state)
Vec3F32 pixel_center = add_V3F32(viewport.pixel_origin, add_V3F32(px_u, px_v)); Vec3F32 pixel_center = add_V3F32(viewport.pixel_origin, add_V3F32(px_u, px_v));
// To get anti-aliasing we make a random offset from the pixel center // To get anti-aliasing we make a random offset from the pixel center
F32 rand_u = curand_uniform(&local_rand_state) - 0.5f; F32 rand_u = curand_uniform(local_rand_state) - 0.5f;
F32 rand_v = curand_uniform(&local_rand_state) - 0.5f; F32 rand_v = curand_uniform(local_rand_state) - 0.5f;
// the rand u and rand v are offsets from a pixel in the [-0.5, 0.5] square. // the rand u and rand v are offsets from a pixel in the [-0.5, 0.5] square.
// We need to put that into the world space of our viewport // We need to put that into the world space of our viewport
Vec3F32 offset_u = scale_V3F32(rand_u, viewport.pixel_delta_u); Vec3F32 offset_u = scale_V3F32(rand_u, viewport.pixel_delta_u);
@ -419,58 +481,81 @@ ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state)
// Trace a ray and get a pixel color sample // Trace a ray and get a pixel color sample
__device__ function Vec3F32 __device__ function Vec3F32
get_sample_color(RayF32 ray, Entity *entities) get_sample_color(RayF32 ray, Entity *entities, curandState *local_rand_state)
{ {
RayF32 current_ray = ray;
Vec3F32 out = {0}; Vec3F32 out = {0};
RngF32 hit_range = {F32_MIN, F32_MAX}; F32 current_attenuation = 1.0f;
HitRecord hit_rec = {0}; F32 attenuation_factor = 0.5f;
for(U32 entity_idx = 0; entity_idx < MAX_NUM_ENTITIES; entity_idx += 1)
{
Entity *entity = &entities[entity_idx];
switch(entity->kind)
{
case EntityKind_Nil:
{
// no op
} break;
case EntityKind_Sphere:
{
HitRecord temp_hit_rec = hit_sphere(entity->center, entity->radius,
ray, hit_range);
if(temp_hit_rec.hit)
{
hit_rec = temp_hit_rec;
hit_range.max = hit_rec.t;
}
} break;
} // end switch entity kind
}
Vec3F32 sample_pixel_color = vec3F32(0.0f, 0.0f, 0.0f); Vec3F32 sample_pixel_color = vec3F32(0.0f, 0.0f, 0.0f);
if(hit_rec.hit) for(U32 bounce_idx = 0;
bounce_idx < MAX_DIFFUSE_DEPTH;
bounce_idx += 1)
{ {
// Paint entity
sample_pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f));
sample_pixel_color = scale_V3F32(0.5f, sample_pixel_color);
// debug
//sample_pixel_color = vec3F32(1.0f, 0.0f, 0.0f);
}
else
{
// Paint background gradient
F32 norm = norm_V3F32(ray.direction);
Vec3F32 unit_dir = scale_V3F32(1.0f/norm, ray.direction);
Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f);
Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f);
// Lerp between white and light blue depending on y position RngF32 hit_range = {0.001f, F32_MAX};
F32 blend = 0.5f*(unit_dir.y + 1.0f); HitRecord hit_rec = {0};
for(U32 entity_idx = 0; entity_idx < MAX_NUM_ENTITIES; entity_idx += 1)
{
Entity *entity = &entities[entity_idx];
switch(entity->kind)
{
case EntityKind_Nil:
{
// no op
} break;
case EntityKind_Sphere:
{
HitRecord temp_hit_rec = hit_sphere(entity->center, entity->radius,
current_ray, hit_range);
if(temp_hit_rec.hit)
{
hit_rec = temp_hit_rec;
hit_range.max = hit_rec.t;
}
} break;
} // end switch entity kind
}
if(hit_rec.hit)
{
// "Paint entity"
// For a diffuse color we actually just update the attenuation here and
// bounce rays around... Then when we are not hitting anything anymore we will sample
// the background gradient and use the computed attenuation. Since the rays are
// bouncing diffusely this will shade nicely.
Vec3F32 rand_dir = rand_unit_vector_on_hemisphere_F32(local_rand_state, hit_rec.normal);
current_attenuation = current_attenuation * attenuation_factor;
current_ray.origin = hit_rec.point;
current_ray.direction = rand_dir;
//sample_pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f));
//sample_pixel_color = scale_V3F32(0.5f, sample_pixel_color);
// debug
//sample_pixel_color = vec3F32(1.0f, 0.0f, 0.0f);
}
else
{
// Paint background gradient
F32 norm = norm_V3F32(ray.direction);
Vec3F32 unit_dir = scale_V3F32(1.0f/norm, ray.direction);
Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f);
Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f);
// Lerp between white and light blue depending on y position
F32 blend = 0.5f*(unit_dir.y + 1.0f);
sample_pixel_color = lerp_V3F32(blend, white, light_blue);
// Scale by the current attenuation for diffuse shading using background color
sample_pixel_color = scale_V3F32(current_attenuation, sample_pixel_color);
break;
}
sample_pixel_color = lerp_V3F32(blend, white, light_blue);
} }
out = sample_pixel_color; out = sample_pixel_color;
@ -488,6 +573,11 @@ cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state)
if(x < image.width && y < image.height) if(x < image.width && y < image.height)
{ {
// NOTE! We need to pass this as a pointer to subsequent usage functions, in order
// to update the random state on this thread, after each call to a distribution function.
curandState local_rand_state = rand_state[idx];
// We are adding all samples and then dividing by num samples to get the mean, so // We are adding all samples and then dividing by num samples to get the mean, so
// we initialise the color for this pixel to black. // we initialise the color for this pixel to black.
// Loop over all pixel samples // Loop over all pixel samples
@ -498,9 +588,9 @@ cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state)
// TODO(anton): Maybe we can randomise things directly here as the // TODO(anton): Maybe we can randomise things directly here as the
// nvidia accelerated version, where we just put the x, y indices with a // nvidia accelerated version, where we just put the x, y indices with a
// randomised shift and normalise to viewport space by dividing by max x, max y // randomised shift and normalise to viewport space by dividing by max x, max y
RayF32 ray = ray_get_F32((F32)x, (F32)y, camera.center, rand_state[idx]); RayF32 ray = ray_get_F32((F32)x, (F32)y, camera.center, &local_rand_state);
Vec3F32 sample_pixel_color = get_sample_color(ray, entities); Vec3F32 sample_pixel_color = get_sample_color(ray, entities, &local_rand_state);
F32 debug_sample = curand_uniform(&rand_state[idx]); F32 debug_sample = curand_uniform(&rand_state[idx]);
Vec3F32 debug = vec3F32(debug_sample, debug_sample, debug_sample); Vec3F32 debug = vec3F32(debug_sample, debug_sample, debug_sample);
@ -510,7 +600,8 @@ cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state)
pixel_color = scale_V3F32(1.0f/(F32)SAMPLES_PER_PIXEL, pixel_color); pixel_color = scale_V3F32(1.0f/(F32)SAMPLES_PER_PIXEL, pixel_color);
RngF32 clamp_range = {0.0f, 1.0f}; RngF32 clamp_range = {0.0f, 1.0f};
pixelbuffer[idx] = clamp_V3F32(clamp_range, pixel_color); //pixel_color = clamp_V3F32(clamp_range, pixel_color);
pixelbuffer[idx] = pixel_color;
} }
} }
@ -583,7 +674,7 @@ int main()
// pixel_origin = upper_left + 0.5 * (delta u + delta v) // pixel_origin = upper_left + 0.5 * (delta u + delta v)
Vec3F32 pixel_delta_sum = add_V3F32(h_viewport.pixel_delta_u, h_viewport.pixel_delta_v); Vec3F32 pixel_delta_sum = add_V3F32(h_viewport.pixel_delta_u, h_viewport.pixel_delta_v);
h_viewport.pixel_origin = add_V3F32(viewport_upper_left, h_viewport.pixel_origin = add_V3F32(viewport_upper_left,
scale_V3F32(0.5f, pixel_delta_sum)); scale_V3F32(0.5f, pixel_delta_sum));
cuErr = cudaMemcpyToSymbol(viewport, &h_viewport, sizeof(ViewportF32), 0, cuErr = cudaMemcpyToSymbol(viewport, &h_viewport, sizeof(ViewportF32), 0,
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);

Binary file not shown.