From 10aa24acbeeadf8b18c392efe07170b02a5c5c41 Mon Sep 17 00:00:00 2001 From: Anton Ljungdahl Date: Fri, 25 Apr 2025 23:28:59 +0200 Subject: [PATCH] antialiasing with proper pointer passing of local rand state --- src/main.cu | 190 ++++++++++++++++++++++++++++++++++++-------------- timeBuild.ctm | Bin 2228 -> 2468 bytes 2 files changed, 136 insertions(+), 54 deletions(-) diff --git a/src/main.cu b/src/main.cu index ce3bccc..0d6780b 100644 --- a/src/main.cu +++ b/src/main.cu @@ -42,8 +42,10 @@ typedef float F32; #define CURAND_SEED 1984 +#define MAX_RANDOM_UNIT_VECTOR_ITERATIONS 64 #define MAX_NUM_ENTITIES 64 -#define SAMPLES_PER_PIXEL 32 +#define SAMPLES_PER_PIXEL 64 +#define MAX_DIFFUSE_DEPTH 8 //------------------------------------------------------------------------------------------ //~ structs @@ -242,6 +244,66 @@ __device__ function F32 surrounds_RngF32(RngF32 rng, F32 val) //} // +__device__ function Vec3F32 +rand_uniform_V3F32(curandState *local_rand_state) +{ + Vec3F32 out = {0}; + out.x = curand_uniform(local_rand_state); + out.y = curand_uniform(local_rand_state); + out.z = curand_uniform(local_rand_state); + return out; +} + +__device__ function Vec3F32 +rand_uniform_rng_V3F32(RngF32 rng, curandState *local_rand_state) +{ + Vec3F32 out = {0}; + out.x = rng.min + (rng.max-rng.min) * curand_uniform(local_rand_state); + out.y = rng.min + (rng.max-rng.min) * curand_uniform(local_rand_state); + out.z = rng.min + (rng.max-rng.min) * curand_uniform(local_rand_state); + return out; +} + +__device__ function Vec3F32 +rand_unit_vector_on_sphere_F32(curandState *local_rand_state) +{ + Vec3F32 out = {0}; + RngF32 range = {-1.0f, 1.0f}; // Cube bounding the unit sphere + F32 inner_bound = 1e-8f; // Don't want too small vectors + for(U32 i = 0; i < MAX_RANDOM_UNIT_VECTOR_ITERATIONS; i += 1) + { + out = rand_uniform_rng_V3F32(range, local_rand_state); + F32 normsqrd = dot_V3F32(out, out); + if(inner_bound < normsqrd && normsqrd <= 1.0f) + { + F32 norm = __fsqrt_rn(normsqrd); + out = scale_V3F32(1.0f/norm, out); + break; + } + } + + return out; +} + +__device__ function Vec3F32 +rand_unit_vector_on_hemisphere_F32(curandState *local_rand_state, Vec3F32 normal) +{ + + Vec3F32 out = {0}; + Vec3F32 vec_on_unit_sphere = rand_unit_vector_on_sphere_F32(local_rand_state); + if(dot_V3F32(vec_on_unit_sphere, normal) > 0.0f) + { + // same hemisphere + out = vec_on_unit_sphere; + } + else + { + out = scale_V3F32(-1.0f, vec_on_unit_sphere); + } + + return out; +} + __host__ function void write_buffer_to_ppm(Vec3F32 *buffer, U32 image_width, U32 image_height) @@ -388,7 +450,7 @@ hit_sphere(Vec3F32 center, F32 radius, RayF32 ray, RngF32 range) } __device__ function RayF32 -ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state) +ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState *local_rand_state) { RayF32 out = {0}; @@ -399,8 +461,8 @@ ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state) Vec3F32 pixel_center = add_V3F32(viewport.pixel_origin, add_V3F32(px_u, px_v)); // To get anti-aliasing we make a random offset from the pixel center - F32 rand_u = curand_uniform(&local_rand_state) - 0.5f; - F32 rand_v = curand_uniform(&local_rand_state) - 0.5f; + F32 rand_u = curand_uniform(local_rand_state) - 0.5f; + F32 rand_v = curand_uniform(local_rand_state) - 0.5f; // the rand u and rand v are offsets from a pixel in the [-0.5, 0.5] square. // We need to put that into the world space of our viewport Vec3F32 offset_u = scale_V3F32(rand_u, viewport.pixel_delta_u); @@ -419,58 +481,76 @@ ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state) // Trace a ray and get a pixel color sample __device__ function Vec3F32 -get_sample_color(RayF32 ray, Entity *entities) +get_sample_color(RayF32 ray, Entity *entities, curandState *local_rand_state) { + + RayF32 current_ray = ray; Vec3F32 out = {0}; - RngF32 hit_range = {F32_MIN, F32_MAX}; - HitRecord hit_rec = {0}; - for(U32 entity_idx = 0; entity_idx < MAX_NUM_ENTITIES; entity_idx += 1) - { - Entity *entity = &entities[entity_idx]; - switch(entity->kind) - { - case EntityKind_Nil: - { - // no op - } break; - - case EntityKind_Sphere: - { - HitRecord temp_hit_rec = hit_sphere(entity->center, entity->radius, - ray, hit_range); - if(temp_hit_rec.hit) - { - hit_rec = temp_hit_rec; - hit_range.max = hit_rec.t; - } - - } break; - } // end switch entity kind - - } - + F32 current_attenuation = 1.0f; Vec3F32 sample_pixel_color = vec3F32(0.0f, 0.0f, 0.0f); - if(hit_rec.hit) + for(U32 bounce_idx = 0; + //bounce_idx < MAX_DIFFUSE_DEPTH; + bounce_idx < 1; + bounce_idx += 1) { - // Paint entity - sample_pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f)); - sample_pixel_color = scale_V3F32(0.5f, sample_pixel_color); - // debug - //sample_pixel_color = vec3F32(1.0f, 0.0f, 0.0f); - } - else - { - // Paint background gradient - F32 norm = norm_V3F32(ray.direction); - Vec3F32 unit_dir = scale_V3F32(1.0f/norm, ray.direction); - Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f); - Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f); - // Lerp between white and light blue depending on y position - F32 blend = 0.5f*(unit_dir.y + 1.0f); + RngF32 hit_range = {0.001f, F32_MAX}; + HitRecord hit_rec = {0}; + for(U32 entity_idx = 0; entity_idx < MAX_NUM_ENTITIES; entity_idx += 1) + { + Entity *entity = &entities[entity_idx]; + switch(entity->kind) + { + case EntityKind_Nil: + { + // no op + } break; + + case EntityKind_Sphere: + { + HitRecord temp_hit_rec = hit_sphere(entity->center, entity->radius, + current_ray, hit_range); + if(temp_hit_rec.hit) + { + hit_rec = temp_hit_rec; + hit_range.max = hit_rec.t; + } + + } break; + } // end switch entity kind + + } + + if(hit_rec.hit) + { + // Paint entity + Vec3F32 rand_dir = rand_unit_vector_on_hemisphere_F32(local_rand_state, hit_rec.normal); + current_attenuation = current_attenuation * 0.5f; + + current_ray.origin = hit_rec.point; + current_ray.direction = rand_dir; + sample_pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f)); + sample_pixel_color = scale_V3F32(0.5f, sample_pixel_color); + // debug + //sample_pixel_color = vec3F32(1.0f, 0.0f, 0.0f); + } + else + { + // Paint background gradient + F32 norm = norm_V3F32(ray.direction); + Vec3F32 unit_dir = scale_V3F32(1.0f/norm, ray.direction); + Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f); + Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f); + + // Lerp between white and light blue depending on y position + F32 blend = 0.5f*(unit_dir.y + 1.0f); + + sample_pixel_color = lerp_V3F32(blend, white, light_blue); + sample_pixel_color = scale_V3F32(current_attenuation, sample_pixel_color); + break; + } - sample_pixel_color = lerp_V3F32(blend, white, light_blue); } out = sample_pixel_color; @@ -488,6 +568,7 @@ cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state) if(x < image.width && y < image.height) { + curandState local_rand_state = rand_state[idx]; // We are adding all samples and then dividing by num samples to get the mean, so // we initialise the color for this pixel to black. // Loop over all pixel samples @@ -498,9 +579,9 @@ cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state) // TODO(anton): Maybe we can randomise things directly here as the // nvidia accelerated version, where we just put the x, y indices with a // randomised shift and normalise to viewport space by dividing by max x, max y - RayF32 ray = ray_get_F32((F32)x, (F32)y, camera.center, rand_state[idx]); - - Vec3F32 sample_pixel_color = get_sample_color(ray, entities); + RayF32 ray = ray_get_F32((F32)x, (F32)y, camera.center, &local_rand_state); + + Vec3F32 sample_pixel_color = get_sample_color(ray, entities, &local_rand_state); F32 debug_sample = curand_uniform(&rand_state[idx]); Vec3F32 debug = vec3F32(debug_sample, debug_sample, debug_sample); @@ -510,7 +591,8 @@ cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state) pixel_color = scale_V3F32(1.0f/(F32)SAMPLES_PER_PIXEL, pixel_color); RngF32 clamp_range = {0.0f, 1.0f}; - pixelbuffer[idx] = clamp_V3F32(clamp_range, pixel_color); + //pixel_color = clamp_V3F32(clamp_range, pixel_color); + pixelbuffer[idx] = pixel_color; } } @@ -583,7 +665,7 @@ int main() // pixel_origin = upper_left + 0.5 * (delta u + delta v) Vec3F32 pixel_delta_sum = add_V3F32(h_viewport.pixel_delta_u, h_viewport.pixel_delta_v); h_viewport.pixel_origin = add_V3F32(viewport_upper_left, - scale_V3F32(0.5f, pixel_delta_sum)); + scale_V3F32(0.5f, pixel_delta_sum)); cuErr = cudaMemcpyToSymbol(viewport, &h_viewport, sizeof(ViewportF32), 0, cudaMemcpyHostToDevice); diff --git a/timeBuild.ctm b/timeBuild.ctm index 521cfba5c6f63d073b537242589e73b04e1dc9ae..24f69157b8a6549c5d3d2869e18f7c7e0d622d16 100644 GIT binary patch delta 249 zcmdlYxI}ov7LNK$H+J@_ZM)6L$iTp0z{aubcgZlfqXanJvlHwNdLdkPg`JokcB7C2!Dq0 GLHYqSlS&r= delta 7 OcmZ1?yhU)s77hRmJp$