transfer from office work, working entity loop for spheres

This commit is contained in:
Anton Ljungdahl 2025-04-25 18:29:20 +02:00
parent fbf9421843
commit a615f8efe8
2 changed files with 196 additions and 50 deletions

View File

@ -1,12 +1,13 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <float.h>
#include <curand_kernel.h> #include <curand_kernel.h>
//------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------
//~ base defines //~ base defines
#define global static #define host_global static
#define function static #define function static
//~ typedefs //~ typedefs
@ -27,6 +28,8 @@ typedef float F32;
#define LOG printf #define LOG printf
#define F32_MAX FLT_MAX
#define F32_MIN FLT_MIN
//~ test defines //~ test defines
#define NUM_BLOCKS 1 #define NUM_BLOCKS 1
@ -38,6 +41,9 @@ typedef float F32;
#define CURAND_SEED 1984 #define CURAND_SEED 1984
#define MAX_NUM_ENTITIES 64
//------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------
//~ structs //~ structs
@ -59,6 +65,12 @@ union Vec3F32
F32 v[3]; F32 v[3];
}; };
typedef struct RngF32 RngF32;
struct RngF32
{
F32 min;
F32 max;
};
typedef struct RayF32 RayF32; typedef struct RayF32 RayF32;
struct RayF32 struct RayF32
@ -100,9 +112,34 @@ struct ImageF32
U32 total_num_pixels; U32 total_num_pixels;
}; };
enum EntityKind
{
EntityKind_Nil,
EntityKind_Sphere,
Num_EntityKinds
};
typedef struct HitRecord HitRecord;
struct HitRecord
{
Vec3F32 point;
Vec3F32 normal;
F32 t; // Root parameter for hit sphere
F32 hit; // Hit true or false
F32 front_face;
};
typedef struct Entity Entity;
struct Entity
{
EntityKind kind;
Vec3F32 center;
F32 radius;
};
//------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------
//~ host globals //~ host globals
host_global Entity nil_entity = {EntityKind_Nil, {0.0f, 0.0f, 0.0f}, 0.0f};
//~ device globals //~ device globals
__constant__ CameraF32 camera; __constant__ CameraF32 camera;
@ -184,6 +221,22 @@ __device__ function Vec3F32 lerp_V3F32(F32 s, Vec3F32 a, Vec3F32 b)
return lerp_result; return lerp_result;
} }
__device__ function F32 surrounds_RngF32(RngF32 rng, F32 val)
{
F32 out = (rng.min < val) && (val < rng.max);
return out;
}
__device__ function F32 contains_RngF32(RngF32 rng, F32 val)
{
F32 out = (rng.min <= val) && (val <= rng.max);
return out;
}
__device__ function F32 size_RngF32(RngF32 rng)
{
return rng.max-rng.min;
}
__host__ function void write_buffer_to_ppm(Vec3F32 *buffer, __host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
U32 image_width, U32 image_width,
@ -233,8 +286,11 @@ __host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
fclose(file); fclose(file);
} }
__device__ function F32 hit_sphere(Vec3F32 center, F32 radius, RayF32 r)
__device__ function HitRecord hit_sphere(Vec3F32 center, F32 radius,
RayF32 ray, RngF32 range)
{ {
HitRecord out = {0};
// We take the quadratic formula -b/2a +- sqrt(b*b-4ac) / 2a, // We take the quadratic formula -b/2a +- sqrt(b*b-4ac) / 2a,
// and we calculate only the sqrt part. If there is a hit with the sphere we either // and we calculate only the sqrt part. If there is a hit with the sphere we either
// have two solutions (positive sqrt), one solution (zero sqrt) // have two solutions (positive sqrt), one solution (zero sqrt)
@ -250,11 +306,11 @@ __device__ function F32 hit_sphere(Vec3F32 center, F32 radius, RayF32 r)
// Compare lines with RTIOW // Compare lines with RTIOW
// (C-Q) // (C-Q)
Vec3F32 oc = sub_V3F32(center, r.origin); Vec3F32 oc = sub_V3F32(center, ray.origin);
// a = D.D // a = D.D
F32 a = dot_V3F32(r.direction, r.direction); F32 a = dot_V3F32(ray.direction, ray.direction);
// h = D . (C-Q) // h = D . (C-Q)
F32 h = dot_V3F32(r.direction, oc); F32 h = dot_V3F32(ray.direction, oc);
// c = (C-Q) . (C-Q) - r*r // c = (C-Q) . (C-Q) - r*r
F32 c = dot_V3F32(oc, oc) - radius*radius; F32 c = dot_V3F32(oc, oc) - radius*radius;
@ -264,24 +320,58 @@ __device__ function F32 hit_sphere(Vec3F32 center, F32 radius, RayF32 r)
// intersects the sphere. This is the quadratic problem we get by solving for t in // intersects the sphere. This is the quadratic problem we get by solving for t in
// (C - P(t)) . (C - P(t)) = r*r, r being the radius and P(t) = tD+Q, // (C - P(t)) . (C - P(t)) = r*r, r being the radius and P(t) = tD+Q,
// where D is the direction of the ray and Q the origin of the ray. // where D is the direction of the ray and Q the origin of the ray.
F32 out = 0.0f; F32 hit_true = 0.0f;
// Branching version
// TODO(anton): Maybe try to make a branchless version
F32 root = 0.0f;
if(discriminant < 0.0f) if(discriminant < 0.0f)
{ {
out = -1.0f; hit_true = 0.0f;
} }
else else
{ {
// t = (h += sqrt(h*h-ac))/a, and here we take the smallest solution to get the point // t = (h += sqrt(h*h-ac))/a, and here we take the smallest solution to get the point
// on the sphere closest to the ray origin. // on the sphere closest to the ray origin.
out = (h - __fsqrt_rn(discriminant))/a; F32 sqrtd = __fsqrt_rn(discriminant);
root = (h - sqrtd)/a;
if(!surrounds_RngF32(range, root))
{
root = (h + sqrtd)/a;
if(!surrounds_RngF32(range, root))
{
hit_true = 0.0f;
}
else
{
hit_true = 1.0f;
}
}
else
{
hit_true = 1.0f;
}
} }
out.hit = hit_true;
out.t = root;
// t is the parameter of the (closest) sphere-ray intersection point P(t) = tD+Q,
// where Q is the ray origin and D the ray direction.
out.point = ray_point_F32(out.t, ray); // intersection point
Vec3F32 N = sub_V3F32(out.point, center);
N = scale_V3F32(1.0f/radius, N);
F32 front_face = dot_V3F32(ray.direction, N) < 0.0f;
out.normal = front_face ? N : scale_V3F32(-1.0f, N);
out.front_face = front_face;
return out; return out;
} }
__global__ function void cuda_main(Vec3F32 *pixelbuffer, U32 *idxbuffer) __global__ void cuda_main(Entity *entities, Vec3F32 *pixelbuffer, U32 *idxbuffer)
{ {
U32 x = blockIdx.x * blockDim.x + threadIdx.x; U32 x = blockIdx.x * blockDim.x + threadIdx.x;
@ -296,34 +386,56 @@ __global__ function void cuda_main(Vec3F32 *pixelbuffer, U32 *idxbuffer)
// TODO(anton): Maybe we dont need some ray structure here.. // TODO(anton): Maybe we dont need some ray structure here..
Vec3F32 ray_direction = sub_V3F32(pixel_center, camera.center); Vec3F32 ray_direction = sub_V3F32(pixel_center, camera.center);
RayF32 r = {0}; RayF32 ray = {0};
r.origin = camera.center; ray.origin = camera.center;
r.direction = ray_direction; ray.direction = ray_direction;
F32 norm = norm_V3F32(r.direction);
Vec3F32 unit_dir = scale_V3F32(1.0f/norm, r.direction);
Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f);
Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f);
// Lerp between white and light blue depending on y position RngF32 hit_range = {F32_MIN, F32_MAX};
F32 blend = 0.5f*(unit_dir.y + 1.0f); HitRecord hit_rec = {0};
Vec3F32 pixel_color = {0}; for(U32 entity_idx = 0; entity_idx < MAX_NUM_ENTITIES; entity_idx += 1)
Vec3F32 sphere_center = vec3F32(0.0f, 0.0f, -1.0f);
F32 sphere_radius = 0.5f;
// t is the parameter of the (closest) sphere-ray intersection point P(t) = tD+Q,
// where Q is the ray origin and D the ray direction.
F32 t = hit_sphere(sphere_center, sphere_radius, r);
if(t > 0.0f)
{ {
Vec3F32 intersection_point = ray_point_F32(t, r); Entity *entity = &entities[entity_idx];
Vec3F32 N = sub_V3F32(intersection_point, sphere_center); switch(entity->kind)
N = scale_V3F32(1.0f/sphere_radius, N); {
pixel_color = scale_V3F32(0.5f, add_V3F32(N, vec3F32(1.0f, 1.0f, 1.0f))); case EntityKind_Nil:
{
// no op
} break;
case EntityKind_Sphere:
{
HitRecord temp_hit_rec = hit_sphere(entity->center, entity->radius,
ray, hit_range);
if(temp_hit_rec.hit)
{
hit_rec = temp_hit_rec;
hit_range.max = hit_rec.t;
}
} break;
} // end switch entity kind
}
Vec3F32 pixel_color = {0.0f, 0.0f, 0.0f};
if(hit_rec.hit)
{
// Paint entity
pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f));
pixel_color = scale_V3F32(0.5f, pixel_color);
} }
else else
{ {
// Paint background gradient
F32 norm = norm_V3F32(ray.direction);
Vec3F32 unit_dir = scale_V3F32(1.0f/norm, ray.direction);
Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f);
Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f);
// Lerp between white and light blue depending on y position
F32 blend = 0.5f*(unit_dir.y + 1.0f);
pixel_color = lerp_V3F32(blend, white, light_blue); pixel_color = lerp_V3F32(blend, white, light_blue);
} }
@ -338,7 +450,7 @@ __global__ function void cuda_main(Vec3F32 *pixelbuffer, U32 *idxbuffer)
} }
__global__ function void cuda_init_state(curandState *rand_state) __global__ void cuda_init_state(curandState *rand_state)
{ {
U32 x = threadIdx.x + blockIdx.x * blockDim.x; U32 x = threadIdx.x + blockIdx.x * blockDim.x;
@ -377,7 +489,7 @@ int main()
CameraF32 h_camera = {0}; CameraF32 h_camera = {0};
h_camera.focal_length = 1.0f; h_camera.focal_length = 1.0f;
cuErr = cudaMemcpyToSymbol(camera, &h_camera, sizeof(CameraF32), 0, cuErr = cudaMemcpyToSymbol(camera, &h_camera, sizeof(CameraF32), 0,
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
CUDA_CHECK(cuErr); CUDA_CHECK(cuErr);
// ------------- // -------------
@ -395,7 +507,7 @@ int main()
// upper_left = camera - vec3(0,0,focal_length) - viewport_u/2 - viewport_v/2 // upper_left = camera - vec3(0,0,focal_length) - viewport_u/2 - viewport_v/2
Vec3F32 viewport_upper_left = sub_V3F32(h_camera.center, Vec3F32 viewport_upper_left = sub_V3F32(h_camera.center,
vec3F32(0.0f, 0.0f, h_camera.focal_length)); vec3F32(0.0f, 0.0f, h_camera.focal_length));
viewport_upper_left = sub_V3F32(viewport_upper_left, scale_V3F32(0.5f, h_viewport.u)); viewport_upper_left = sub_V3F32(viewport_upper_left, scale_V3F32(0.5f, h_viewport.u));
viewport_upper_left = sub_V3F32(viewport_upper_left, scale_V3F32(0.5f, h_viewport.v)); viewport_upper_left = sub_V3F32(viewport_upper_left, scale_V3F32(0.5f, h_viewport.v));
h_viewport.upper_left = viewport_upper_left; h_viewport.upper_left = viewport_upper_left;
@ -403,14 +515,46 @@ int main()
// pixel_origin = upper_left + 0.5 * (delta u + delta v) // pixel_origin = upper_left + 0.5 * (delta u + delta v)
Vec3F32 pixel_delta_sum = add_V3F32(h_viewport.pixel_delta_u, h_viewport.pixel_delta_v); Vec3F32 pixel_delta_sum = add_V3F32(h_viewport.pixel_delta_u, h_viewport.pixel_delta_v);
h_viewport.pixel_origin = add_V3F32(viewport_upper_left, h_viewport.pixel_origin = add_V3F32(viewport_upper_left,
scale_V3F32(0.5f, pixel_delta_sum)); scale_V3F32(0.5f, pixel_delta_sum));
cuErr = cudaMemcpyToSymbol(viewport, &h_viewport, sizeof(ViewportF32), 0, cuErr = cudaMemcpyToSymbol(viewport, &h_viewport, sizeof(ViewportF32), 0,
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
CUDA_CHECK(cuErr); CUDA_CHECK(cuErr);
LOG("Viewport size %.2f x %.2f, aspect ratio: %.4f \n", LOG("Viewport size %.2f x %.2f, aspect ratio: %.4f \n",
h_viewport.width, h_viewport.height, h_viewport.aspect_ratio); h_viewport.width, h_viewport.height, h_viewport.aspect_ratio);
//////////////////////////////////////////////////////////////////////////////////////////
// Setup entities and copy to device
U64 entity_list_size = sizeof(Entity)*MAX_NUM_ENTITIES;
Entity *h_entities = (Entity *)malloc(entity_list_size);
for(U32 i = 0; i < MAX_NUM_ENTITIES; i += 1)
{
// Init all entities to nil
//h_entities[i] = {0};
//h_entities[i].kind = EntityKind_Nil;
h_entities[i] = nil_entity;
}
// Manual spheres
{
h_entities[0].kind = EntityKind_Sphere;
h_entities[0].center = vec3F32(0.0f, 0.0f, -1.0f);
h_entities[0].radius = 0.5f;
h_entities[1].kind = EntityKind_Sphere;
h_entities[1].center = vec3F32(0.0f, -100.5f, -1.0f);
h_entities[1].radius = 100.0f;
}
// Copy to device
Entity *entities = 0;
cuErr = cudaMalloc(&entities, entity_list_size);
CUDA_CHECK(cuErr);
cuErr = cudaMemcpy(entities, h_entities, entity_list_size, cudaMemcpyHostToDevice);
CUDA_CHECK(cuErr);
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Define grid, blocks, threads and any buffers such as pixel data and random state // Define grid, blocks, threads and any buffers such as pixel data and random state
// ------------ // ------------
@ -436,6 +580,8 @@ int main()
cuErr = cudaMalloc(&d_rand_state, num_pixels*sizeof(curandState)); cuErr = cudaMalloc(&d_rand_state, num_pixels*sizeof(curandState));
CUDA_CHECK(cuErr); CUDA_CHECK(cuErr);
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Initialise CUDA state such as random number states per thread. // Initialise CUDA state such as random number states per thread.
// This is separate for performance measurements // This is separate for performance measurements
@ -456,7 +602,7 @@ int main()
LOG("threads per block: (%i, %i %i) \n", LOG("threads per block: (%i, %i %i) \n",
threads_per_block.x, threads_per_block.y, threads_per_block.z); threads_per_block.x, threads_per_block.y, threads_per_block.z);
cuda_main<<<blocks_per_grid, threads_per_block>>>(pixel_buffer, idxbuffer); cuda_main<<<blocks_per_grid, threads_per_block>>>(entities, pixel_buffer, idxbuffer);
cuErr = cudaGetLastError(); cuErr = cudaGetLastError();
CUDA_CHECK(cuErr); CUDA_CHECK(cuErr);
cuErr = cudaDeviceSynchronize(); cuErr = cudaDeviceSynchronize();
@ -468,13 +614,13 @@ int main()
// ------------ // ------------
Vec3F32 *h_pixel_buffer = (Vec3F32 *)malloc(pixel_buffer_size); Vec3F32 *h_pixel_buffer = (Vec3F32 *)malloc(pixel_buffer_size);
cuErr = cudaMemcpy(h_pixel_buffer, pixel_buffer, pixel_buffer_size, cuErr = cudaMemcpy(h_pixel_buffer, pixel_buffer, pixel_buffer_size,
cudaMemcpyDeviceToHost); cudaMemcpyDeviceToHost);
CUDA_CHECK(cuErr); CUDA_CHECK(cuErr);
// TODO(anton): remove debug buffer // TODO(anton): remove debug buffer
U32 *h_idxbuffer = (U32 *)malloc(num_pixels*sizeof(U32)); U32 *h_idxbuffer = (U32 *)malloc(num_pixels*sizeof(U32));
cuErr = cudaMemcpy(h_idxbuffer, idxbuffer, num_pixels*sizeof(U32), cuErr = cudaMemcpy(h_idxbuffer, idxbuffer, num_pixels*sizeof(U32),
cudaMemcpyDeviceToHost); cudaMemcpyDeviceToHost);
write_buffer_to_ppm(h_pixel_buffer, h_image.width, h_image.height, h_idxbuffer); write_buffer_to_ppm(h_pixel_buffer, h_image.width, h_image.height, h_idxbuffer);

Binary file not shown.