transfer from office work, working entity loop for spheres
This commit is contained in:
parent
fbf9421843
commit
a615f8efe8
212
src/main.cu
212
src/main.cu
@ -1,12 +1,13 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <float.h>
|
||||||
|
|
||||||
#include <curand_kernel.h>
|
#include <curand_kernel.h>
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
//~ base defines
|
//~ base defines
|
||||||
|
|
||||||
#define global static
|
#define host_global static
|
||||||
#define function static
|
#define function static
|
||||||
|
|
||||||
//~ typedefs
|
//~ typedefs
|
||||||
@ -27,6 +28,8 @@ typedef float F32;
|
|||||||
|
|
||||||
#define LOG printf
|
#define LOG printf
|
||||||
|
|
||||||
|
#define F32_MAX FLT_MAX
|
||||||
|
#define F32_MIN FLT_MIN
|
||||||
|
|
||||||
//~ test defines
|
//~ test defines
|
||||||
#define NUM_BLOCKS 1
|
#define NUM_BLOCKS 1
|
||||||
@ -38,6 +41,9 @@ typedef float F32;
|
|||||||
|
|
||||||
#define CURAND_SEED 1984
|
#define CURAND_SEED 1984
|
||||||
|
|
||||||
|
|
||||||
|
#define MAX_NUM_ENTITIES 64
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
//~ structs
|
//~ structs
|
||||||
|
|
||||||
@ -59,6 +65,12 @@ union Vec3F32
|
|||||||
F32 v[3];
|
F32 v[3];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef struct RngF32 RngF32;
|
||||||
|
struct RngF32
|
||||||
|
{
|
||||||
|
F32 min;
|
||||||
|
F32 max;
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct RayF32 RayF32;
|
typedef struct RayF32 RayF32;
|
||||||
struct RayF32
|
struct RayF32
|
||||||
@ -100,9 +112,34 @@ struct ImageF32
|
|||||||
U32 total_num_pixels;
|
U32 total_num_pixels;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum EntityKind
|
||||||
|
{
|
||||||
|
EntityKind_Nil,
|
||||||
|
EntityKind_Sphere,
|
||||||
|
Num_EntityKinds
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct HitRecord HitRecord;
|
||||||
|
struct HitRecord
|
||||||
|
{
|
||||||
|
Vec3F32 point;
|
||||||
|
Vec3F32 normal;
|
||||||
|
F32 t; // Root parameter for hit sphere
|
||||||
|
F32 hit; // Hit true or false
|
||||||
|
F32 front_face;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct Entity Entity;
|
||||||
|
struct Entity
|
||||||
|
{
|
||||||
|
EntityKind kind;
|
||||||
|
Vec3F32 center;
|
||||||
|
F32 radius;
|
||||||
|
};
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
//~ host globals
|
//~ host globals
|
||||||
|
host_global Entity nil_entity = {EntityKind_Nil, {0.0f, 0.0f, 0.0f}, 0.0f};
|
||||||
//~ device globals
|
//~ device globals
|
||||||
|
|
||||||
__constant__ CameraF32 camera;
|
__constant__ CameraF32 camera;
|
||||||
@ -184,6 +221,22 @@ __device__ function Vec3F32 lerp_V3F32(F32 s, Vec3F32 a, Vec3F32 b)
|
|||||||
return lerp_result;
|
return lerp_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ function F32 surrounds_RngF32(RngF32 rng, F32 val)
|
||||||
|
{
|
||||||
|
F32 out = (rng.min < val) && (val < rng.max);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ function F32 contains_RngF32(RngF32 rng, F32 val)
|
||||||
|
{
|
||||||
|
F32 out = (rng.min <= val) && (val <= rng.max);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ function F32 size_RngF32(RngF32 rng)
|
||||||
|
{
|
||||||
|
return rng.max-rng.min;
|
||||||
|
}
|
||||||
|
|
||||||
__host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
__host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
||||||
U32 image_width,
|
U32 image_width,
|
||||||
@ -233,8 +286,11 @@ __host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ function F32 hit_sphere(Vec3F32 center, F32 radius, RayF32 r)
|
|
||||||
|
__device__ function HitRecord hit_sphere(Vec3F32 center, F32 radius,
|
||||||
|
RayF32 ray, RngF32 range)
|
||||||
{
|
{
|
||||||
|
HitRecord out = {0};
|
||||||
// We take the quadratic formula -b/2a +- sqrt(b*b-4ac) / 2a,
|
// We take the quadratic formula -b/2a +- sqrt(b*b-4ac) / 2a,
|
||||||
// and we calculate only the sqrt part. If there is a hit with the sphere we either
|
// and we calculate only the sqrt part. If there is a hit with the sphere we either
|
||||||
// have two solutions (positive sqrt), one solution (zero sqrt)
|
// have two solutions (positive sqrt), one solution (zero sqrt)
|
||||||
@ -250,11 +306,11 @@ __device__ function F32 hit_sphere(Vec3F32 center, F32 radius, RayF32 r)
|
|||||||
|
|
||||||
// Compare lines with RTIOW
|
// Compare lines with RTIOW
|
||||||
// (C-Q)
|
// (C-Q)
|
||||||
Vec3F32 oc = sub_V3F32(center, r.origin);
|
Vec3F32 oc = sub_V3F32(center, ray.origin);
|
||||||
// a = D.D
|
// a = D.D
|
||||||
F32 a = dot_V3F32(r.direction, r.direction);
|
F32 a = dot_V3F32(ray.direction, ray.direction);
|
||||||
// h = D . (C-Q)
|
// h = D . (C-Q)
|
||||||
F32 h = dot_V3F32(r.direction, oc);
|
F32 h = dot_V3F32(ray.direction, oc);
|
||||||
// c = (C-Q) . (C-Q) - r*r
|
// c = (C-Q) . (C-Q) - r*r
|
||||||
F32 c = dot_V3F32(oc, oc) - radius*radius;
|
F32 c = dot_V3F32(oc, oc) - radius*radius;
|
||||||
|
|
||||||
@ -264,24 +320,58 @@ __device__ function F32 hit_sphere(Vec3F32 center, F32 radius, RayF32 r)
|
|||||||
// intersects the sphere. This is the quadratic problem we get by solving for t in
|
// intersects the sphere. This is the quadratic problem we get by solving for t in
|
||||||
// (C - P(t)) . (C - P(t)) = r*r, r being the radius and P(t) = tD+Q,
|
// (C - P(t)) . (C - P(t)) = r*r, r being the radius and P(t) = tD+Q,
|
||||||
// where D is the direction of the ray and Q the origin of the ray.
|
// where D is the direction of the ray and Q the origin of the ray.
|
||||||
F32 out = 0.0f;
|
F32 hit_true = 0.0f;
|
||||||
|
|
||||||
|
// Branching version
|
||||||
|
// TODO(anton): Maybe try to make a branchless version
|
||||||
|
F32 root = 0.0f;
|
||||||
if(discriminant < 0.0f)
|
if(discriminant < 0.0f)
|
||||||
{
|
{
|
||||||
out = -1.0f;
|
hit_true = 0.0f;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// t = (h += sqrt(h*h-ac))/a, and here we take the smallest solution to get the point
|
// t = (h += sqrt(h*h-ac))/a, and here we take the smallest solution to get the point
|
||||||
// on the sphere closest to the ray origin.
|
// on the sphere closest to the ray origin.
|
||||||
out = (h - __fsqrt_rn(discriminant))/a;
|
F32 sqrtd = __fsqrt_rn(discriminant);
|
||||||
|
root = (h - sqrtd)/a;
|
||||||
|
if(!surrounds_RngF32(range, root))
|
||||||
|
{
|
||||||
|
root = (h + sqrtd)/a;
|
||||||
|
if(!surrounds_RngF32(range, root))
|
||||||
|
{
|
||||||
|
hit_true = 0.0f;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hit_true = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hit_true = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out.hit = hit_true;
|
||||||
|
out.t = root;
|
||||||
|
|
||||||
|
// t is the parameter of the (closest) sphere-ray intersection point P(t) = tD+Q,
|
||||||
|
// where Q is the ray origin and D the ray direction.
|
||||||
|
out.point = ray_point_F32(out.t, ray); // intersection point
|
||||||
|
Vec3F32 N = sub_V3F32(out.point, center);
|
||||||
|
N = scale_V3F32(1.0f/radius, N);
|
||||||
|
|
||||||
|
F32 front_face = dot_V3F32(ray.direction, N) < 0.0f;
|
||||||
|
out.normal = front_face ? N : scale_V3F32(-1.0f, N);
|
||||||
|
out.front_face = front_face;
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__global__ function void cuda_main(Vec3F32 *pixelbuffer, U32 *idxbuffer)
|
__global__ void cuda_main(Entity *entities, Vec3F32 *pixelbuffer, U32 *idxbuffer)
|
||||||
{
|
{
|
||||||
|
|
||||||
U32 x = blockIdx.x * blockDim.x + threadIdx.x;
|
U32 x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
@ -296,34 +386,56 @@ __global__ function void cuda_main(Vec3F32 *pixelbuffer, U32 *idxbuffer)
|
|||||||
|
|
||||||
// TODO(anton): Maybe we dont need some ray structure here..
|
// TODO(anton): Maybe we dont need some ray structure here..
|
||||||
Vec3F32 ray_direction = sub_V3F32(pixel_center, camera.center);
|
Vec3F32 ray_direction = sub_V3F32(pixel_center, camera.center);
|
||||||
RayF32 r = {0};
|
RayF32 ray = {0};
|
||||||
r.origin = camera.center;
|
ray.origin = camera.center;
|
||||||
r.direction = ray_direction;
|
ray.direction = ray_direction;
|
||||||
|
|
||||||
F32 norm = norm_V3F32(r.direction);
|
|
||||||
Vec3F32 unit_dir = scale_V3F32(1.0f/norm, r.direction);
|
RngF32 hit_range = {F32_MIN, F32_MAX};
|
||||||
|
HitRecord hit_rec = {0};
|
||||||
|
for(U32 entity_idx = 0; entity_idx < MAX_NUM_ENTITIES; entity_idx += 1)
|
||||||
|
{
|
||||||
|
Entity *entity = &entities[entity_idx];
|
||||||
|
switch(entity->kind)
|
||||||
|
{
|
||||||
|
case EntityKind_Nil:
|
||||||
|
{
|
||||||
|
// no op
|
||||||
|
} break;
|
||||||
|
|
||||||
|
case EntityKind_Sphere:
|
||||||
|
{
|
||||||
|
HitRecord temp_hit_rec = hit_sphere(entity->center, entity->radius,
|
||||||
|
ray, hit_range);
|
||||||
|
if(temp_hit_rec.hit)
|
||||||
|
{
|
||||||
|
hit_rec = temp_hit_rec;
|
||||||
|
hit_range.max = hit_rec.t;
|
||||||
|
}
|
||||||
|
|
||||||
|
} break;
|
||||||
|
} // end switch entity kind
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
Vec3F32 pixel_color = {0.0f, 0.0f, 0.0f};
|
||||||
|
if(hit_rec.hit)
|
||||||
|
{
|
||||||
|
// Paint entity
|
||||||
|
pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f));
|
||||||
|
pixel_color = scale_V3F32(0.5f, pixel_color);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Paint background gradient
|
||||||
|
F32 norm = norm_V3F32(ray.direction);
|
||||||
|
Vec3F32 unit_dir = scale_V3F32(1.0f/norm, ray.direction);
|
||||||
Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f);
|
Vec3F32 white = vec3F32(1.0f, 1.0f, 1.0f);
|
||||||
Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f);
|
Vec3F32 light_blue = vec3F32(0.5f, 0.7f, 1.0f);
|
||||||
|
|
||||||
// Lerp between white and light blue depending on y position
|
// Lerp between white and light blue depending on y position
|
||||||
F32 blend = 0.5f*(unit_dir.y + 1.0f);
|
F32 blend = 0.5f*(unit_dir.y + 1.0f);
|
||||||
Vec3F32 pixel_color = {0};
|
|
||||||
|
|
||||||
Vec3F32 sphere_center = vec3F32(0.0f, 0.0f, -1.0f);
|
|
||||||
F32 sphere_radius = 0.5f;
|
|
||||||
|
|
||||||
// t is the parameter of the (closest) sphere-ray intersection point P(t) = tD+Q,
|
|
||||||
// where Q is the ray origin and D the ray direction.
|
|
||||||
F32 t = hit_sphere(sphere_center, sphere_radius, r);
|
|
||||||
if(t > 0.0f)
|
|
||||||
{
|
|
||||||
Vec3F32 intersection_point = ray_point_F32(t, r);
|
|
||||||
Vec3F32 N = sub_V3F32(intersection_point, sphere_center);
|
|
||||||
N = scale_V3F32(1.0f/sphere_radius, N);
|
|
||||||
pixel_color = scale_V3F32(0.5f, add_V3F32(N, vec3F32(1.0f, 1.0f, 1.0f)));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
pixel_color = lerp_V3F32(blend, white, light_blue);
|
pixel_color = lerp_V3F32(blend, white, light_blue);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -338,7 +450,7 @@ __global__ function void cuda_main(Vec3F32 *pixelbuffer, U32 *idxbuffer)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ function void cuda_init_state(curandState *rand_state)
|
__global__ void cuda_init_state(curandState *rand_state)
|
||||||
{
|
{
|
||||||
|
|
||||||
U32 x = threadIdx.x + blockIdx.x * blockDim.x;
|
U32 x = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
@ -411,6 +523,38 @@ int main()
|
|||||||
LOG("Viewport size %.2f x %.2f, aspect ratio: %.4f \n",
|
LOG("Viewport size %.2f x %.2f, aspect ratio: %.4f \n",
|
||||||
h_viewport.width, h_viewport.height, h_viewport.aspect_ratio);
|
h_viewport.width, h_viewport.height, h_viewport.aspect_ratio);
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Setup entities and copy to device
|
||||||
|
U64 entity_list_size = sizeof(Entity)*MAX_NUM_ENTITIES;
|
||||||
|
Entity *h_entities = (Entity *)malloc(entity_list_size);
|
||||||
|
for(U32 i = 0; i < MAX_NUM_ENTITIES; i += 1)
|
||||||
|
{
|
||||||
|
// Init all entities to nil
|
||||||
|
//h_entities[i] = {0};
|
||||||
|
//h_entities[i].kind = EntityKind_Nil;
|
||||||
|
h_entities[i] = nil_entity;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manual spheres
|
||||||
|
{
|
||||||
|
h_entities[0].kind = EntityKind_Sphere;
|
||||||
|
h_entities[0].center = vec3F32(0.0f, 0.0f, -1.0f);
|
||||||
|
h_entities[0].radius = 0.5f;
|
||||||
|
|
||||||
|
h_entities[1].kind = EntityKind_Sphere;
|
||||||
|
h_entities[1].center = vec3F32(0.0f, -100.5f, -1.0f);
|
||||||
|
h_entities[1].radius = 100.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy to device
|
||||||
|
Entity *entities = 0;
|
||||||
|
cuErr = cudaMalloc(&entities, entity_list_size);
|
||||||
|
CUDA_CHECK(cuErr);
|
||||||
|
cuErr = cudaMemcpy(entities, h_entities, entity_list_size, cudaMemcpyHostToDevice);
|
||||||
|
CUDA_CHECK(cuErr);
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Define grid, blocks, threads and any buffers such as pixel data and random state
|
// Define grid, blocks, threads and any buffers such as pixel data and random state
|
||||||
// ------------
|
// ------------
|
||||||
@ -436,6 +580,8 @@ int main()
|
|||||||
cuErr = cudaMalloc(&d_rand_state, num_pixels*sizeof(curandState));
|
cuErr = cudaMalloc(&d_rand_state, num_pixels*sizeof(curandState));
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Initialise CUDA state such as random number states per thread.
|
// Initialise CUDA state such as random number states per thread.
|
||||||
// This is separate for performance measurements
|
// This is separate for performance measurements
|
||||||
@ -456,7 +602,7 @@ int main()
|
|||||||
LOG("threads per block: (%i, %i %i) \n",
|
LOG("threads per block: (%i, %i %i) \n",
|
||||||
threads_per_block.x, threads_per_block.y, threads_per_block.z);
|
threads_per_block.x, threads_per_block.y, threads_per_block.z);
|
||||||
|
|
||||||
cuda_main<<<blocks_per_grid, threads_per_block>>>(pixel_buffer, idxbuffer);
|
cuda_main<<<blocks_per_grid, threads_per_block>>>(entities, pixel_buffer, idxbuffer);
|
||||||
cuErr = cudaGetLastError();
|
cuErr = cudaGetLastError();
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
cuErr = cudaDeviceSynchronize();
|
cuErr = cudaDeviceSynchronize();
|
||||||
|
|||||||
BIN
timeBuild.ctm
BIN
timeBuild.ctm
Binary file not shown.
Loading…
Reference in New Issue
Block a user