antialising working!
This commit is contained in:
parent
a615f8efe8
commit
fe726e9f49
181
src/main.cu
181
src/main.cu
@ -31,18 +31,19 @@ typedef float F32;
|
|||||||
#define F32_MAX FLT_MAX
|
#define F32_MAX FLT_MAX
|
||||||
#define F32_MIN FLT_MIN
|
#define F32_MIN FLT_MIN
|
||||||
|
|
||||||
//~ test defines
|
|
||||||
|
//------------------------------------------------------------------------------------------
|
||||||
|
//~ Program parameter defines
|
||||||
#define NUM_BLOCKS 1
|
#define NUM_BLOCKS 1
|
||||||
#define NUM_THREADS 32
|
#define NUM_THREADS 32
|
||||||
|
|
||||||
|
|
||||||
#define IMAGE_WIDTH 1920
|
#define IMAGE_WIDTH 1920
|
||||||
#define ASPECT_RATIO 1.7778f // 16/9
|
#define ASPECT_RATIO 1.7778f // 16/9
|
||||||
|
|
||||||
#define CURAND_SEED 1984
|
#define CURAND_SEED 1984
|
||||||
|
|
||||||
|
|
||||||
#define MAX_NUM_ENTITIES 64
|
#define MAX_NUM_ENTITIES 64
|
||||||
|
#define SAMPLES_PER_PIXEL 32
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
//~ structs
|
//~ structs
|
||||||
@ -101,6 +102,7 @@ struct CameraF32
|
|||||||
Vec3F32 center;
|
Vec3F32 center;
|
||||||
Vec3F32 up;
|
Vec3F32 up;
|
||||||
F32 focal_length;
|
F32 focal_length;
|
||||||
|
F32 pixel_sample_scale;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct ImageF32 ImageF32;
|
typedef struct ImageF32 ImageF32;
|
||||||
@ -227,21 +229,22 @@ __device__ function F32 surrounds_RngF32(RngF32 rng, F32 val)
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
__device__ function F32 contains_RngF32(RngF32 rng, F32 val)
|
//
|
||||||
{
|
//__device__ function F32 contains_RngF32(RngF32 rng, F32 val)
|
||||||
F32 out = (rng.min <= val) && (val <= rng.max);
|
//{
|
||||||
return out;
|
// F32 out = (rng.min <= val) && (val <= rng.max);
|
||||||
}
|
// return out;
|
||||||
|
//}
|
||||||
__device__ function F32 size_RngF32(RngF32 rng)
|
//
|
||||||
{
|
//__device__ function F32 size_RngF32(RngF32 rng)
|
||||||
return rng.max-rng.min;
|
//{
|
||||||
}
|
// return rng.max-rng.min;
|
||||||
|
//}
|
||||||
|
//
|
||||||
|
|
||||||
__host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
__host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
||||||
U32 image_width,
|
U32 image_width,
|
||||||
U32 image_height,
|
U32 image_height)
|
||||||
U32 *idx_buffer)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
const char *filename = "output.ppm";
|
const char *filename = "output.ppm";
|
||||||
@ -267,9 +270,7 @@ __host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
|||||||
{
|
{
|
||||||
// We represent RGB values by floats internally and scale to integer values
|
// We represent RGB values by floats internally and scale to integer values
|
||||||
U32 idx = i * image_width + j;
|
U32 idx = i * image_width + j;
|
||||||
if(idx_buffer[idx] != 0) {
|
|
||||||
//LOG("idx %i, idxbuffer[idx] = %i \n", idx, idx_buffer[idx]);
|
|
||||||
}
|
|
||||||
F32 r = buffer[idx].r;
|
F32 r = buffer[idx].r;
|
||||||
F32 g = buffer[idx].g;
|
F32 g = buffer[idx].g;
|
||||||
F32 b = buffer[idx].b;
|
F32 b = buffer[idx].b;
|
||||||
@ -286,9 +287,26 @@ __host__ function void write_buffer_to_ppm(Vec3F32 *buffer,
|
|||||||
fclose(file);
|
fclose(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ function F32
|
||||||
|
clamp_F32(RngF32 rng, F32 val)
|
||||||
|
{
|
||||||
|
F32 out = fmaxf(rng.min, val);
|
||||||
|
out = fminf(val, rng.max);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
__device__ function HitRecord hit_sphere(Vec3F32 center, F32 radius,
|
__device__ function Vec3F32
|
||||||
RayF32 ray, RngF32 range)
|
clamp_V3F32(RngF32 rng, Vec3F32 v)
|
||||||
|
{
|
||||||
|
Vec3F32 out = {0};
|
||||||
|
out.x = clamp_F32(rng, v.x);
|
||||||
|
out.y = clamp_F32(rng, v.y);
|
||||||
|
out.z = clamp_F32(rng, v.z);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ function HitRecord
|
||||||
|
hit_sphere(Vec3F32 center, F32 radius, RayF32 ray, RngF32 range)
|
||||||
{
|
{
|
||||||
HitRecord out = {0};
|
HitRecord out = {0};
|
||||||
// We take the quadratic formula -b/2a +- sqrt(b*b-4ac) / 2a,
|
// We take the quadratic formula -b/2a +- sqrt(b*b-4ac) / 2a,
|
||||||
@ -369,27 +387,41 @@ __device__ function HitRecord hit_sphere(Vec3F32 center, F32 radius,
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ function RayF32
|
||||||
|
ray_get_F32(F32 x, F32 y, Vec3F32 cam_center, curandState local_rand_state)
|
||||||
__global__ void cuda_main(Entity *entities, Vec3F32 *pixelbuffer, U32 *idxbuffer)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
U32 x = blockIdx.x * blockDim.x + threadIdx.x;
|
RayF32 out = {0};
|
||||||
U32 y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
U32 idx = y * image.width + x;
|
|
||||||
|
|
||||||
if(x < image.width && y < image.height)
|
// We have unit vectors delta_u and delta_v in the horizontal and vertical viewport directions.
|
||||||
{
|
Vec3F32 px_u = scale_V3F32(x, viewport.pixel_delta_u);
|
||||||
Vec3F32 px_u = scale_V3F32((F32)x, viewport.pixel_delta_u);
|
Vec3F32 px_v = scale_V3F32(y, viewport.pixel_delta_v);
|
||||||
Vec3F32 px_v = scale_V3F32((F32)y, viewport.pixel_delta_v);
|
|
||||||
Vec3F32 pixel_center = add_V3F32(viewport.pixel_origin, add_V3F32(px_u, px_v));
|
Vec3F32 pixel_center = add_V3F32(viewport.pixel_origin, add_V3F32(px_u, px_v));
|
||||||
|
|
||||||
// TODO(anton): Maybe we dont need some ray structure here..
|
// To get anti-aliasing we make a random offset from the pixel center
|
||||||
Vec3F32 ray_direction = sub_V3F32(pixel_center, camera.center);
|
F32 rand_u = curand_uniform(&local_rand_state) - 0.5f;
|
||||||
RayF32 ray = {0};
|
F32 rand_v = curand_uniform(&local_rand_state) - 0.5f;
|
||||||
ray.origin = camera.center;
|
// the rand u and rand v are offsets from a pixel in the [-0.5, 0.5] square.
|
||||||
ray.direction = ray_direction;
|
// We need to put that into the world space of our viewport
|
||||||
|
Vec3F32 offset_u = scale_V3F32(rand_u, viewport.pixel_delta_u);
|
||||||
|
Vec3F32 offset_v = scale_V3F32(rand_v, viewport.pixel_delta_v);
|
||||||
|
|
||||||
|
// Then we shift the pixel center with the offsets in both directions
|
||||||
|
Vec3F32 pixel_sample = add_V3F32(pixel_center, add_V3F32(offset_u, offset_v));
|
||||||
|
// With a randomised point around the pixel center we can define the ray direction
|
||||||
|
// as the vector from the camera center to the point on the viewport.
|
||||||
|
Vec3F32 ray_direction = sub_V3F32(pixel_sample, camera.center);
|
||||||
|
|
||||||
|
out.origin = camera.center;
|
||||||
|
out.direction = ray_direction;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trace a ray and get a pixel color sample
|
||||||
|
__device__ function Vec3F32
|
||||||
|
get_sample_color(RayF32 ray, Entity *entities)
|
||||||
|
{
|
||||||
|
Vec3F32 out = {0};
|
||||||
|
|
||||||
RngF32 hit_range = {F32_MIN, F32_MAX};
|
RngF32 hit_range = {F32_MIN, F32_MAX};
|
||||||
HitRecord hit_rec = {0};
|
HitRecord hit_rec = {0};
|
||||||
@ -418,12 +450,14 @@ __global__ void cuda_main(Entity *entities, Vec3F32 *pixelbuffer, U32 *idxbuffer
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Vec3F32 pixel_color = {0.0f, 0.0f, 0.0f};
|
Vec3F32 sample_pixel_color = vec3F32(0.0f, 0.0f, 0.0f);
|
||||||
if(hit_rec.hit)
|
if(hit_rec.hit)
|
||||||
{
|
{
|
||||||
// Paint entity
|
// Paint entity
|
||||||
pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f));
|
sample_pixel_color = add_V3F32(hit_rec.normal, vec3F32(1.0f, 1.0f, 1.0f));
|
||||||
pixel_color = scale_V3F32(0.5f, pixel_color);
|
sample_pixel_color = scale_V3F32(0.5f, sample_pixel_color);
|
||||||
|
// debug
|
||||||
|
//sample_pixel_color = vec3F32(1.0f, 0.0f, 0.0f);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -436,16 +470,47 @@ __global__ void cuda_main(Entity *entities, Vec3F32 *pixelbuffer, U32 *idxbuffer
|
|||||||
// Lerp between white and light blue depending on y position
|
// Lerp between white and light blue depending on y position
|
||||||
F32 blend = 0.5f*(unit_dir.y + 1.0f);
|
F32 blend = 0.5f*(unit_dir.y + 1.0f);
|
||||||
|
|
||||||
pixel_color = lerp_V3F32(blend, white, light_blue);
|
sample_pixel_color = lerp_V3F32(blend, white, light_blue);
|
||||||
}
|
}
|
||||||
|
|
||||||
pixelbuffer[idx] = pixel_color;
|
out = sample_pixel_color;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
//pixelbuffer[idx].x = (F32)x/(F32)image.width;
|
__global__ void
|
||||||
//pixelbuffer[idx].y = (F32)y/(F32)image.height;
|
cuda_main(Entity *entities, Vec3F32 *pixelbuffer, curandState *rand_state)
|
||||||
//pixelbuffer[idx].z = 0.0f;
|
{
|
||||||
|
|
||||||
idxbuffer[idx] = idx;
|
U32 x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
U32 y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
U32 idx = y * image.width + x;
|
||||||
|
|
||||||
|
if(x < image.width && y < image.height)
|
||||||
|
{
|
||||||
|
|
||||||
|
// We are adding all samples and then dividing by num samples to get the mean, so
|
||||||
|
// we initialise the color for this pixel to black.
|
||||||
|
// Loop over all pixel samples
|
||||||
|
Vec3F32 pixel_color = vec3F32(0.0f, 0.0f, 0.0f);
|
||||||
|
for(U32 sample_idx = 0; sample_idx < SAMPLES_PER_PIXEL; sample_idx += 1)
|
||||||
|
{
|
||||||
|
|
||||||
|
// TODO(anton): Maybe we can randomise things directly here as the
|
||||||
|
// nvidia accelerated version, where we just put the x, y indices with a
|
||||||
|
// randomised shift and normalise to viewport space by dividing by max x, max y
|
||||||
|
RayF32 ray = ray_get_F32((F32)x, (F32)y, camera.center, rand_state[idx]);
|
||||||
|
|
||||||
|
Vec3F32 sample_pixel_color = get_sample_color(ray, entities);
|
||||||
|
|
||||||
|
F32 debug_sample = curand_uniform(&rand_state[idx]);
|
||||||
|
Vec3F32 debug = vec3F32(debug_sample, debug_sample, debug_sample);
|
||||||
|
//pixel_color = add_V3F32(pixel_color, debug);
|
||||||
|
pixel_color = add_V3F32(pixel_color, sample_pixel_color);
|
||||||
|
}
|
||||||
|
|
||||||
|
pixel_color = scale_V3F32(1.0f/(F32)SAMPLES_PER_PIXEL, pixel_color);
|
||||||
|
RngF32 clamp_range = {0.0f, 1.0f};
|
||||||
|
pixelbuffer[idx] = clamp_V3F32(clamp_range, pixel_color);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -488,6 +553,9 @@ int main()
|
|||||||
// -------------
|
// -------------
|
||||||
CameraF32 h_camera = {0};
|
CameraF32 h_camera = {0};
|
||||||
h_camera.focal_length = 1.0f;
|
h_camera.focal_length = 1.0f;
|
||||||
|
F32 samples_per_pixel = (F32)SAMPLES_PER_PIXEL;
|
||||||
|
h_camera.pixel_sample_scale = 1.0f/samples_per_pixel;
|
||||||
|
|
||||||
cuErr = cudaMemcpyToSymbol(camera, &h_camera, sizeof(CameraF32), 0,
|
cuErr = cudaMemcpyToSymbol(camera, &h_camera, sizeof(CameraF32), 0,
|
||||||
cudaMemcpyHostToDevice);
|
cudaMemcpyHostToDevice);
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
@ -571,22 +639,16 @@ int main()
|
|||||||
cuErr = cudaMalloc(&pixel_buffer, pixel_buffer_size);
|
cuErr = cudaMalloc(&pixel_buffer, pixel_buffer_size);
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
|
|
||||||
// This is just a debug buffer, TODO(anton): remove
|
curandState *rand_state = 0;
|
||||||
U32 *idxbuffer = 0;
|
cuErr = cudaMalloc(&rand_state, num_pixels*sizeof(curandState));
|
||||||
cuErr = cudaMalloc(&idxbuffer, sizeof(U32)*num_pixels);
|
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
|
|
||||||
curandState *d_rand_state = 0;
|
|
||||||
cuErr = cudaMalloc(&d_rand_state, num_pixels*sizeof(curandState));
|
|
||||||
CUDA_CHECK(cuErr);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Initialise CUDA state such as random number states per thread.
|
// Initialise CUDA state such as random number states per thread.
|
||||||
// This is separate for performance measurements
|
// This is separate for performance measurements
|
||||||
// ------------
|
// ------------
|
||||||
cuda_init_state<<<blocks_per_grid, threads_per_block>>>(d_rand_state);
|
cuda_init_state<<<blocks_per_grid, threads_per_block>>>(rand_state);
|
||||||
cuErr = cudaGetLastError();
|
cuErr = cudaGetLastError();
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
cuErr = cudaDeviceSynchronize();
|
cuErr = cudaDeviceSynchronize();
|
||||||
@ -602,7 +664,7 @@ int main()
|
|||||||
LOG("threads per block: (%i, %i %i) \n",
|
LOG("threads per block: (%i, %i %i) \n",
|
||||||
threads_per_block.x, threads_per_block.y, threads_per_block.z);
|
threads_per_block.x, threads_per_block.y, threads_per_block.z);
|
||||||
|
|
||||||
cuda_main<<<blocks_per_grid, threads_per_block>>>(entities, pixel_buffer, idxbuffer);
|
cuda_main<<<blocks_per_grid, threads_per_block>>>(entities, pixel_buffer, rand_state);
|
||||||
cuErr = cudaGetLastError();
|
cuErr = cudaGetLastError();
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
cuErr = cudaDeviceSynchronize();
|
cuErr = cudaDeviceSynchronize();
|
||||||
@ -617,15 +679,14 @@ int main()
|
|||||||
cudaMemcpyDeviceToHost);
|
cudaMemcpyDeviceToHost);
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
|
|
||||||
// TODO(anton): remove debug buffer
|
write_buffer_to_ppm(h_pixel_buffer, h_image.width, h_image.height);
|
||||||
U32 *h_idxbuffer = (U32 *)malloc(num_pixels*sizeof(U32));
|
|
||||||
cuErr = cudaMemcpy(h_idxbuffer, idxbuffer, num_pixels*sizeof(U32),
|
|
||||||
cudaMemcpyDeviceToHost);
|
|
||||||
|
|
||||||
write_buffer_to_ppm(h_pixel_buffer, h_image.width, h_image.height, h_idxbuffer);
|
|
||||||
|
|
||||||
cuErr = cudaFree(pixel_buffer);
|
cuErr = cudaFree(pixel_buffer);
|
||||||
CUDA_CHECK(cuErr);
|
CUDA_CHECK(cuErr);
|
||||||
|
cuErr = cudaFree(entities);
|
||||||
|
CUDA_CHECK(cuErr);
|
||||||
|
cuErr = cudaFree(rand_state);
|
||||||
|
CUDA_CHECK(cuErr);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
BIN
timeBuild.ctm
BIN
timeBuild.ctm
Binary file not shown.
Loading…
Reference in New Issue
Block a user