From ae39bb793f473a5604e37bbe007c62a326517d21 Mon Sep 17 00:00:00 2001 From: Anton Ljungdahl Date: Mon, 4 Nov 2024 23:41:32 +0100 Subject: [PATCH] some refactoring and quicksort --- src/hf/bsplines_and_grid.c | 297 ++++++++++++ src/hf/bsplines_and_grid.h | 46 ++ src/hf/file_io.c | 63 +++ src/hf/file_io.h | 17 + src/hf/hf_base.c | 177 +++++++ src/hf/hf_base.h | 47 ++ src/hf/tests.c | 247 ++++++++++ src/main.c | 911 ++++--------------------------------- 8 files changed, 982 insertions(+), 823 deletions(-) create mode 100644 src/hf/bsplines_and_grid.c create mode 100644 src/hf/bsplines_and_grid.h create mode 100644 src/hf/file_io.c create mode 100644 src/hf/file_io.h create mode 100644 src/hf/hf_base.c create mode 100644 src/hf/hf_base.h create mode 100644 src/hf/tests.c diff --git a/src/hf/bsplines_and_grid.c b/src/hf/bsplines_and_grid.c new file mode 100644 index 0000000..b8e46f2 --- /dev/null +++ b/src/hf/bsplines_and_grid.c @@ -0,0 +1,297 @@ +global BSplineCtx g_bspline_ctx = {0}; +global Grid g_grid = {0}; +global U32 g_debug_bspline_matrix = 0; + + +function F64 +bspline_recursion(F64 x, U32 k, U32 i) +{ + F64 *t = g_bspline_ctx.knotpoints; + + if(k == 1) + { + if(i == g_bspline_ctx.num_bsplines-1 && x == g_grid.end) + { + // TODO(anton): + // This is like a hack to get the last bspline to be 1 at the last point. + // I dont get how the Cox-de Boor recursion formula can force the last bspline + // to unity at the last point, actually. I have to check this. + return 1.0; + } + else if(i < g_bspline_ctx.num_bsplines && (x >= t[i] && x < t[i+1])) + { + return 1.0; + } else { + return 0.0; + } + } + else + { + F64 recursion1 = bspline_recursion(x, k-1, i); + F64 term1_enum = (x - t[i]); + F64 term1_denom = (t[i+k-1] - t[i]); + F64 term1 = recursion1 > 0.0 ? (term1_enum/term1_denom)*recursion1 : 0.0; + + F64 recursion2 = bspline_recursion(x, k-1, i+1); + F64 term2_enum = (t[i+k] - x); + F64 term2_denom = (t[i+k] - t[i+1]); + F64 term2 = recursion2 > 0.0 ? (term2_enum/term2_denom)*recursion2 : 0.0; + + return term1 + term2; + } + + +} + + +function F64 +compute_bspline_F64(F64 x_coord, U32 index) +{ + U32 k = g_bspline_ctx.order; + F64 out = bspline_recursion(x_coord, k, index); + return out; +} + + +function F64 +compute_dBspline_F64(F64 x_coord, U32 index) +{ + U32 k = g_bspline_ctx.order; + F64 prefac = (F64)(k - 1); + F64 *t = g_bspline_ctx.knotpoints; + F64 term1_enum = bspline_recursion(x_coord, k-1, index); + F64 term2_enum = bspline_recursion(x_coord, k-1, index+1); + F64 term1_denom = t[index+k-1] - t[index]; + F64 term2_denom = t[index+k] - t[index+1]; + F64 term1 = term1_enum > 0.0 ? (term1_enum/term1_denom) : 0.0; + F64 term2 = term2_enum > 0.0 ? (term2_enum/term2_denom) : 0.0; + + return prefac*(term1-term2); +} + +function void +set_up_grid(Arena *arena) +{ + g_grid.start = GRID_START_POINT; + g_grid.end = GRID_END_POINT; + g_grid.num_steps = 100; + + g_grid.points = PushArray(arena, F64, g_grid.num_steps); + F64 step_size = (g_grid.end-g_grid.start)/(F64)g_grid.num_steps; + g_grid.points[0] = g_grid.start; + g_grid.points[g_grid.num_steps-1] = g_grid.end; + for(U32 i = 1; i < g_grid.num_steps-1; i++) + { + g_grid.points[i] = g_grid.points[i-1] + step_size; + } + +} + +function inline U32 +get_bspline_index_size(U32 size1, U32 i, U32 j) +{ + return size1 * j + i; +} + + +function inline U32 +get_bspline_index(U32 i, U32 j) +{ + return g_bspline_ctx.num_knotpoints * j + i; +} + +function inline U32 +get_bspline_grid_index(U32 i, U32 j) +{ + return g_grid.num_steps * j + i; +} + + +function void +set_up_bspline_context(Arena* arena) +{ + // Create knotpoint sequence. + U32 k = 4; + U32 bspl_N = 40; + g_bspline_ctx.order = k; + g_bspline_ctx.num_knotpoints = bspl_N; + g_bspline_ctx.num_bsplines = bspl_N-k; + g_bspline_ctx.num_phys_points = bspl_N-(2*k)+2; // Remove k points at each end, and then add back the first and last points. + g_bspline_ctx.arena = arena; + g_bspline_ctx.knotpoints = PushArray(arena, F64, g_bspline_ctx.num_knotpoints); + // Set up physical points; + F64 delta = (g_grid.end-g_grid.start)/(g_bspline_ctx.num_phys_points-1); + // Set ghost points including first physical + U32 phys_point_last_index = g_bspline_ctx.num_phys_points + k-1; + for(U32 i = 0; i < k; i++) + { + g_bspline_ctx.knotpoints[i] = g_grid.start; + } + for(U32 i = k; i < phys_point_last_index; i++) + { + g_bspline_ctx.knotpoints[i] = g_bspline_ctx.knotpoints[i-1] + delta; + } + // Set the last points + F64 last_physical = g_grid.end; + for(U32 i = phys_point_last_index; i < g_bspline_ctx.num_knotpoints; i++) + { + g_bspline_ctx.knotpoints[i] = last_physical; + } +} + +function void +write_bspline_matrix_F64(F64 *bsplines, U32 size1, U32 size2, String8 filename_path) +{ + ArenaTemp scratch = scratch_get(0, 0); + // First line is just the bspline indices. + String8List first_line_list = {0}; + StringJoin join = {0}; + join.sep = str8_lit("\t\t"); + for(U32 i = 0; i < size2; i++) + { + str8_list_pushf(scratch.arena, &first_line_list, " %i", i); + } + str8_list_push(scratch.arena, &first_line_list, str8_lit("\n")); + String8 first_line = str8_list_join(scratch.arena, first_line_list, &join); + + String8List bspline_array_list = {0}; + str8_list_push(scratch.arena, &bspline_array_list, first_line); + for(U32 i = 0; i < size1; i++) + { + String8List row = {0}; + for(U32 j = 0; j < size2; j++) + { + U32 index = get_bspline_index_size(size1, i, j); + F64 bspline_value = bsplines[index]; + if(g_debug_bspline_matrix && j == 0) + { + String8 out = str8_pushf(scratch.arena, "%i %i \t %13.6e\n", i, index, bspline_value); + LOG(out.str); + } + str8_list_pushf(scratch.arena, &row, "%13.6e", bspline_value); + } + StringJoin bspl_join = {0}; + bspl_join.sep = str8_lit(" "); + bspl_join.post = str8_lit("\n"); + String8 row_joined = str8_list_join(scratch.arena, row, &bspl_join); + str8_list_push(scratch.arena, &bspline_array_list, row_joined); + } + + write_string_list_to_file(scratch.arena, filename_path, &bspline_array_list); + + + scratch_release(scratch); +} + +function void +set_up_bsplines_at_points_and_write_matrix_F64(Arena *arena) +{ + + U64 num_bsplines = g_bspline_ctx.num_bsplines; + U64 k = g_bspline_ctx.order; + F64 *t = g_bspline_ctx.knotpoints; + U32 num_grid_points = g_grid.num_steps; + U32 num_knotpoints = g_bspline_ctx.num_knotpoints; + + // For sanity check we make the first 4 bsplines by hand. + { + F64 *bspl0 = PushArray(arena, F64, num_grid_points); + F64 *bspl1 = PushArray(arena, F64, num_grid_points); + F64 *bspl2 = PushArray(arena, F64, num_grid_points); + F64 *bspl3 = PushArray(arena, F64, num_grid_points); + F64 *bspl9 = PushArray(arena, F64, num_grid_points); + F64 *dBspl0 = PushArray(arena, F64, num_grid_points); + F64 *dBspl1 = PushArray(arena, F64, num_grid_points); + F64 *dBspl2 = PushArray(arena, F64, num_grid_points); + F64 *dBspl3 = PushArray(arena, F64, num_grid_points); + F64 *dBspl9 = PushArray(arena, F64, num_grid_points); + for(U32 i = 0; i < num_grid_points; i++) + { + F64 x = g_grid.points[i]; + bspl0[i] = compute_bspline_F64(x, 0); + bspl1[i] = compute_bspline_F64(x, 1); + bspl2[i] = compute_bspline_F64(x, 2); + bspl3[i] = compute_bspline_F64(x, 3); + bspl9[i] = compute_bspline_F64(x, 9); + dBspl0[i] = compute_dBspline_F64(x, 0); + dBspl1[i] = compute_dBspline_F64(x, 1); + dBspl2[i] = compute_dBspline_F64(x, 2); + dBspl3[i] = compute_dBspline_F64(x, 3); + dBspl9[i] = compute_dBspline_F64(x, 9); + } + + F64 test = compute_bspline_F64(g_grid.points[num_grid_points-1], 9); + + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline0.dat"), bspl0, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline1.dat"), bspl1, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline2.dat"), bspl2, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline3.dat"), bspl3, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline9.dat"), bspl9, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline0.dat"), dBspl0, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline1.dat"), dBspl1, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline2.dat"), dBspl2, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline3.dat"), dBspl3, num_grid_points, "%13.6e\n"); + write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline9.dat"), dBspl9, num_grid_points, "%13.6e\n"); + } + + { + ArenaTemp scratch = scratch_get(0,0); + + g_bspline_ctx.bsplines_grid = PushArray(arena, F64, num_grid_points*num_bsplines); + g_bspline_ctx.dBsplines_grid = PushArray(arena, F64, num_grid_points*num_bsplines); + + for(U32 j = 0; j < num_bsplines; j++) + { + for(U32 i = 0; i < num_grid_points; i++) + { + U32 index = get_bspline_grid_index(i, j); + F64 bspline_value = compute_bspline_F64(g_grid.points[i], j); + F64 dBspline_value = compute_dBspline_F64(g_grid.points[i], j); + g_bspline_ctx.bsplines_grid[index] = bspline_value; + g_bspline_ctx.dBsplines_grid[index] = dBspline_value; + if(g_debug_bspline_matrix && j == 0) + { + String8 out = str8_pushf(scratch.arena, "%i %i \t %13.6e\n", i, index, bspline_value); + LOG(out.str); + } + + } + } + scratch_release(scratch); + } + + g_bspline_ctx.bsplines = PushArray(arena, F64, num_knotpoints*num_bsplines); + g_bspline_ctx.dBsplines = PushArray(arena, F64, num_knotpoints*num_bsplines); + for(U32 i = 0; i < num_knotpoints; i++) + { + for(U32 j = 0; j < num_bsplines; j++) + { + U32 index = get_bspline_index(i, j); + g_bspline_ctx.bsplines[index] = compute_bspline_F64(t[i], j); + g_bspline_ctx.bsplines[index] = compute_dBspline_F64(t[i], j); + } + } + + write_bspline_matrix_F64(g_bspline_ctx.bsplines_grid, + num_grid_points, + num_bsplines, + str8_lit(bspline_grid_array_file_path)); + + write_bspline_matrix_F64(g_bspline_ctx.dBsplines_grid, + num_grid_points, + num_bsplines, + str8_lit(dBspline_grid_array_file_path)); + + write_bspline_matrix_F64(g_bspline_ctx.bsplines, + num_knotpoints, + num_bsplines, + str8_lit(bspline_knots_array_file_path)); + + + write_bspline_matrix_F64(g_bspline_ctx.dBsplines, + num_knotpoints, + num_bsplines, + str8_lit(dBspline_knots_array_file_path)); + +} + diff --git a/src/hf/bsplines_and_grid.h b/src/hf/bsplines_and_grid.h new file mode 100644 index 0000000..f92da1d --- /dev/null +++ b/src/hf/bsplines_and_grid.h @@ -0,0 +1,46 @@ +#ifndef BSPLINES_AND_GRID_H +#define BSPLINES_AND_GRID_H + +// coordinates in Bohr radii +#define GRID_START_POINT 0.0 +#define GRID_END_POINT 10.0 + +typedef struct Grid Grid; +struct Grid +{ + F64 start; + F64 end; + U32 num_steps; + F64 *points; +}; + +typedef struct BSplineCtx BSplineCtx; +struct BSplineCtx +{ + Arena* arena; + U32 order; + F64 *knotpoints; + U32 num_knotpoints; + U32 num_bsplines; + U32 num_phys_points; + F64 *bsplines_grid; // In grid points + F64 *dBsplines_grid; // First deriv in grid points + F64 *bsplines; // In knotpoints + F64 *dBsplines; // First deriv in knotpoints +}; + + + +function void set_up_grid(Arena *arena); +//~ Bspline functions +function F64 bspline_recursion(F64 x, U32 k, U32 i); +function F64 compute_bspline_F64(F64 x_coord, U32 index); +function F64 compute_dBspline_F64(F64 x_coord, U32 index); +function inline U32 get_bspline_index_size(U32 size1, U32 i, U32 j); +function inline U32 get_bspline_index(U32 i, U32 j); +function inline U32 get_bspline_grid_index(U32 i, U32 j); +function void set_up_bspline_context(Arena* arena); +function void write_bspline_matrix_F64(F64 *bsplines, U32 size1, U32 size2, String8 filename_path); +function void set_up_bsplines_at_points_and_write_matrix_F64(Arena *arena); + +#endif /* BSPLINES_AND_GRID_H */ diff --git a/src/hf/file_io.c b/src/hf/file_io.c new file mode 100644 index 0000000..a4997e0 --- /dev/null +++ b/src/hf/file_io.c @@ -0,0 +1,63 @@ + + +function void +write_array_binary_F64(String8 path_to_file, F64 *values, U32 array_size) +{ + OS_Handle file_handle = OS_file_open(OS_AccessFlag_Write | OS_AccessFlag_CreateNew, + path_to_file); + { + ArenaTemp scratch = scratch_get(0, 0); + String8List list = {0}; + String8 temp = {0}; + temp.str = (U8*)values; + temp.size = sizeof(F64)*array_size; + str8_list_push(scratch.arena, &list, temp); + OS_file_write(scratch.arena, file_handle, 0, list, 0); + + String8List log_list = {0}; + str8_list_push(scratch.arena, &log_list, str8_lit("Wrote binary array data to")); + str8_list_push(scratch.arena, &log_list, path_to_file); + StringJoin join = {0}; + join.sep = str8_lit(" "); + join.post = str8_lit("\n"); + String8 log_msg = str8_list_join(scratch.arena, log_list, &join); + LOG(log_msg.str); + } + OS_file_close(file_handle); +} + +function void +write_string_list_to_file(Arena *arena, String8 path, String8List *list) +{ + + OS_Handle file_handle = OS_file_open(OS_AccessFlag_Write | OS_AccessFlag_CreateNew, + path); + OS_file_write(arena, file_handle, 0, *list, 0); + + U32 debug = 1; + if(debug) + { + String8List log_list = {0}; + str8_list_push(arena, &log_list, str8_lit("Wrote array to")); + str8_list_push(arena, &log_list, path); + StringJoin join = {0}; + join.sep = str8_lit(" "); + join.post = str8_lit("\n"); + String8 log_msg = str8_list_join(arena, log_list, &join); + LOG(log_msg.str); + } + OS_file_close(file_handle); +} + + +function void +write_array_F64(String8 path_to_file, F64 *values, U32 array_size, char* fmt) +{ + ArenaTemp scratch = scratch_get(0, 0); + String8List list = {0}; + for(U32 i = 0; i < array_size; i++) + { + str8_list_pushf(scratch.arena, &list, fmt, values[i]); + } + write_string_list_to_file(scratch.arena, path_to_file, &list); +} diff --git a/src/hf/file_io.h b/src/hf/file_io.h new file mode 100644 index 0000000..0310599 --- /dev/null +++ b/src/hf/file_io.h @@ -0,0 +1,17 @@ +#ifndef FILE_IO_H +#define FILE_IO_H + +#define grid_file_path_bin "D:\\dev\\hf_again\\out\\grid.bin" +#define grid_file_path "D:\\dev\\hf_again\\out\\grid.dat" +#define knotpoints_file_path "D:\\dev\\hf_again\\out\\knotpoints.dat" +#define bspline_grid_array_file_path "D:\\dev\\hf_again\\out\\bsplines_grid.dat" +#define dBspline_grid_array_file_path "D:\\dev\\hf_again\\out\\dBsplines_grid.dat" +#define bspline_knots_array_file_path "D:\\dev\\hf_again\\out\\bsplines_knots.dat" +#define dBspline_knots_array_file_path "D:\\dev\\hf_again\\out\\dBsplines_knots.dat" + + +function void write_string_list_to_file(Arena *arena, String8 path, String8List *list); +function void write_array_F64(String8 path_to_file, F64 *values, U32 array_size, char* fmt); +function void write_array_binary_F64(String8 path_to_file, F64 *values, U32 array_size); + +#endif /* FILE_IO_H */ diff --git a/src/hf/hf_base.c b/src/hf/hf_base.c new file mode 100644 index 0000000..618b29f --- /dev/null +++ b/src/hf/hf_base.c @@ -0,0 +1,177 @@ + +//~ Globals +global GaussLegendre g_gauss_legendre = {0}; + + +function Mat_F64 +mat_F64(U32 size1, U32 size2) +{ + Mat_F64 out = {0}; + out.size1 = size1; + out.size2 = size2; + out.matrix = (F64 **)malloc(size1 * sizeof(F64 *)); + U64 data_byte_size = size1 * size2 * sizeof(F64); + out.data = (F64 *)malloc(data_byte_size); + MemoryZero(out.data, data_byte_size); + + for(U32 i = 0; i < size1; i++) + { + out.matrix[i] = &out.data[i * size2]; + } + + return out; +} + +function void +mat_F64_free(Mat_F64 *mat) +{ + free(mat->matrix); + free(mat->data); + mat->size1 = 0; + mat->size2 = 0; + mat->matrix = 0; + mat->data = 0; +} + + +function inline void +mat_F64_set(Mat_F64 *mat, U32 i, U32 j, F64 val) +{ + U32 row = i < mat->size1 ? i : mat->size1-1; + U32 col = j < mat->size2 ? j : mat->size2-1; + mat->matrix[row][col] = val; + return; +} + +function inline F64 +mat_F64_get(Mat_F64 *mat, U32 i, U32 j) +{ + U32 row = i < mat->size1 ? i : mat->size1-1; + U32 col = j < mat->size2 ? j : mat->size2-1; + return mat->matrix[row][col]; +} + + +function Mat_F64 +mat_F64_copy(Mat_F64 *src) +{ + Mat_F64 out = mat_F64(src->size1, src->size2); + U32 data_byte_size = src->size1 * src->size2 * sizeof(F64); + MemoryCopy(out.data, src->data, data_byte_size); + return out; +} + + +function void +set_up_gauss_legendre_points(Arena* arena) +{ + U32 GL_order = 7; + + if(GL_order == 7) + { + F64 quadrature_weights[7] = { + 0.41795918367346938, + 0.38183005050511894, + 0.38183005050511894, + 0.27970539148927666, + 0.27970539148927666, + 0.12948496616886969, + 0.12948496616886969 + }; + + F64 legendre_abscissae[7] = { + 0, + 0.405845151377397, + -0.40584515137739, + -0.74153118559939, + 0.741531185599394, + -0.94910791234275, + 0.949107912342758 + }; + + g_gauss_legendre.order = GL_order; + g_gauss_legendre.weights = PushArray(arena, F64, GL_order); + g_gauss_legendre.abscissae = PushArray(arena, F64, GL_order); + for(U32 i = 0; i < GL_order; i++) + { + g_gauss_legendre.weights[i] = quadrature_weights[i]; + g_gauss_legendre.abscissae[i] = legendre_abscissae[i]; + } + + } + +} + + +/* Auxiliary routine: printing a matrix */ +function void +print_matrix_Z64( char* desc, int m, int n, Z64* a, int lda ) +{ + ArenaTemp scratch = scratch_get(0,0); + int i, j; + String8 newline = str8_lit("\n"); + String8 header = str8_pushf(scratch.arena, "\n %s\n", desc ); + LOG(header.str); + //printf("\n %s \n", desc); + for( i = 0; i < m; i++ ) { + for( j = 0; j < n; j++ ) { + String8 outstr = str8_pushf(scratch.arena, " (%6.2f,%6.2f)", a[i+j*lda].re, a[i+j*lda].im ); + LOG(outstr.str); + //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); + } + LOG(newline.str); + //printf("\n"); + } + + scratch_release(scratch); +} + + +/* Auxiliary routine: printing a matrix */ +function void +print_matrix_F64( char* desc, int m, int n, F64* a, int lda ) +{ + ArenaTemp scratch = scratch_get(0,0); + int i, j; + String8 newline = str8_lit("\n"); + String8 header = str8_pushf(scratch.arena, "\n %s\n", desc ); + LOG(header.str); + //printf("\n %s \n", desc); + for( i = 0; i < m; i++ ) { + for( j = 0; j < n; j++ ) { + String8 outstr = str8_pushf(scratch.arena, " %6.2f", a[i+j*lda], a[i+j*lda]); + LOG(outstr.str); + //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); + } + LOG(newline.str); + //printf("\n"); + } + + scratch_release(scratch); +} + +function void +print_mat_F64(Mat_F64 *mat) +{ + + ArenaTemp scratch = scratch_get(0,0); + + for(U32 i = 0; i < mat->size1-1; i++) + { + for(U32 j = 0; j < mat->size2-1; j++) + { + F64 val = mat_F64_get(mat, i, j); + //if(val < 1e-15) + //{ + //val = 0.0; + //} + String8 out_str = str8_pushf(scratch.arena, " %.4e", val); + LOG(out_str.str); + } + LOG("\n"); + } + + scratch_release(scratch); + +} + diff --git a/src/hf/hf_base.h b/src/hf/hf_base.h new file mode 100644 index 0000000..7096a98 --- /dev/null +++ b/src/hf/hf_base.h @@ -0,0 +1,47 @@ +#ifndef HF_BASE_H +#define HF_BASE_H + + +// Complex number with double precision +typedef struct Z64 Z64; +struct Z64 +{ + F64 re; + F64 im; +}; + +typedef struct Mat_F64 Mat_F64; +struct Mat_F64 +{ + U32 size1; + U32 size2; + F64 **matrix; + F64 *data; +}; + +typedef struct GaussLegendre GaussLegendre; +struct GaussLegendre +{ + U32 order; + F64 *weights; + F64 *abscissae; +}; + +//~ Base math and utility functions + +// Mat_F64 functions +function Mat_F64 mat_F64(U32 size1, U32 size2); +function void mat_F64_free(Mat_F64 *mat); +function inline void mat_F64_set(Mat_F64 *mat, U32 i, U32 j, F64 val); +function inline F64 mat_F64_get(Mat_F64 *mat, U32 i, U32 j); +function Mat_F64 mat_F64_copy(Mat_F64 *src); +function void print_mat_F64(Mat_F64 *mat); + +// Gauss-Legendre +function void set_up_gauss_legendre_points(Arena* arena); + +// Random utility +function void print_matrix_Z64( char* desc, int m, int n, Z64* a, int lda ); +function void print_matrix_F64( char* desc, int m, int n, F64* a, int lda ); + +#endif /* HF_BASE_H */ diff --git a/src/hf/tests.c b/src/hf/tests.c new file mode 100644 index 0000000..f034359 --- /dev/null +++ b/src/hf/tests.c @@ -0,0 +1,247 @@ + + +typedef F64 (*func_F64)(F64); + +function F64 +cos2(F64 x) +{ + return cos(x)*cos(x); +} + +function F64 +gq_integration(F64 a, F64 b, func_F64 func) +{ + + F64 prefac = 0.5*(b-a); + F64 gq_sum = 0.0; + + for(U32 i = 0; i < g_gauss_legendre.order; i++) + { + F64 w = g_gauss_legendre.weights[i]; + F64 z = g_gauss_legendre.abscissae[i]; + F64 shift = (z*prefac)+((a+b)*0.5); + F64 funct_value_at_shift = func(shift); + gq_sum = gq_sum + prefac*w*funct_value_at_shift; + } + + return gq_sum; + +} + + function void +test_gauss_legendre() +{ + + // Test GL qudrature by integrating cos^2 from 0 to 2pi, it should equal pi. + { + ArenaTemp scratch = scratch_get(0,0); + + F64 pi = 4.0*atan(1.0); + String8 pi_out = str8_pushf(scratch.arena, "Pi from standard library 4.0*atan(1.0) = %.16f \n", pi); + LOG(pi_out.str); + + F64 a = 0.0; + F64 b = 2.0*pi; + F64 gq_sum_single_interval = gq_integration(a, b, cos2); + + String8 out = str8_pushf(scratch.arena, "Integration result for a single interval: %.16f, Reference: %.16f \n", gq_sum_single_interval, pi); + LOG(out.str); + + // Test with several smaller intervals instead. + F64 aa[10] = {0.0}; + F64 bb[10] = {0.0}; + F64 delta = b/10.0; + for(U32 i = 0; i < 10; i++) + { + aa[i] = i*delta; + bb[i] = (i+1)*delta; + String8 intervals = str8_pushf(scratch.arena, "%i, a=%f, b=%f \n", i, aa[i], bb[i]); + LOG(intervals.str); + } + + F64 gq_sum_several_intervals = 0.0; + for(U32 i = 0; i < 10; i++) + { + gq_sum_several_intervals += gq_integration(aa[i], bb[i], cos2); + + } + + out = str8_pushf(scratch.arena, "Integration result for ten intervals: %.16f, Reference: %.16f \n", gq_sum_several_intervals, pi); + LOG(out.str); + + + scratch_release(scratch); + } +} + + + + +function void mkl_things(void) +{ + + OS_InitReceipt os_receipt = OS_init(); + OS_InitGfxReceipt os_gfx_receipt = OS_gfx_init(os_receipt); + + Arena *arena = m_make_arena(); + + U32 N = 4; + Z64 *main_A = PushArray(arena, Z64, N*N); + main_A[0] = (Z64){-3.84, 2.25}; + main_A[1] = (Z64){-0.66, 0.83}; + main_A[2] = (Z64){-3.99, -4.73}; + main_A[3] = (Z64){ 7.74, 4.18}; + main_A[4] = (Z64){-8.94, -4.75}; + main_A[5] = (Z64){-4.40, -3.82}; + main_A[6] = (Z64){-5.88, -6.60}; + main_A[7] = (Z64){ 3.66, -7.53}; + main_A[8] = (Z64){ 8.95, -6.53}; + main_A[9] = (Z64){-3.50, -4.26}; + main_A[10] = (Z64){-3.36, -0.40}; + main_A[11] = (Z64){ 2.58, 3.60}; + main_A[12] = (Z64){-9.87, 4.82}; + main_A[13] = (Z64){-3.15, 7.36}; + main_A[14] = (Z64){-0.75, 5.23}; + main_A[15] = (Z64){ 4.59, 5.41}; + + LOG("\n\n---- Calling Intel MKL zgeev test (Using Z64 instead of MKL_Complex16 etc) ---- \n\n"); + + { + S32 n = N, lda = N, ldvl = N, ldvr = N, info, lwork; + Z64 wkopt; + Z64 *work; + + F64 *rwork = PushArray(arena, F64, 2*N); + Z64 *w = PushArray(arena, Z64, N); + Z64 *vl = PushArray(arena, Z64, N*N); + Z64 *vr = PushArray(arena, Z64, N*N); + Z64 *a = PushArray(arena, Z64, N*N); + for(U32 j = 0; j < N; j++) + { + for(U32 i = 0; i < N; i++) + { + U32 index = i*N+j; + + a[index] = main_A[index]; + } + + } + + + /* Executable statements */ + LOG( " ZGEEV Example Program Results\n" ); + /* Query and allocate the optimal workspace */ + lwork = -1; + zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr, + &wkopt, &lwork, rwork, &info ); + lwork = (S32)wkopt.re; + work = (Z64*)malloc( lwork*sizeof(Z64) ); + /* Solve eigenproblem */ + zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr, + work, &lwork, rwork, &info ); + /* Check for convergence */ + if( info > 0 ) { + LOG( "The algorithm failed to compute eigenvalues.\n" ); + exit( 1 ); + } + /* Print eigenvalues */ + print_matrix_Z64( "Eigenvalues", 1, n, w, 1 ); + /* Print left eigenvectors */ + print_matrix_Z64( "Left eigenvectors", n, n, vl, ldvl ); + /* Print right eigenvectors */ + print_matrix_Z64( "Right eigenvectors", n, n, vr, ldvr ); + /* Free workspace */ + free( (void*)work ); + } /* End of ZGEEV Example */ + + + + LOG("\n\n--- End of EntryPoint, exiting program. \n\n"); +} + + + function void +test_matrix() +{ + + Mat_F64 test_mat = mat_F64(5, 5); + + { + ArenaTemp scratch = scratch_get(0,0); + + for(U32 i = 0; i < test_mat.size1; i++) + { + for(U32 j = 0; j < test_mat.size2; j++) + { + F64 val = i * test_mat.size2 + j; + mat_F64_set(&test_mat, i, j, val); + } + } + + for(U32 i = 0; i < test_mat.size1; i++) + { + for(U32 j = 0; j < test_mat.size2; j++) + { + F64 val = mat_F64_get(&test_mat, i, j); + String8 out_str = str8_pushf(scratch.arena, " %2.2f", val); + LOG(out_str.str); + } + LOG("\n"); + } + + scratch_release(scratch); + } +} + + + function void +testing_MKL() +{ + + test_mkl_zgeev(); + test_mkl_dsyevd(); + +} + +/* function void */ +/* dgeev_example() */ +/* { */ +/* /1* Locals *1/ */ +/* MKL_INT n = N, lda = LDA, ldvl = LDVL, ldvr = LDVR, info, lwork; */ +/* double wkopt; */ +/* double* work; */ +/* /1* Local arrays *1/ */ +/* double wr[N], wi[N], vl[LDVL*N], vr[LDVR*N]; */ +/* double a[LDA*N] = { */ +/* -1.01, 3.98, 3.30, 4.43, 7.31, */ +/* 0.86, 0.53, 8.26, 4.96, -6.43, */ +/* -4.60, -7.04, -3.89, -7.66, -6.16, */ +/* 3.31, 5.29, 8.20, -7.33, 2.47, */ +/* -4.81, 3.55, -1.51, 6.18, 5.58 */ +/* }; */ +/* /1* Executable statements *1/ */ +/* printf( " DGEEV Example Program Results\n" ); */ +/* /1* Query and allocate the optimal workspace *1/ */ +/* lwork = -1; */ +/* dgeev( "Vectors", "Vectors", &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, */ +/* &wkopt, &lwork, &info ); */ +/* lwork = (MKL_INT)wkopt; */ +/* work = (double*)malloc( lwork*sizeof(double) ); */ +/* /1* Solve eigenproblem *1/ */ +/* dgeev( "Vectors", "Vectors", &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, */ +/* work, &lwork, &info ); */ +/* /1* Check for convergence *1/ */ +/* if( info > 0 ) { */ +/* printf( "The algorithm failed to compute eigenvalues.\n" ); */ +/* exit( 1 ); */ +/* } */ +/* /1* Print eigenvalues *1/ */ +/* print_eigenvalues( "Eigenvalues", n, wr, wi ); */ +/* /1* Print left eigenvectors *1/ */ +/* print_eigenvectors( "Left eigenvectors", n, wi, vl, ldvl ); */ +/* /1* Print right eigenvectors *1/ */ +/* print_eigenvectors( "Right eigenvectors", n, wi, vr, ldvr ); */ +/* /1* Free workspace *1/ */ +/* free( (void*)work ); */ + +/* } */ diff --git a/src/main.c b/src/main.c index d1eef77..4ff75c1 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,3 @@ -#include -#include #define ENABLE_LOGGING 1 #if ENABLE_LOGGING @@ -10,70 +8,28 @@ // --- // Header includes +#include +#include +#include #include "base/base_inc.h" #include "os/os_inc.h" +#include "hf/file_io.h" +#include "hf/hf_base.h" +#include "hf/bsplines_and_grid.h" + // --- // .C includes #include "base/base_inc.c" #include "os/os_inc.c" #include "os/os_entry_point.c" -#include +#include "hf/file_io.c" +#include "hf/hf_base.c" +#include "hf/bsplines_and_grid.c" -#define grid_file_path_bin "D:\\dev\\hf_again\\out\\grid.bin" -#define grid_file_path "D:\\dev\\hf_again\\out\\grid.dat" -#define knotpoints_file_path "D:\\dev\\hf_again\\out\\knotpoints.dat" -#define bspline_grid_array_file_path "D:\\dev\\hf_again\\out\\bsplines_grid.dat" -#define dBspline_grid_array_file_path "D:\\dev\\hf_again\\out\\dBsplines_grid.dat" -#define bspline_knots_array_file_path "D:\\dev\\hf_again\\out\\bsplines_knots.dat" -#define dBspline_knots_array_file_path "D:\\dev\\hf_again\\out\\dBsplines_knots.dat" +#include "hf/tests.c" -// coordinates in Bohr radii -#define GRID_START_POINT 0.0 -#define GRID_END_POINT 10.0 - -// Complex number with double precision -typedef struct Z64 Z64; -struct Z64 -{ - F64 re; - F64 im; -}; - -typedef struct Mat_F64 Mat_F64; -struct Mat_F64 -{ - U32 size1; - U32 size2; - F64 **matrix; - F64 *data; -}; - -typedef struct Grid Grid; -struct Grid -{ - F64 start; - F64 end; - U32 num_steps; - F64 *points; -}; - - -typedef struct BSplineCtx BSplineCtx; -struct BSplineCtx -{ - Arena* arena; - U32 order; - F64 *knotpoints; - U32 num_knotpoints; - U32 num_bsplines; - U32 num_phys_points; - F64 *bsplines_grid; // In grid points - F64 *dBsplines_grid; // First deriv in grid points - F64 *bsplines; // In knotpoints - F64 *dBsplines; // First deriv in knotpoints -}; typedef struct Orbital Orbital; struct Orbital @@ -91,524 +47,91 @@ struct Atom Orbital *orbitals; }; - -typedef struct GaussLegendre GaussLegendre; -struct GaussLegendre +typedef struct SortPair_F64 SortPair_F64; +struct SortPair_F64 { - U32 order; - F64 *weights; - F64 *abscissae; + F64 value; + U64 original_index; }; -//~ Globals -global Grid g_grid = {0}; -global BSplineCtx g_bspline_ctx = {0}; -global GaussLegendre g_gauss_legendre = {0}; global Arena* g_base_arena = 0; -global U32 g_debug_bspline_matrix = 0; -function Mat_F64 -mat_F64(U32 size1, U32 size2) +U64 qsort_partition_F64(SortPair_F64 *array, U64 size, U64 low, U64 high) { - Mat_F64 out = {0}; - out.size1 = size1; - out.size2 = size2; - out.matrix = (F64 **)malloc(size1 * sizeof(F64 *)); - U64 data_byte_size = size1 * size2 * sizeof(F64); - out.data = (F64 *)malloc(data_byte_size); - MemoryZero(out.data, data_byte_size); - - for(U32 i = 0; i < size1; i++) + F64 pivot = array[high].value; + U64 i = low - 1; + + SortPair_F64 temp = {0}; + for (U64 j = low; j < high; j++) { - out.matrix[i] = &out.data[i * size2]; - } - - return out; -} - -function void -mat_F64_free(Mat_F64 *mat) -{ - free(mat->matrix); - free(mat->data); - mat->size1 = 0; - mat->size2 = 0; - mat->matrix = 0; - mat->data = 0; -} - - -function inline void -mat_F64_set(Mat_F64 *mat, U32 i, U32 j, F64 val) -{ - U32 row = i < mat->size1 ? i : mat->size1-1; - U32 col = j < mat->size2 ? j : mat->size2-1; - mat->matrix[row][col] = val; - return; -} - -function inline F64 -mat_F64_get(Mat_F64 *mat, U32 i, U32 j) -{ - U32 row = i < mat->size1 ? i : mat->size1-1; - U32 col = j < mat->size2 ? j : mat->size2-1; - return mat->matrix[row][col]; -} - - -function Mat_F64 -mat_F64_copy(Mat_F64 *src) -{ - Mat_F64 out = mat_F64(src->size1, src->size2); - U32 data_byte_size = src->size1 * src->size2 * sizeof(F64); - MemoryCopy(out.data, src->data, data_byte_size); - return out; -} - - -function void -set_up_gauss_legendre_points(Arena* arena) -{ - U32 GL_order = 7; - - if(GL_order == 7) - { - F64 quadrature_weights[7] = { - 0.41795918367346938, - 0.38183005050511894, - 0.38183005050511894, - 0.27970539148927666, - 0.27970539148927666, - 0.12948496616886969, - 0.12948496616886969 - }; - - F64 legendre_abscissae[7] = { - 0, - 0.405845151377397, - -0.40584515137739, - -0.74153118559939, - 0.741531185599394, - -0.94910791234275, - 0.949107912342758 - }; - - g_gauss_legendre.order = GL_order; - g_gauss_legendre.weights = PushArray(arena, F64, GL_order); - g_gauss_legendre.abscissae = PushArray(arena, F64, GL_order); - for(U32 i = 0; i < GL_order; i++) + if(array[j].value <= pivot) { - g_gauss_legendre.weights[i] = quadrature_weights[i]; - g_gauss_legendre.abscissae[i] = legendre_abscissae[i]; + i += 1; + temp = array[i]; + array[i] = array[j]; + array[j] = temp; } - } + U64 final_pivot_pos = i + 1; + temp = array[final_pivot_pos]; + array[final_pivot_pos] = array[high]; + array[high] = temp; + + return final_pivot_pos; } - -function void -write_array_binary_F64(String8 path_to_file, F64 *values, U32 array_size) +function void qsort_F64(SortPair_F64 *array, U64 size, U64 low, U64 high) { - OS_Handle file_handle = OS_file_open(OS_AccessFlag_Write | OS_AccessFlag_CreateNew, - path_to_file); - { - ArenaTemp scratch = scratch_get(0, 0); - String8List list = {0}; - String8 temp = {0}; - temp.str = (U8*)values; - temp.size = sizeof(F64)*array_size; - str8_list_push(scratch.arena, &list, temp); - OS_file_write(scratch.arena, file_handle, 0, list, 0); - - String8List log_list = {0}; - str8_list_push(scratch.arena, &log_list, str8_lit("Wrote binary array data to")); - str8_list_push(scratch.arena, &log_list, path_to_file); - StringJoin join = {0}; - join.sep = str8_lit(" "); - join.post = str8_lit("\n"); - String8 log_msg = str8_list_join(scratch.arena, log_list, &join); - LOG(log_msg.str); - } - OS_file_close(file_handle); -} - -function void -write_string_list_to_file(Arena *arena, String8 path, String8List *list) -{ - - OS_Handle file_handle = OS_file_open(OS_AccessFlag_Write | OS_AccessFlag_CreateNew, - path); - OS_file_write(arena, file_handle, 0, *list, 0); - - U32 debug = 1; - if(debug) + if(low < high) { - String8List log_list = {0}; - str8_list_push(arena, &log_list, str8_lit("Wrote array to")); - str8_list_push(arena, &log_list, path); - StringJoin join = {0}; - join.sep = str8_lit(" "); - join.post = str8_lit("\n"); - String8 log_msg = str8_list_join(arena, log_list, &join); - LOG(log_msg.str); - } - OS_file_close(file_handle); -} - - -function void -write_array_F64(String8 path_to_file, F64 *values, U32 array_size, char* fmt) -{ - ArenaTemp scratch = scratch_get(0, 0); - String8List list = {0}; - for(U32 i = 0; i < array_size; i++) - { - str8_list_pushf(scratch.arena, &list, fmt, values[i]); - } - write_string_list_to_file(scratch.arena, path_to_file, &list); -} - -function F64 -bspline_recursion(F64 x, U32 k, U32 i) -{ - F64 *t = g_bspline_ctx.knotpoints; - - if(k == 1) - { - if(i == g_bspline_ctx.num_bsplines-1 && x == g_grid.end) - { - // TODO(anton): - // This is like a hack to get the last bspline to be 1 at the last point. - // I dont get how the Cox-de Boor recursion formula can force the last bspline - // to unity at the last point, actually. I have to check this. - return 1.0; - } - else if(i < g_bspline_ctx.num_bsplines && (x >= t[i] && x < t[i+1])) - { - return 1.0; - } else { - return 0.0; - } - } - else - { - F64 recursion1 = bspline_recursion(x, k-1, i); - F64 term1_enum = (x - t[i]); - F64 term1_denom = (t[i+k-1] - t[i]); - F64 term1 = recursion1 > 0.0 ? (term1_enum/term1_denom)*recursion1 : 0.0; - - F64 recursion2 = bspline_recursion(x, k-1, i+1); - F64 term2_enum = (t[i+k] - x); - F64 term2_denom = (t[i+k] - t[i+1]); - F64 term2 = recursion2 > 0.0 ? (term2_enum/term2_denom)*recursion2 : 0.0; - - return term1 + term2; + U64 pivot_index = qsort_partition_F64(array, size, low, high); + + qsort_F64(array, size, low, pivot_index - 1); + qsort_F64(array, size, pivot_index + 1, high); } - -} - - -function F64 -compute_bspline_F64(F64 x_coord, U32 index) -{ - U32 k = g_bspline_ctx.order; - F64 out = bspline_recursion(x_coord, k, index); - return out; -} - - -function F64 -compute_dBspline_F64(F64 x_coord, U32 index) -{ - U32 k = g_bspline_ctx.order; - F64 prefac = (F64)(k - 1); - F64 *t = g_bspline_ctx.knotpoints; - F64 term1_enum = bspline_recursion(x_coord, k-1, index); - F64 term2_enum = bspline_recursion(x_coord, k-1, index+1); - F64 term1_denom = t[index+k-1] - t[index]; - F64 term2_denom = t[index+k] - t[index+1]; - F64 term1 = term1_enum > 0.0 ? (term1_enum/term1_denom) : 0.0; - F64 term2 = term2_enum > 0.0 ? (term2_enum/term2_denom) : 0.0; - - return prefac*(term1-term2); -} - -function void -set_up_grid(Arena *arena) -{ - g_grid.start = GRID_START_POINT; - g_grid.end = GRID_END_POINT; - g_grid.num_steps = 100; - - g_grid.points = PushArray(arena, F64, g_grid.num_steps); - F64 step_size = (g_grid.end-g_grid.start)/(F64)g_grid.num_steps; - g_grid.points[0] = g_grid.start; - g_grid.points[g_grid.num_steps-1] = g_grid.end; - for(U32 i = 1; i < g_grid.num_steps-1; i++) - { - g_grid.points[i] = g_grid.points[i-1] + step_size; - } - -} - -function inline U32 -get_bspline_index_size(U32 size1, U32 i, U32 j) -{ - return size1 * j + i; -} - - -function inline U32 -get_bspline_index(U32 i, U32 j) -{ - return g_bspline_ctx.num_knotpoints * j + i; -} - -function inline U32 -get_bspline_grid_index(U32 i, U32 j) -{ - return g_grid.num_steps * j + i; -} - - -function void -set_up_bspline_context(Arena* arena) -{ - // Create knotpoint sequence. - U32 k = 4; - U32 bspl_N = 40; - g_bspline_ctx.order = k; - g_bspline_ctx.num_knotpoints = bspl_N; - g_bspline_ctx.num_bsplines = bspl_N-k; - g_bspline_ctx.num_phys_points = bspl_N-(2*k)+2; // Remove k points at each end, and then add back the first and last points. - g_bspline_ctx.arena = arena; - g_bspline_ctx.knotpoints = PushArray(arena, F64, g_bspline_ctx.num_knotpoints); - // Set up physical points; - F64 delta = (g_grid.end-g_grid.start)/(g_bspline_ctx.num_phys_points-1); - // Set ghost points including first physical - U32 phys_point_last_index = g_bspline_ctx.num_phys_points + k-1; - for(U32 i = 0; i < k; i++) - { - g_bspline_ctx.knotpoints[i] = g_grid.start; - } - for(U32 i = k; i < phys_point_last_index; i++) - { - g_bspline_ctx.knotpoints[i] = g_bspline_ctx.knotpoints[i-1] + delta; - } - // Set the last points - F64 last_physical = g_grid.end; - for(U32 i = phys_point_last_index; i < g_bspline_ctx.num_knotpoints; i++) - { - g_bspline_ctx.knotpoints[i] = last_physical; - } } function void -write_bspline_matrix_F64(F64 *bsplines, U32 size1, U32 size2, String8 filename_path) +sort_and_get_indices_F64(F64 *array, U64 *indices, U64 size) { - ArenaTemp scratch = scratch_get(0, 0); - // First line is just the bspline indices. - String8List first_line_list = {0}; - StringJoin join = {0}; - join.sep = str8_lit("\t\t"); - for(U32 i = 0; i < size2; i++) - { - str8_list_pushf(scratch.arena, &first_line_list, " %i", i); - } - str8_list_push(scratch.arena, &first_line_list, str8_lit("\n")); - String8 first_line = str8_list_join(scratch.arena, first_line_list, &join); + SortPair_F64 *pairs = malloc(size * sizeof(SortPair_F64)); + for(U64 i = 0; i < size; i++) + { + pairs[i].value = array[i]; + pairs[i].original_index = i; + } + + qsort_F64(pairs, size, 0, size-1); - String8List bspline_array_list = {0}; - str8_list_push(scratch.arena, &bspline_array_list, first_line); - for(U32 i = 0; i < size1; i++) - { - String8List row = {0}; - for(U32 j = 0; j < size2; j++) - { - U32 index = get_bspline_index_size(size1, i, j); - F64 bspline_value = bsplines[index]; - if(g_debug_bspline_matrix && j == 0) - { - String8 out = str8_pushf(scratch.arena, "%i %i \t %13.6e\n", i, index, bspline_value); - LOG(out.str); - } - str8_list_pushf(scratch.arena, &row, "%13.6e", bspline_value); - } - StringJoin bspl_join = {0}; - bspl_join.sep = str8_lit(" "); - bspl_join.post = str8_lit("\n"); - String8 row_joined = str8_list_join(scratch.arena, row, &bspl_join); - str8_list_push(scratch.arena, &bspline_array_list, row_joined); - } - - write_string_list_to_file(scratch.arena, filename_path, &bspline_array_list); + for(U32 i = 0; i < size; i++) + { + array[i] = pairs[i].value; + indices[i] = pairs[i].original_index; + } - scratch_release(scratch); + free(pairs); } function void -set_up_bsplines_at_points_and_write_matrix_F64(Arena *arena) +sort_by_indices_F64(F64 *array, U64 *indices, U64 size) { - U64 num_bsplines = g_bspline_ctx.num_bsplines; - U64 k = g_bspline_ctx.order; - F64 *t = g_bspline_ctx.knotpoints; - U32 num_grid_points = g_grid.num_steps; - U32 num_knotpoints = g_bspline_ctx.num_knotpoints; - - // For sanity check we make the first 4 bsplines by hand. + F64 *temp = malloc(size * sizeof(F64)); + for(U64 i = 0; i < size; i++) { - F64 *bspl0 = PushArray(arena, F64, num_grid_points); - F64 *bspl1 = PushArray(arena, F64, num_grid_points); - F64 *bspl2 = PushArray(arena, F64, num_grid_points); - F64 *bspl3 = PushArray(arena, F64, num_grid_points); - F64 *bspl9 = PushArray(arena, F64, num_grid_points); - F64 *dBspl0 = PushArray(arena, F64, num_grid_points); - F64 *dBspl1 = PushArray(arena, F64, num_grid_points); - F64 *dBspl2 = PushArray(arena, F64, num_grid_points); - F64 *dBspl3 = PushArray(arena, F64, num_grid_points); - F64 *dBspl9 = PushArray(arena, F64, num_grid_points); - for(U32 i = 0; i < num_grid_points; i++) - { - F64 x = g_grid.points[i]; - bspl0[i] = compute_bspline_F64(x, 0); - bspl1[i] = compute_bspline_F64(x, 1); - bspl2[i] = compute_bspline_F64(x, 2); - bspl3[i] = compute_bspline_F64(x, 3); - bspl9[i] = compute_bspline_F64(x, 9); - dBspl0[i] = compute_dBspline_F64(x, 0); - dBspl1[i] = compute_dBspline_F64(x, 1); - dBspl2[i] = compute_dBspline_F64(x, 2); - dBspl3[i] = compute_dBspline_F64(x, 3); - dBspl9[i] = compute_dBspline_F64(x, 9); - } - - F64 test = compute_bspline_F64(g_grid.points[num_grid_points-1], 9); - - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline0.dat"), bspl0, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline1.dat"), bspl1, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline2.dat"), bspl2, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline3.dat"), bspl3, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\bspline9.dat"), bspl9, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline0.dat"), dBspl0, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline1.dat"), dBspl1, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline2.dat"), dBspl2, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline3.dat"), dBspl3, num_grid_points, "%13.6e\n"); - write_array_F64(str8_lit("D:\\dev\\hf_again\\out\\dBspline9.dat"), dBspl9, num_grid_points, "%13.6e\n"); + U64 original_index = indices[i]; + temp[i] = array[original_index]; } - + for(U64 i = 0; i < size; i++) { - ArenaTemp scratch = scratch_get(0,0); - - g_bspline_ctx.bsplines_grid = PushArray(arena, F64, num_grid_points*num_bsplines); - g_bspline_ctx.dBsplines_grid = PushArray(arena, F64, num_grid_points*num_bsplines); - - for(U32 j = 0; j < num_bsplines; j++) - { - for(U32 i = 0; i < num_grid_points; i++) - { - U32 index = get_bspline_grid_index(i, j); - F64 bspline_value = compute_bspline_F64(g_grid.points[i], j); - F64 dBspline_value = compute_dBspline_F64(g_grid.points[i], j); - g_bspline_ctx.bsplines_grid[index] = bspline_value; - g_bspline_ctx.dBsplines_grid[index] = dBspline_value; - if(g_debug_bspline_matrix && j == 0) - { - String8 out = str8_pushf(scratch.arena, "%i %i \t %13.6e\n", i, index, bspline_value); - LOG(out.str); - } - - } + array[i] = temp[i]; } - scratch_release(scratch); - } - - g_bspline_ctx.bsplines = PushArray(arena, F64, num_knotpoints*num_bsplines); - g_bspline_ctx.dBsplines = PushArray(arena, F64, num_knotpoints*num_bsplines); - for(U32 i = 0; i < num_knotpoints; i++) - { - for(U32 j = 0; j < num_bsplines; j++) - { - U32 index = get_bspline_index(i, j); - g_bspline_ctx.bsplines[index] = compute_bspline_F64(t[i], j); - g_bspline_ctx.bsplines[index] = compute_dBspline_F64(t[i], j); - } - } - - write_bspline_matrix_F64(g_bspline_ctx.bsplines_grid, - num_grid_points, - num_bsplines, - str8_lit(bspline_grid_array_file_path)); - - write_bspline_matrix_F64(g_bspline_ctx.dBsplines_grid, - num_grid_points, - num_bsplines, - str8_lit(dBspline_grid_array_file_path)); - - write_bspline_matrix_F64(g_bspline_ctx.bsplines, - num_knotpoints, - num_bsplines, - str8_lit(bspline_knots_array_file_path)); - - - write_bspline_matrix_F64(g_bspline_ctx.dBsplines, - num_knotpoints, - num_bsplines, - str8_lit(dBspline_knots_array_file_path)); - -} - - -/* Auxiliary routine: printing a matrix */ - function void -print_matrix_Z64( char* desc, int m, int n, Z64* a, int lda ) -{ - ArenaTemp scratch = scratch_get(0,0); - int i, j; - String8 newline = str8_lit("\n"); - String8 header = str8_pushf(scratch.arena, "\n %s\n", desc ); - LOG(header.str); - //printf("\n %s \n", desc); - for( i = 0; i < m; i++ ) { - for( j = 0; j < n; j++ ) { - String8 outstr = str8_pushf(scratch.arena, " (%6.2f,%6.2f)", a[i+j*lda].re, a[i+j*lda].im ); - LOG(outstr.str); - //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); - } - LOG(newline.str); - //printf("\n"); - } - - scratch_release(scratch); -} - - -/* Auxiliary routine: printing a matrix */ -function void -print_matrix_F64( char* desc, int m, int n, F64* a, int lda ) -{ - ArenaTemp scratch = scratch_get(0,0); - int i, j; - String8 newline = str8_lit("\n"); - String8 header = str8_pushf(scratch.arena, "\n %s\n", desc ); - LOG(header.str); - //printf("\n %s \n", desc); - for( i = 0; i < m; i++ ) { - for( j = 0; j < n; j++ ) { - String8 outstr = str8_pushf(scratch.arena, " %6.2f", a[i+j*lda], a[i+j*lda]); - LOG(outstr.str); - //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); - } - LOG(newline.str); - //printf("\n"); - } - - scratch_release(scratch); + free(temp); } /* Auxiliary routine: printing a matrix */ @@ -621,118 +144,17 @@ print_eigenvalues( char* desc, int n, F64* wr, F64* wi) String8 header = str8_pushf(scratch.arena, "\n %s\n", desc ); LOG(header.str); //printf("\n %s \n", desc); - for( j = 0; j < n; j++ ) { - String8 outstr = str8_pushf(scratch.arena, " (%4.5f, %4.5f)\n", wr[j], wi[j]); - LOG(outstr.str); - //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); - } - LOG(newline.str); - //printf("\n"); + for( j = 0; j < n; j++ ) { + String8 outstr = str8_pushf(scratch.arena, " (%4.5f, %4.5f)\n", wr[j], wi[j]); + LOG(outstr.str); + //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); + } + LOG(newline.str); + //printf("\n"); scratch_release(scratch); } - -typedef F64 (*func_F64)(F64); - -function F64 -cos2(F64 x) -{ - return cos(x)*cos(x); -} - -function F64 -gq_integration(F64 a, F64 b, func_F64 func) -{ - - F64 prefac = 0.5*(b-a); - F64 gq_sum = 0.0; - - for(U32 i = 0; i < g_gauss_legendre.order; i++) - { - F64 w = g_gauss_legendre.weights[i]; - F64 z = g_gauss_legendre.abscissae[i]; - F64 shift = (z*prefac)+((a+b)*0.5); - F64 funct_value_at_shift = func(shift); - gq_sum = gq_sum + prefac*w*funct_value_at_shift; - } - - return gq_sum; - -} - - function void -test_gauss_legendre() -{ - - // Test GL qudrature by integrating cos^2 from 0 to 2pi, it should equal pi. - { - ArenaTemp scratch = scratch_get(0,0); - - F64 pi = 4.0*atan(1.0); - String8 pi_out = str8_pushf(scratch.arena, "Pi from standard library 4.0*atan(1.0) = %.16f \n", pi); - LOG(pi_out.str); - - F64 a = 0.0; - F64 b = 2.0*pi; - F64 gq_sum_single_interval = gq_integration(a, b, cos2); - - String8 out = str8_pushf(scratch.arena, "Integration result for a single interval: %.16f, Reference: %.16f \n", gq_sum_single_interval, pi); - LOG(out.str); - - // Test with several smaller intervals instead. - F64 aa[10] = {0.0}; - F64 bb[10] = {0.0}; - F64 delta = b/10.0; - for(U32 i = 0; i < 10; i++) - { - aa[i] = i*delta; - bb[i] = (i+1)*delta; - String8 intervals = str8_pushf(scratch.arena, "%i, a=%f, b=%f \n", i, aa[i], bb[i]); - LOG(intervals.str); - } - - F64 gq_sum_several_intervals = 0.0; - for(U32 i = 0; i < 10; i++) - { - gq_sum_several_intervals += gq_integration(aa[i], bb[i], cos2); - - } - - out = str8_pushf(scratch.arena, "Integration result for ten intervals: %.16f, Reference: %.16f \n", gq_sum_several_intervals, pi); - LOG(out.str); - - - scratch_release(scratch); - } -} - -function void -print_mat_F64(Mat_F64 *mat) -{ - - ArenaTemp scratch = scratch_get(0,0); - - for(U32 i = 0; i < mat->size1-1; i++) - { - for(U32 j = 0; j < mat->size2-1; j++) - { - F64 val = mat_F64_get(mat, i, j); - //if(val < 1e-15) - //{ - //val = 0.0; - //} - String8 out_str = str8_pushf(scratch.arena, " %.4e", val); - LOG(out_str.str); - } - LOG("\n"); - } - - scratch_release(scratch); - -} - - function void EntryPoint(void) { @@ -771,7 +193,7 @@ function void EntryPoint(void) // H = -0.5*d^2/dr^2 + l(l+1)/(2r^2) - Z/r { ArenaTemp scratch = scratch_get(0, 0); - + LOG("Setting up matrices.\n"); String8 setuplog = str8_pushf(scratch.arena, "Num knotpoints: %i, Bspline order k = %i, Matrix size1 = N-k-2 = %i, size2 = %i\n", @@ -803,12 +225,12 @@ function void EntryPoint(void) // And we integrate over the next k knotpoints. U32 end_knotpoint_index = bspl_index_i < bspl_index_j ? bspl_index_i+k : bspl_index_j+k; - + F64 term1 = 0.0; F64 term2 = 0.0; F64 term3 = 0.0; F64 Bmat_term = 0.0; - + for(U32 knotpoint_idx = start_knotpoint_index; knotpoint_idx < end_knotpoint_index; knotpoint_idx++) @@ -866,7 +288,6 @@ function void EntryPoint(void) Mat_F64 A = mat_F64(H.size1, H.size2); Arena *mkl_arena = m_make_arena(); - //print_mat_F64(&B); LOG("\n"); //print_mat_F64(&B_inv); @@ -926,8 +347,8 @@ function void EntryPoint(void) F64 *wr = PushArray(mkl_arena, F64, n); F64 *wi = PushArray(mkl_arena, F64, n); - F64 *vl = PushArray(mkl_arena, F64, ldvl*n); - F64 *vr = PushArray(mkl_arena, F64, ldvr*n); + F64 *vl = PushArray(mkl_arena, F64, ldvl*n); + F64 *vr = PushArray(mkl_arena, F64, ldvr*n); lwork = -1; F64 *a = A.data; @@ -943,183 +364,27 @@ function void EntryPoint(void) exit( 1 ); } + print_eigenvalues( "Eigenvalues unsorted:", n, wr, wi ); + U64 *sorted_indices = PushArray(mkl_arena, U64, n); + sort_and_get_indices_F64(wr, sorted_indices, n); + sort_by_indices_F64(wi, sorted_indices, n); /* Print eigenvalues */ - print_eigenvalues( "Eigenvalues", n, wr, wi ); + print_eigenvalues( "Eigenvalues sorted: ", n, wr, wi ); /* Free workspace */ free( (void*)work ); } - - } - function void -test_matrix() -{ - - Mat_F64 test_mat = mat_F64(5, 5); - - { - ArenaTemp scratch = scratch_get(0,0); - - for(U32 i = 0; i < test_mat.size1; i++) - { - for(U32 j = 0; j < test_mat.size2; j++) - { - F64 val = i * test_mat.size2 + j; - mat_F64_set(&test_mat, i, j, val); - } - } - - for(U32 i = 0; i < test_mat.size1; i++) - { - for(U32 j = 0; j < test_mat.size2; j++) - { - F64 val = mat_F64_get(&test_mat, i, j); - String8 out_str = str8_pushf(scratch.arena, " %2.2f", val); - LOG(out_str.str); - } - LOG("\n"); - } - - scratch_release(scratch); - } -} - - -function void mkl_things(void) -{ - - OS_InitReceipt os_receipt = OS_init(); - OS_InitGfxReceipt os_gfx_receipt = OS_gfx_init(os_receipt); - - Arena *arena = m_make_arena(); - - U32 N = 4; - Z64 *main_A = PushArray(arena, Z64, N*N); - main_A[0] = (Z64){-3.84, 2.25}; - main_A[1] = (Z64){-0.66, 0.83}; - main_A[2] = (Z64){-3.99, -4.73}; - main_A[3] = (Z64){ 7.74, 4.18}; - main_A[4] = (Z64){-8.94, -4.75}; - main_A[5] = (Z64){-4.40, -3.82}; - main_A[6] = (Z64){-5.88, -6.60}; - main_A[7] = (Z64){ 3.66, -7.53}; - main_A[8] = (Z64){ 8.95, -6.53}; - main_A[9] = (Z64){-3.50, -4.26}; - main_A[10] = (Z64){-3.36, -0.40}; - main_A[11] = (Z64){ 2.58, 3.60}; - main_A[12] = (Z64){-9.87, 4.82}; - main_A[13] = (Z64){-3.15, 7.36}; - main_A[14] = (Z64){-0.75, 5.23}; - main_A[15] = (Z64){ 4.59, 5.41}; - - LOG("\n\n---- Calling Intel MKL zgeev test (Using Z64 instead of MKL_Complex16 etc) ---- \n\n"); - - { - S32 n = N, lda = N, ldvl = N, ldvr = N, info, lwork; - Z64 wkopt; - Z64 *work; - - F64 *rwork = PushArray(arena, F64, 2*N); - Z64 *w = PushArray(arena, Z64, N); - Z64 *vl = PushArray(arena, Z64, N*N); - Z64 *vr = PushArray(arena, Z64, N*N); - Z64 *a = PushArray(arena, Z64, N*N); - for(U32 j = 0; j < N; j++) - { - for(U32 i = 0; i < N; i++) - { - U32 index = i*N+j; - - a[index] = main_A[index]; - } - - } - - - /* Executable statements */ - LOG( " ZGEEV Example Program Results\n" ); - /* Query and allocate the optimal workspace */ - lwork = -1; - zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr, - &wkopt, &lwork, rwork, &info ); - lwork = (S32)wkopt.re; - work = (Z64*)malloc( lwork*sizeof(Z64) ); - /* Solve eigenproblem */ - zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr, - work, &lwork, rwork, &info ); - /* Check for convergence */ - if( info > 0 ) { - LOG( "The algorithm failed to compute eigenvalues.\n" ); - exit( 1 ); - } - /* Print eigenvalues */ - print_matrix_Z64( "Eigenvalues", 1, n, w, 1 ); - /* Print left eigenvectors */ - print_matrix_Z64( "Left eigenvectors", n, n, vl, ldvl ); - /* Print right eigenvectors */ - print_matrix_Z64( "Right eigenvectors", n, n, vr, ldvr ); - /* Free workspace */ - free( (void*)work ); - } /* End of ZGEEV Example */ - - - - LOG("\n\n--- End of EntryPoint, exiting program. \n\n"); -} - - - function void -testing_MKL() -{ - - test_mkl_zgeev(); - test_mkl_dsyevd(); - -} - -/* function void */ -/* dgeev_example() */ -/* { */ -/* /1* Locals *1/ */ -/* MKL_INT n = N, lda = LDA, ldvl = LDVL, ldvr = LDVR, info, lwork; */ -/* double wkopt; */ -/* double* work; */ -/* /1* Local arrays *1/ */ -/* double wr[N], wi[N], vl[LDVL*N], vr[LDVR*N]; */ -/* double a[LDA*N] = { */ -/* -1.01, 3.98, 3.30, 4.43, 7.31, */ -/* 0.86, 0.53, 8.26, 4.96, -6.43, */ -/* -4.60, -7.04, -3.89, -7.66, -6.16, */ -/* 3.31, 5.29, 8.20, -7.33, 2.47, */ -/* -4.81, 3.55, -1.51, 6.18, 5.58 */ -/* }; */ -/* /1* Executable statements *1/ */ -/* printf( " DGEEV Example Program Results\n" ); */ -/* /1* Query and allocate the optimal workspace *1/ */ -/* lwork = -1; */ -/* dgeev( "Vectors", "Vectors", &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, */ -/* &wkopt, &lwork, &info ); */ -/* lwork = (MKL_INT)wkopt; */ -/* work = (double*)malloc( lwork*sizeof(double) ); */ -/* /1* Solve eigenproblem *1/ */ -/* dgeev( "Vectors", "Vectors", &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, */ -/* work, &lwork, &info ); */ -/* /1* Check for convergence *1/ */ -/* if( info > 0 ) { */ -/* printf( "The algorithm failed to compute eigenvalues.\n" ); */ -/* exit( 1 ); */ -/* } */ -/* /1* Print eigenvalues *1/ */ -/* print_eigenvalues( "Eigenvalues", n, wr, wi ); */ -/* /1* Print left eigenvectors *1/ */ -/* print_eigenvectors( "Left eigenvectors", n, wi, vl, ldvl ); */ -/* /1* Print right eigenvectors *1/ */ -/* print_eigenvectors( "Right eigenvectors", n, wi, vr, ldvr ); */ -/* /1* Free workspace *1/ */ -/* free( (void*)work ); */ - -/* } */ +//~ TODO +//- +/// +/// - Use arena for matrices instead of malloc maybe +/// - Factor l-dependent matrix +/// - Print eigenfunctions for plotting +/// - Solve for more shells +/// - Make some good orbital/atom abstraction +/// - Then try to make hartree fock work maybe. +/// - Need to make a Complex double (Z64) version if I want to go relativistic