diff --git a/.gitignore b/.gitignore
index 11e1324..45f395b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -128,3 +128,8 @@ dkms.conf
 *.cubin
 *.fatbin
 
+# Custom
+*.vs/
+*.rdi
+*.pdb
+*.ctm
\ No newline at end of file
diff --git a/build.bat b/build.bat
new file mode 100644
index 0000000..acf766b
--- /dev/null
+++ b/build.bat
@@ -0,0 +1,47 @@
+@echo off
+
+ctime -begin timeBuild.ctm
+
+@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
+@rem set CommonCompilerFlags="/nologo /Zi /FC"
+set CommonCompilerFlags=/nologo /Zi /FC /Od
+@rem /WX /W4 /wd4201 /wd4100 /wd4189 /wd4244 /wd4127 /wd4456
+@rem
+
+
+set mkl_root=D:/lib/oneAPI_mkl/mkl/2021.3.0
+set mkl_core=%mkl_root%/lib/intel64/mkl_core.lib
+set mkl_intel_lp64=%mkl_root%/lib/intel64/mkl_intel_lp64.lib
+set mkl_intel_thread=%mkl_root%/lib/intel64/mkl_intel_thread.lib
+set MKLCOMPILER=D:/lib/oneAPI_mkl/compiler/2021.3.0/windows/compiler
+set libiomp5md=%MKLCOMPILER%/lib/intel64_win/libiomp5md.lib
+
+set libiompdll_path=D:\lib\oneAPI_mkl\compiler\2021.3.0\windows\redist\intel64_win\compiler
+set libiompdll_name=libiomp5md.dll
+set libiompdll=%libiompdll_path%\%libiompdll_name%
+
+set Sources=../src/main.c
+
+IF NOT EXIST .\build mkdir .\build
+pushd .\build
+
+if not exist "%libiompdll_name%" (
+  echo Copying %libiompdll%
+  copy "%libiompdll%" .
+  if errorlevel 1 (
+    echo Error copying openmp dll
+  ) else (
+    echo Copied openmp dll: %libiompdll_name%
+  )
+
+)
+
+cl %CommonCompilerFlags% %Sources% /I"%mkl_root%\include" /link %mkl_core% %mkl_intel_lp64% %mkl_intel_thread% %libiomp5md%
+
+set LastError=%ERRORLEVEL%
+popd
+
+ctime -end timeBuild.ctm %LastError%
+@rem Return the compiler's exit code so callers can detect a failed build.
+
+exit /b %LastError%
diff --git a/src/base/base_context_cracking.h b/src/base/base_context_cracking.h
new file mode 100644
index 0000000..f3ed4d6
--- /dev/null
+++ b/src/base/base_context_cracking.h
@@ -0,0 +1,178 @@
+#ifndef BASE_CONTEXT_CRACKING_H
+#define BASE_CONTEXT_CRACKING_H
+
+// NOTE(antonl):
+// This header is used for "context cracking", i.e. figuring out compile-time context such as
+// compiler, OS, architecture and language. Every macro ends up defined as 1 or 0.
+
+// For now this is just copy pasted from RJFs layer, and probably that's all that's needed.
+
+
+///////////////////////////////////////////////
+//~ MSVC extraction
+
+#if defined(_MSC_VER)
+
+# define COMPILER_MSVC 1
+
+# if defined(_WIN32)
+#  define OS_WINDOWS 1
+# else
+#  error _MSC_VER is defined, but _WIN32 is not. This setup is not supported.
+# endif
+
+# if defined(_M_AMD64)
+#  define ARCH_X64 1
+# elif defined(_M_IX86)
+#  define ARCH_X86 1
+# elif defined(_M_ARM64)
+#  define ARCH_ARM64 1
+# elif defined(_M_ARM)
+#  define ARCH_ARM32 1
+# else
+#  error Target architecture is not supported. _MSC_VER is defined, but one of {_M_AMD64, _M_IX86, _M_ARM64, _M_ARM} is not.
+# endif
+
+#if _MSC_VER >= 1920
+#define COMPILER_MSVC_YEAR 2019
+#elif _MSC_VER >= 1910
+#define COMPILER_MSVC_YEAR 2017
+#elif _MSC_VER >= 1900
+#define COMPILER_MSVC_YEAR 2015
+#elif _MSC_VER >= 1800
+#define COMPILER_MSVC_YEAR 2013
+#elif _MSC_VER >= 1700
+#define COMPILER_MSVC_YEAR 2012
+#elif _MSC_VER >= 1600
+#define COMPILER_MSVC_YEAR 2010
+#elif _MSC_VER >= 1500
+#define COMPILER_MSVC_YEAR 2008
+#elif _MSC_VER >= 1400
+#define COMPILER_MSVC_YEAR 2005
+#else
+#define COMPILER_MSVC_YEAR 0
+#endif
+
+////////////////////////////////
+//~ rjf: Clang Extraction
+
+#elif defined(__clang__)
+
+# define COMPILER_CLANG 1
+
+# if defined(__APPLE__) && defined(__MACH__)
+#  define OS_MAC 1
+# elif defined(__gnu_linux__)
+#  define OS_LINUX 1
+# else
+#  error __clang__ is defined, but one of {__APPLE__, __gnu_linux__} is not. This setup is not supported.
+# endif
+
+# if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64)
+#  define ARCH_X64 1
+# elif defined(i386) || defined(__i386) || defined(__i386__)
+#  define ARCH_X86 1
+# elif defined(__aarch64__)
+#  define ARCH_ARM64 1
+# elif defined(__arm__)
+#  define ARCH_ARM32 1
+# else
+#  error Target architecture is not supported. __clang__ is defined, but one of {__amd64__, __amd64, __x86_64__, __x86_64, i386, __i386, __i386__, __aarch64__, __arm__} is not.
+# endif
+
+////////////////////////////////
+//~ rjf: GCC Extraction
+
+#elif defined(__GNUC__) || defined(__GNUG__)
+
+# define COMPILER_GCC 1
+
+# if defined(__gnu_linux__)
+#  define OS_LINUX 1
+# else
+#  error __GNUC__ or __GNUG__ is defined, but __gnu_linux__ is not. This setup is not supported.
+# endif
+
+# if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64)
+#  define ARCH_X64 1
+# elif defined(i386) || defined(__i386) || defined(__i386__)
+#  define ARCH_X86 1
+# elif defined(__aarch64__)
+#  define ARCH_ARM64 1
+# elif defined(__arm__)
+#  define ARCH_ARM32 1
+# else
+#  error Target architecture is not supported. __GNUC__ or __GNUG__ is defined, but one of {__amd64__, __amd64, __x86_64__, __x86_64, i386, __i386, __i386__, __aarch64__, __arm__} is not.
+# endif
+
+#else
+# error Compiler is not supported. _MSC_VER, __clang__, __GNUC__, or __GNUG__ must be defined.
+#endif
+
+#if defined(ARCH_X64) || defined(ARCH_ARM64) // 64-bit targets, ARM64 included
+# define ARCH_64BIT 1
+#elif defined(ARCH_X86) || defined(ARCH_ARM32) // 32-bit targets, ARM32 included
+# define ARCH_32BIT 1
+
+#endif
+
+////////////////////////////////
+//~ rjf: Language
+
+#if defined(__cplusplus)
+# define LANG_CPP 1
+#else
+# define LANG_C 1
+#endif
+
+////////////////////////////////
+//~ rjf: Zero
+
+#if !defined(ARCH_32BIT)
+# define ARCH_32BIT 0
+#endif
+#if !defined(ARCH_64BIT)
+# define ARCH_64BIT 0
+#endif
+#if !defined(ARCH_X64)
+# define ARCH_X64 0
+#endif
+#if !defined(ARCH_X86)
+# define ARCH_X86 0
+#endif
+#if !defined(ARCH_ARM64)
+# define ARCH_ARM64 0
+#endif
+#if !defined(ARCH_ARM32)
+# define ARCH_ARM32 0
+#endif
+#if !defined(COMPILER_MSVC)
+# define COMPILER_MSVC 0
+#endif
+#if !defined(COMPILER_GCC)
+# define COMPILER_GCC 0
+#endif
+#if !defined(COMPILER_CLANG)
+# define COMPILER_CLANG 0
+#endif
+#if !defined(OS_WINDOWS)
+# define OS_WINDOWS 0
+#endif
+#if !defined(OS_LINUX)
+# define OS_LINUX 0
+#endif
+#if !defined(OS_MAC)
+# define OS_MAC 0
+#endif
+#if !defined(LANG_CPP)
+# define LANG_CPP 0
+#endif
+#if !defined(LANG_C)
+# define LANG_C 0
+#endif
+
+// TODO(antonl);
+// Build options context cracking, need to figure out what we should use here first.
+#define BUILD_DEBUG 1
+
+#endif /* BASE_CONTEXT_CRACKING_H */
diff --git a/src/base/base_core.h b/src/base/base_core.h
new file mode 100644
index 0000000..91fd56f
--- /dev/null
+++ b/src/base/base_core.h
@@ -0,0 +1,294 @@
+#ifndef BASE_TYPES_H
+#define BASE_TYPES_H
+
+#include
+#include
+#include
+
+/////////////////////////
+//~ Macros
+
+/////////////////////////
+//- Linking keywords
+
+// NOTE: In C++, extern "C" gives a declaration C linkage (no C++ name mangling); in C the macro expands to nothing. From rjf's layer.
+#if LANG_CPP
+# define no_name_mangle extern "C"
+#else
+# define no_name_mangle
+#endif
+
+// TODO(anton): OS_WINDOWS dll import/export macros
+
+/////////////////////////
+//- Keywords
+// Static is stupid and means different things depending on context in C and C++.
+// These defines increase readability.
+#define function static // Function internal to compilation unit.
+#define local_persist static // Local variable that keeps its value between calls.
+#define global static // File-scope variable internal to the compilation unit.
+#define fallthrough // for use in switch statements, for clarity..
+
+// root_global declares a global visible to other translation units (extern in C, no_name_mangle in C++); root_function is currently the same as 'function' (static) in both languages.
+#if LANG_CPP
+# define root_global no_name_mangle
+# define root_function function
+#else
+# define root_global extern
+# define root_function function
+#endif
+
+#define inline_function inline static
+
+#if OS_WINDOWS
+# pragma section(".roglob", read) // Declares a read-only data section.
+# define read_only __declspec(allocate(".roglob")) // Places the variable in that section.
+#else
+# define read_only
+#endif
+
+#if COMPILER_MSVC
+# define per_thread __declspec(thread) // One instance of the variable per thread.
+#else
+# error Thread keyword not abstracted on compiler.
+#endif
+
+/////////////////////////
+//- Memory operations
+// It's nice to put these in macros, so we can swap out the functionality from standard library, eventually.
+#define MemoryCopy memcpy
+#define MemoryMove memmove
+#define MemorySet memset
+
+// NOTE(anton): This gives a 4127 compiler warning for the sizeof conditional.
This should be ignored
+#define MemoryCopyStruct(dst, src) do { Assert(sizeof(*(dst)) == sizeof(*(src))); MemoryCopy((dst), (src), sizeof(*(dst))); } while(0)
+
+#define MemoryZero(ptr, size) MemorySet((ptr), 0, (size))
+#define MemoryZeroStruct(ptr) MemoryZero((ptr), sizeof(*(ptr)))
+#define MemoryZeroArray(arr) MemoryZero((arr), sizeof(arr))
+
+/////////////////////////
+//- Integer/pointer/array/type manipulations
+
+#define ArrayCount(a) (sizeof(a) / sizeof((a)[0]))
+#define IntFromPtr(p) ((U64)(((U8*)(p)) - 0))
+#define PtrFromInt(i) ((void*)(((U8*)0) + (i)))
+#define Member(type, member_name) (((type *)0)->member_name)
+// The address of a member of a struct placed at address 0 is that member's byte offset within the struct.
+#define OffsetOf(type, member_name) IntFromPtr(&Member(type, member_name))
+// Given a pointer to a member, step back by the member's offset to get a pointer to the containing struct.
+#define BaseFromMember(type, member_name, ptr) ((type *)((U8 *)(ptr) - OffsetOf(type, member_name)))
+
+#define Bytes(n) (n)
+#define Kilobytes(n) ((n) << 10) // 2^10 == 1024 etc
+#define Megabytes(n) ((n) << 20)
+#define Gigabytes(n) (((U64)(n)) << 30)
+#define Terabytes(n) (((U64)(n)) << 40)
+
+#define Thousand(n) ((n)*1000)
+#define Million(n) ((n)*1000000)
+#define Billion(n) ((n)*1000000000LL)
+
+#define AbsoluteValueU64(x) ((U64)llabs((S64)(x)))
+
+/////////////////////////
+//- Linked list helpers
+
+#define CheckNull(p) ((p)==0)
+#define SetNull(p) ((p)=0)
+// Link list helper macros that are a bit involved
+
+// Suffixes N,P,Z means that we have (N)ext, (P)rev arguments and/or a (Z)ero check and/or set argument
+// f, l are the list's first and last pointers, n is the node being inserted or removed, p is the node to insert after.
+// DLL
+// Doubly Linked List: Each node has a prev and next pointer. Operations: Push back, Push front, remove
+#define DLLInsert_NPZ(f,l,p,n,next,prev,zchk,zset) \
+(zchk(f) ? (((f) = (l) = (n)), zset((n)->next), zset((n)->prev)) :\
+zchk(p) ? (zset((n)->prev), (n)->next = (f), (zchk(f) ? (0) : ((f)->prev = (n))), (f) = (n)) :\
+((zchk((p)->next) ? (0) : (((p)->next->prev) = (n))), (n)->next = (p)->next, (n)->prev = (p), (p)->next = (n),\
+((p) == (l) ? (l) = (n) : (0))))
+
+#define DLLPushBack_NPZ(f,l,n,next,prev,zchk,zset) DLLInsert_NPZ(f,l,l,n,next,prev,zchk,zset)
+
+#define DLLPushBack_NP(f, l, n, next, prev, zchk) \
+(zchk(f) ? ((f)=(l)=(n),(n)->next=(n)->prev=0) : ((n)->prev=(l),(l)->next=(n),(l)=(n),(n)->next=0))
+
+// If f == n we advance f to f->next and clear the new head's prev; if the list became empty we clear l instead.
+// Else if l == n, we put l=l->prev, l->next = 0.
+// If l != n and f != n we set n->next->prev to n->prev, and n->prev->next to n->next
+
+#define DLLRemove_NP(f, l, n, next, prev) (((f) == (n) ? \
+((f)=(f)->next, ((f) ? ((f)->prev=0) : ((l)=0))) : \
+(l) == (n) ? \
+((l)=(l)->prev, (l)->next=0) : \
+((n)->next->prev=(n)->prev, \
+(n)->prev->next=(n)->next) ))
+
+#define DLLRemove_NPZ(f,l,n,next,prev,zchk,zset) (((f)==(n))?\
+((f)=(f)->next, (zchk(f) ? (zset(l)) : zset((f)->prev))):\
+((l)==(n))?\
+((l)=(l)->prev, (zchk(l) ? (zset(f)) : zset((l)->next))):\
+((zchk((n)->next) ? (0) : ((n)->next->prev=(n)->prev)),\
+(zchk((n)->prev) ? (0) : ((n)->prev->next=(n)->next))))
+
+#define DLLPushBack(f, l, n) DLLPushBack_NPZ(f, l, n, next, prev, CheckNull, SetNull)
+// For front push I can just switch prev/next!
+#define DLLPushFront(f, l, n) DLLPushBack_NPZ(l, f, n, prev, next, CheckNull, SetNull)
+#define DLLRemove(f, l, n) DLLRemove_NPZ(f, l, n, next, prev, CheckNull, SetNull)
+
+
+// SLL, queue or stack.
+// These are from rjf's layer.
+
+////////////////
+// Queue
+// Queue has only a next pointer. But we can push from front also.
+// zchk = zero check, zset = zero set
+#define QueuePush_NZ(f, l, n, next, zchk, zset) (zchk(f)?\
+(((f)=(l)=(n)), zset((n)->next)):\
+((l)->next=(n),(l)=(n),zset((n)->next)))
+
+#define QueuePushFront_NZ(f, l, n, next, zchk, zset) ( zchk(f) ? \
+(((f)=(l)=(n)), zset((n)->next)) : \
+(((n)->next = (f)), ((f) = (n))) )
+
+#define QueuePop_NZ(f, l, next, zchk, zset) ( (f)==(l) ? \
+(zset(f), zset(l)) : ((f)=(f)->next))
+
+#define QueuePush(f, l, n) QueuePush_NZ(f, l, n, next, CheckNull, SetNull)
+#define QueuePushFront(f, l, n) QueuePushFront_NZ(f, l, n, next, CheckNull, SetNull)
+#define QueuePop(f, l) QueuePop_NZ(f, l, next, CheckNull, SetNull)
+
+////////////////
+// Stack
+#define StackPush_N(f, n, next) ((n)->next=(f), (f)=(n)) // Take the first element and set it to n->next, and set the first element to the node n.
+#define StackPop_NZ(f, next, zchk) (zchk(f) ? 0 : ((f)=(f)->next)) // If first element is not zero we say that the first element is f->next, ie we pop f and put f->next on top.
+
+#define StackPush(f, n) StackPush_N(f, n, next)
+#define StackPop(f) StackPop_NZ(f, next, CheckNull)
+
+/////////////////////////
+//- Clamp/min/max
+#define Min(a, b) (((a)<(b)) ? (a) : (b))
+#define Max(a, b) (((a)>(b)) ? (a) : (b))
+#define ClampTop(x, a) Min(x,a) // "Top" since we are cutting off anything above Min(x,a)
+#define ClampBot(a, x) Max(a,x) // "Bot" since we're cutting off anything below Max(a,x)
+// If a > x we get a, else we see if b < x and then get b if true, else x.
+// So for a <= b this clamps x into [a, b]. Arguments are evaluated more than once; avoid side effects.
+#define Clamp(a, x, b) (((a)>(x))?(a):((b)<(x))?(b):(x))
+
+//- loop
+#define DeferLoop(start, end) for(int _i_ = ((start), 0); _i_ == 0; _i_ += 1, (end))
+#define DeferLoopChecked(begin, end) for(int _i_ = 2 * !(begin); (_i_ == 2 ? ((end), 0) : !_i_); _i_ += 1, (end))
+
+#define EachEnumVal(type, it) type it = (type)0; it < type##_COUNT; it = (type)(it+1)
+#define EachNonZeroEnumVal(type, it) type it = (type)1; it < type##_COUNT; it = (type)(it+1)
+
+
+/////////////////////////
+//~ Base types
+typedef int8_t S8;
+typedef int16_t S16;
+typedef int32_t S32;
+typedef int64_t S64;
+typedef uint8_t U8;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef uint64_t U64;
+typedef S8 B8;
+typedef S16 B16;
+typedef S32 B32;
+typedef S64 B64;
+typedef float F32;
+typedef double F64;
+typedef void VoidFunction(void);
+
+/////////////////////////
+//~ Numerical limits
+read_only global U8 U8Max = 0xFF;
+read_only global U8 U8Min = 0;
+read_only global U32 U32Max = 0xFFFFFFFF;
+read_only global U32 U32Min = 0;
+read_only global U64 U64Max = 0xFFFFFFFFFFFFFFFF;
+
+// TODO(anton): Rest of the limits, unsigned and signed integer values
+read_only global U32 SignF32 = 0x80000000;
+
+
+//- compiler, shut up! helpers
+#define unused_variable(name) ((void)(name))
+
+/////////////////////////
+//~ Base enums
+
+// Describing a 2-coordinate system
+typedef enum Axis2
+{
+  Axis2_Invalid = -1,
+  Axis2_X,
+  Axis2_Y,
+  Axis2_COUNT
+}
+Axis2;
+#define Axis2_flip(a) ((Axis2)(!(a)))
+
+// Corners of a rectangle.
+// 00 ----- 10
+// |        |
+// 01 ----- 11
+typedef enum Corner
+{
+  Corner_Invalid = -1,
+  Corner_00,
+  Corner_01,
+  Corner_10,
+  Corner_11,
+  Corner_COUNT
+}
+Corner;
+
+////////////////////////////////
+//~ Member Offset Helper
+
+typedef struct MemberOffset MemberOffset;
+struct MemberOffset
+{
+  U64 v; // Byte offset of a member inside its struct, as produced by OffsetOf.
+};
+
+#define MemberOff(S, member) (MemberOffset){OffsetOf(S, member)}
+#define MemberOffLit(S, member) {OffsetOf(S, member)}
+#define MemberFromOff(ptr, type, memoff) (*(type *)((U8 *)(ptr) + (memoff).v))
+
+/////////////////////////
+//~ Assertions
+
+#if OS_WINDOWS
+# define break_debugger() __debugbreak()
+#else
+# error not implemented
+#endif
+
+#undef Assert
+#define Assert(b) do { if(!(b)) { break_debugger(); } } while(0)
+
+#if !defined(LOG_NOT_IMPLEMENTED)
+# define LOG_NOT_IMPLEMENTED do { printf("\nFATAL ERROR: Not implemented yet.\n"); Assert(0); exit(1); } while(0)
+#endif
+
+/////////////////////////
+//~ Bit patterns
+#define AlignUpToPow2(bytes_to_align, alignment_bytes) (((U64)(bytes_to_align) + ((U64)(alignment_bytes) - 1)) & ~((U64)(alignment_bytes) - 1)) // alignment_bytes must be a power of two; done in U64 so a narrower alignment type cannot truncate the mask
+
+inline_function F32
+absolute_value_F32(F32 f)
+{
+  union { U32 u; F32 f; } x; // Reinterpret the float's bits through a union.
+  x.f = f;
+  x.u = x.u & ~SignF32; // Clear the sign bit (bit 31).
+  return x.f;
+}
+
+// TODO(anton): Understand rjf's bit patterns
+
+#endif //BASE_TYPES_H
diff --git a/src/base/base_inc.c b/src/base/base_inc.c
new file mode 100644
index 0000000..7194c58
--- /dev/null
+++ b/src/base/base_inc.c
@@ -0,0 +1,4 @@
+#include "base_math.c"
+#include "base_memory.c"
+#include "base_strings.c"
+#include "base_thread_context.c"
\ No newline at end of file
diff --git a/src/base/base_inc.h b/src/base/base_inc.h
new file mode 100644
index 0000000..0598745
--- /dev/null
+++ b/src/base/base_inc.h
@@ -0,0 +1,11 @@
+#ifndef BASE_H
+#define BASE_H
+
+#include "base_context_cracking.h"
+#include "base_core.h"
+#include "base_math.h"
+#include "base_memory.h"
+#include "base_strings.h"
+#include "base_thread_context.h"
+
+#endif //BASE_H
diff --git a/src/base/base_math.c b/src/base/base_math.c
new file mode 100644
index 0000000..40e9cb0
--- /dev/null
+++ b/src/base/base_math.c
@@ -0,0 +1,107 @@
+//- Vec2 F32
+root_function Vec2_F32
+vec2_F32(F32 x, F32 y)
+{
+  Vec2_F32 result;
+  result.x = x;
+  result.y = y;
+  return result;
+}
+
+root_function Vec2_F32 add2_F32(Vec2_F32 a, Vec2_F32 b) { return vec2_F32(a.x+b.x, a.y+b.y); }
+root_function Vec2_F32 sub2_F32(Vec2_F32 a, Vec2_F32 b) { return vec2_F32(a.x-b.x, a.y-b.y); }
+
+//- Vec2 S32
+root_function Vec2_S32
+vec2_S32(S32 x, S32 y)
+{
+  Vec2_S32 result;
+  result.x = x;
+  result.y = y;
+  return result;
+}
+
+
+root_function Vec2_S64
+vec2_S64(S64 x, S64 y)
+{
+  Vec2_S64 result;
+  result.x = x;
+  result.y = y;
+  return result;
+}
+
+root_function Vec3_F32
+vec3_F32(F32 x, F32 y, F32 z)
+{
+  Vec3_F32 result;
+  result.x = x;
+  result.y = y;
+  result.z = z;
+  return result;
+}
+
+root_function Vec4_F32
+vec4_F32(F32 x, F32 y, F32 z, F32 w)
+{
+  Vec4_F32 result;
+  result.x = x;
+  result.y = y;
+  result.z = z;
+  result.w = w;
+  return result;
+}
+
+//~ Range functions
+root_function Rng2_F32
+rng2_F32(Vec2_F32 min, Vec2_F32 max)
+{
+  Rng2_F32 result = { min, max };
+  return result;
+}
+
+root_function Rng2_F32
+shift2_F32(Rng2_F32 r, Vec2_F32 v) {
+  // Shift the rectangle r by vector v.
+  r.x0 += v.x;
+  r.y0 += v.y;
+  r.x1 += v.x;
+  r.y1 += v.y;
+  return r;
+}
+
+root_function Rng2_F32
+pad2_F32(Rng2_F32 r, F32 x)
+{
+  // Pad subtracts x from min (p0) on both axes and adds x to max (p1) on both axes,
+  // so the rectangle grows by x on every side, i.e. by 2*x along each axis.
+  Vec2_F32 min = sub2_F32(r.min, vec2_F32(x, x));
+  Vec2_F32 max = add2_F32(r.max, vec2_F32(x, x));
+  return rng2_F32(min, max);
+}
+
+root_function Vec2_F32
+dim2_F32(Rng2_F32 rng)
+{
+  return vec2_F32(absolute_value_F32(rng.max.x - rng.min.x),
+                  absolute_value_F32(rng.max.y - rng.min.y));
+}
+
+// Check if a rect contains a point; min is inclusive, max is exclusive.
+root_function B32
+rng2_contains_vec2_F32(Rng2_F32 r, Vec2_F32 x)
+{
+  B32 c = (r.min.x <= x.x && x.x < r.max.x && r.min.y <= x.y && x.y < r.max.y);
+  return c;
+}
+
+root_function Rng2_F32
+rng2_intersect_f32(Rng2_F32 a, Rng2_F32 b)
+{
+  Rng2_F32 c; // p0/p1 alias min/max; if a and b do not overlap the result has min > max on that axis
+  c.p0.x = Max(a.min.x, b.min.x);
+  c.p0.y = Max(a.min.y, b.min.y);
+  c.p1.x = Min(a.max.x, b.max.x);
+  c.p1.y = Min(a.max.y, b.max.y);
+  return c;
+}
\ No newline at end of file
diff --git a/src/base/base_math.h b/src/base/base_math.h
new file mode 100644
index 0000000..9942379
--- /dev/null
+++ b/src/base/base_math.h
@@ -0,0 +1,149 @@
+#ifndef BASE_MATH_H
+#define BASE_MATH_H
+
+//////////////////////////
+//~ Macros
+
+#define floor_F32(f) floorf(f)
+
+
+//////////////////////////
+//~ Vector types
+
+//- 2-vectors
+typedef union Vec2_S32 Vec2_S32;
+union Vec2_S32
+{
+  struct
+  {
+    S32 x;
+    S32 y;
+  };
+  S32 v[2];
+};
+
+typedef union Vec2_S64 Vec2_S64;
+union Vec2_S64
+{
+  struct
+  {
+    S64 x;
+    S64 y;
+  };
+  S64 v[2];
+};
+
+typedef union Vec2_F32 Vec2_F32;
+union Vec2_F32
+{
+  struct
+  {
+    F32 x;
+    F32 y;
+  };
+  F32 v[2];
+};
+
+//- 3-vectors
+
+typedef union Vec3_F32 Vec3_F32;
+union Vec3_F32
+{
+  struct
+  {
+    F32 x;
+    F32 y;
+    F32 z;
+  };
+  F32 v[3];
+};
+
+//- 4-vectors
+typedef union Vec4_F32 Vec4_F32;
+union Vec4_F32
+{
+  struct
+  {
+    F32 x;
+    F32 y;
+    F32 z;
+    F32 w;
+  };
+  struct
+  {
+    Vec2_F32 xy;
+    Vec2_F32 zw;
+  };
+  F32 v[4];
+};
+
+//- vector macros: convert any vector with .x/.y members by casting each component (no trailing ';' so they work inside expressions)
+#define vec2_F32_from_vec(v) vec2_F32((F32)(v).x, (F32)(v).y)
+#define vec2_S32_from_vec(v) vec2_S32((S32)(v).x, (S32)(v).y)
+#define vec2_S64_from_vec(v) vec2_S64((S64)(v).x, (S64)(v).y)
+//////////////////////////
+//~
Matrix types
+typedef struct Mat3x3_F32 Mat3x3_F32;
+struct Mat3x3_F32
+{
+  F32 elements[3][3];
+};
+
+typedef struct Mat4x4_F32 Mat4x4_F32;
+struct Mat4x4_F32
+{
+  F32 elements[4][4];
+};
+
+//////////////////////////
+//~ Range types
+
+//- 2D interval
+// The three anonymous structs are different views of the same four floats.
+typedef union Rng2_F32 Rng2_F32;
+union Rng2_F32
+{
+  struct
+  {
+    Vec2_F32 min;
+    Vec2_F32 max;
+  };
+  struct
+  {
+    Vec2_F32 p0;
+    Vec2_F32 p1;
+  };
+  struct
+  {
+    F32 x0;
+    F32 y0;
+    F32 x1;
+    F32 y1;
+  };
+  Vec2_F32 v[2];
+};
+
+//~ Vector functions
+
+//- Vec2 F32
+root_function Vec2_F32 vec2_F32(F32 x, F32 y);
+root_function Vec2_F32 add2_F32(Vec2_F32 a, Vec2_F32 b);
+root_function Vec2_F32 sub2_F32(Vec2_F32 a, Vec2_F32 b);
+
+//- Vec2 S32
+root_function Vec2_S32 vec2_S32(S32 x, S32 y);
+
+root_function Vec2_S64 vec2_S64(S64 x, S64 y);
+
+root_function Vec3_F32 vec3_F32(F32 x, F32 y, F32 z);
+
+root_function Vec4_F32 vec4_F32(F32 x, F32 y, F32 z, F32 w);
+
+//~ Range functions
+root_function Rng2_F32 rng2_F32(Vec2_F32 min, Vec2_F32 max);
+root_function Rng2_F32 shift2_F32(Rng2_F32 r, Vec2_F32 v);
+root_function Rng2_F32 pad2_F32(Rng2_F32 r, F32 x);
+root_function Vec2_F32 dim2_F32(Rng2_F32 rng);
+root_function B32 rng2_contains_vec2_F32(Rng2_F32 r, Vec2_F32 x);
+root_function Rng2_F32 rng2_intersect_f32(Rng2_F32 a, Rng2_F32 b);
+#endif //BASE_MATH_H
diff --git a/src/base/base_memory.c b/src/base/base_memory.c
new file mode 100644
index 0000000..1482517
--- /dev/null
+++ b/src/base/base_memory.c
@@ -0,0 +1,163 @@
+#include
+#include
+
+#if !defined(m_reserve)
+#error missing definition for 'm_reserve' type: (U64)->void*
+#endif
+#if !defined(m_commit)
+#error missing definition for 'm_commit' type: (void*, U64)->void
+#endif
+#if !defined(m_decommit)
+#error missing definition for 'm_decommit' type: (void*, U64)->void
+#endif
+#if !defined(m_release)
+#error missing definition for 'm_release' type: (void*, U64)->void
+#endif
+
+static Arena *g_scratch_arena = 0;
+
+root_function void
+m_change_memory_noop(void *ptr, U64 size) {}
+
+// Malloc implementation of the M_Base_memory
+root_function void*
+m_malloc_reserve(U64 size) {
+  return malloc(size);
+}
+
+root_function void
+m_malloc_release(void *ptr, U64 size) {
+  free(ptr);
+}
+
+//~ 64-bit memory arena
+
+root_function Arena
+*m_make_arena_reserve(U64 reserve_size) {
+  Arena *result = 0;
+  U64 initial_commit_size = ARENA_COMMIT_GRANULARITY;
+  // Returns 0 when reserve_size is smaller than the initial commit or when the reservation itself fails.
+  void *memory = (reserve_size >= initial_commit_size) ? m_reserve(reserve_size) : 0;
+  if (memory != 0) {
+    // Since we use "header" space we must ensure the initial commit can fit the Arena struct.
+    Assert(initial_commit_size >= sizeof(Arena));
+    m_commit(memory, ARENA_COMMIT_GRANULARITY);
+    result = (Arena*)memory;//(Arena*)result; <- this has to be mistake in Allen's video.. ?
+    // The reserved block now starts with the Arena "header"; fill in its members.
+    result->capacity = reserve_size;
+    result->commit_pos = initial_commit_size;
+    result->align = 8; // 8-bytes alignment?
+    result->pos = sizeof(Arena); // Here we point the position to after the Arena "header" section.
+  }
+  return result;
+}
+
+root_function Arena*
+m_make_arena(void) {
+  Arena* result = m_make_arena_reserve(M_DEFAULT_RESERVE_SIZE);
+  return result;
+}
+
+root_function void*
+m_arena_push(Arena *arena, U64 size) {
+  void *result = 0;
+  if (size <= arena->capacity - arena->pos) { // written this way so pos + size cannot wrap around
+    /*U8 *base = (U8 *)arena; // Get memory base pointer.
+    // Adjust by any alignment if necessary.
+    // Doing modulo ensures we get a number in the 0-align-1 range.
+    U64 post_align_pos = (arena->pos + (arena->align-1)):
+    post_align_pos = post_align_pos % arena->align;
+    // What's happening here? Almost certainly the align will overflow here?
+    // Are we filling the allocated space backwards or what's up?
+    // TODO(anton): UNDERSTAND
+    U64 align = post_align_pos - arena->pos;
+    result = base + arena->pos + align;
+    arena->pos += size + align;*/
+    // Do Allen4th version until I understand the above.
+    result = ((U8*) arena) + arena->pos;
+    arena->pos += size; // increment pos by what we want to push
+
+    U64 p = arena->pos;
+    U64 commit_p = arena->commit_pos;
+    if (p > commit_p) {
+      U64 p_aligned = AlignUpToPow2(p, M_COMMIT_BLOCK_SIZE);
+      U64 next_commit_p = ClampTop(p_aligned, arena->capacity); // Make sure new commit_p won't overshoot capacity
+      U64 commit_size = next_commit_p - commit_p;
+      m_commit((U8 *)arena + commit_p, commit_size);
+      arena->commit_pos = next_commit_p;
+    }
+  } else {
+    // NOTE(anton): Should implement some fallback but now we fail: the caller gets a null pointer.
+  }
+  return result;
+}
+
+root_function void
+m_arena_pop_to(Arena *arena, U64 pos) {
+  if (pos < arena->pos) {
+    arena->pos = Max(pos, sizeof(Arena)); // never rewind into the Arena header stored at the base
+
+    U64 p = arena->pos;
+    U64 p_aligned = AlignUpToPow2(p, M_COMMIT_BLOCK_SIZE);
+    U64 next_commit_p = ClampTop(p_aligned, arena->capacity);
+
+    U64 commit_p = arena->commit_pos;
+    if (next_commit_p < commit_p) {
+      U64 decommit_size = commit_p - next_commit_p;
+      m_decommit((U8 *)arena + next_commit_p, decommit_size);
+      arena->commit_pos = next_commit_p;
+    }
+  }
+}
+
+root_function void m_arena_pop(Arena* arena, U64 size) {
+  U64 min_pos = sizeof(Arena);
+  U64 size_to_pop = Min(size, arena->pos);
+  U64 new_pos = arena->pos - size_to_pop;
+  new_pos = Max(new_pos, min_pos);
+  m_arena_pop_to(arena, new_pos);
+}
+
+/** Push size and set the memory to zero. Returns 0 without writing anything when the push fails. */
+root_function void*
+m_arena_push_zero(Arena *arena, U64 size) {
+  void *result = m_arena_push(arena, size);
+  if (result != 0) { MemoryZero(result, size); }
+  return result;
+}
+
+root_function void
+m_arena_clear(Arena *arena)
+{
+  // We clear the input arena by popping off everything
+  // after the actual Arena information.
+  m_arena_pop_to(arena, sizeof(Arena));
+}
+
+root_function void
+m_arena_align(Arena *arena, U64 pow2_alignment) {
+  U64 p = arena->pos;
+  U64 p_aligned = AlignUpToPow2(p, pow2_alignment);
+  U64 z = p_aligned - p;
+  if (z > 0) {
+    m_arena_push(arena, z);
+  }
+}
+
+root_function void
+m_arena_release(Arena* arena) {
+  m_release(arena, arena->capacity);
+}
+
+root_function ArenaTemp
+m_arena_temp_begin(Arena *arena) {
+  ArenaTemp temp = { 0 };
+  temp.arena = arena;
+  temp.pos = arena->pos;
+  return temp;
+}
+
+root_function void
+m_arena_temp_end(ArenaTemp temp) {
+  m_arena_pop_to(temp.arena, temp.pos);
+}
diff --git a/src/base/base_memory.h b/src/base/base_memory.h
new file mode 100644
index 0000000..b3be5ab
--- /dev/null
+++ b/src/base/base_memory.h
@@ -0,0 +1,60 @@
+/* date = April 20th 2023 9:43 pm */
+
+#ifndef BASE_MEMORY_H
+#define BASE_MEMORY_H
+
+#if !defined(ARENA_COMMIT_GRANULARITY)
+#define ARENA_COMMIT_GRANULARITY Kilobytes(4)
+#endif
+
+#if !defined(ARENA_DECOMMIT_THRESHOLD)
+#define ARENA_DECOMMIT_THRESHOLD Megabytes(64)
+#endif
+
+#if !defined(M_DEFAULT_RESERVE_SIZE)
+#define M_DEFAULT_RESERVE_SIZE Megabytes(512)
+#endif
+
+#if !defined(M_COMMIT_BLOCK_SIZE)
+#define M_COMMIT_BLOCK_SIZE Megabytes(64)
+#endif
+
+// We store this information in the header of the allocated memory for the arena!!!
+typedef struct Arena Arena;
+struct Arena {
+  U64 pos; // Offset from the arena base of the next allocation; starts at sizeof(Arena).
+  U64 commit_pos; // Offset up to which memory has been committed.
+  U64 capacity; // Total reserved size in bytes, header included.
+  U64 align; // Set to 8 on creation; m_arena_push does not apply it yet.
+};
+
+typedef struct ArenaTemp ArenaTemp;
+struct ArenaTemp {
+  Arena *arena; // Arena the checkpoint belongs to.
+  U64 pos; // Arena position to rewind to in m_arena_temp_end.
+};
+
+root_function void m_change_memory_noop(void *ptr, U64 size);
+
+root_function Arena* m_make_arena_reserve(U64 reserve_size);
+root_function Arena* m_make_arena(void);
+
+root_function void m_arena_release(Arena *arena);
+root_function void* m_arena_push(Arena *arena, U64 size);
+root_function void m_arena_pop_to(Arena *arena, U64 pos);
+root_function void m_arena_pop(Arena* arena, U64 size);
+root_function void m_arena_align(Arena *arena, U64 pow2_alignment);
+root_function void* m_arena_push_zero(Arena *arena, U64 size);
+root_function void m_arena_clear(Arena *arena);
+
+#define PushArrayNoZero(arena, type, count) ((type *)m_arena_push((arena), sizeof(type)*(count)))
+#define PushArray(arena, type, count) ((type *)m_arena_push_zero((arena), sizeof(type)*(count)))
+//~ temp arena
+
+root_function ArenaTemp m_arena_temp_begin(Arena *arena);
+root_function void m_arena_temp_end(ArenaTemp temp);
+
+// ArenaTempBlock declares `name`, then runs the statement that follows once between temp_begin and temp_end.
+#define ArenaTempBlock(arena, name) ArenaTemp name = { 0 }; DeferLoop(name = m_arena_temp_begin(arena), m_arena_temp_end(name))
+
+#endif //BASE_MEMORY_H
diff --git a/src/base/base_strings.c b/src/base/base_strings.c
new file mode 100644
index 0000000..da79cad
--- /dev/null
+++ b/src/base/base_strings.c
@@ -0,0 +1,510 @@
+#include
+
+//~ Helpers
+root_function U64
+calculate_string_C_string_length(char *cstr) {
+  /*U64 length = 0;
+  for(char* p = cstr; p != '\0'; p += 1) {
+    length += 1;
+  }
+  return length;*/
+  // A cool way to write this is this while loop
+  U64 length = 0;
+  for (/* empty here means just while loop*/;
+       /* While we're not at null terminator */ cstr[length];
+       /* Increment */ length += 1);
+
+  // Then we're actually done and just return length;
+  return length;
+}
+
+
+//~ Constructors
+root_function String8
+str8(U8 *str, U64 size) {
+  String8 string;
+  string.str = str;
+  string.size = size;
+  return string;
+}
+
+root_function String8
+str8_range(U8 *first, U8 *one_past_last) {
+  String8 string;
+  string.str = first;
+  string.size = (U64)(one_past_last - first);
+  return string;
+}
+
+//~ Substrings
+//- String8
+root_function String8
+str8_substr(String8 string, U64 first, U64 one_past_last) {
+  // Returns the view [first, one_past_last) of string; no bytes are copied.
+  U64 min = first;
+  U64 max = one_past_last;
+  // Both ends are clamped to string.size, and swapped if they arrive in the wrong order.
+  if (max > string.size) {
+    max = string.size;
+  }
+  if (min > string.size) {
+    min = string.size;
+  }
+  if (min > max) {
+    U64 swap = min;
+    min = max;
+    max = swap;
+  }
+  string.size = max - min;
+  string.str += min; // Increment the pointer of the String8 to the min.
+  return string;
+}
+
+root_function String8 str8_prefix(String8 string, U64 size) { return str8_substr(string, 0, size); }
+root_function String8 str8_chop(String8 string, U64 amount) { return str8_substr(string, 0, string.size-ClampTop(amount, string.size)); } // clamped: chopping more than the size gives an empty string
+root_function String8 str8_suffix(String8 string, U64 size) { return str8_substr(string, string.size-ClampTop(size, string.size), string.size); } // clamped: asking for more than the size gives the whole string
+root_function String8 str8_skip(String8 string, U64 amount) { return str8_substr(string, amount, string.size); }
+
+// String16
+// String32
+
+//~ Lists
+//- String8
+root_function void
+str8_list_push_node(String8List *list, String8Node *n) {
+  QueuePush(list->first, list->last, n);
+  list->node_count += 1;
+  list->total_size += n->string.size;
+}
+
+root_function void
+str8_list_push_node_front(String8List *list, String8Node *n) {
+  QueuePushFront(list->first, list->last, n);
+  list->node_count += 1;
+  list->total_size += n->string.size;
+}
+
+// Wrapper that pushes the memory for a node onto the arena, and then puts the node in the linked list (in the back).
+root_function void
+str8_list_push(Arena *arena, String8List *list, String8 string) {
+  String8Node *n = PushArray(arena, String8Node, 1);
+  n->string = string;
+  str8_list_push_node(list, n);
+}
+
+// Wrapper that pushes the memory for a node onto the arena, and then puts the node in the linked list (in the front).
+root_function void
+str8_list_push_front(Arena *arena, String8List *list, String8 string) {
+  String8Node *n = PushArray(arena, String8Node, 1);
+  n->string = string;
+  str8_list_push_node_front(list, n);
+}
+
+root_function void
+str8_list_concat(String8List *list, String8List *to_push) {
+  // If to_push is a non-zero length String8List,
+  // we add it to the input list.
+  if (to_push->first) {
+    list->node_count += to_push->node_count;
+    list->total_size += to_push->total_size;
+    // If the input list's last element is null
+    // we had a zero length input list, and we just set the input list equal to to_push
+    if (list->last == 0) {
+      *list = *to_push;
+    } else {
+      // Else we append the to_push list to the input list.
+      list->last->next = to_push->first;
+      list->last = to_push->last;
+    }
+  }
+  // to_push's nodes are now linked into list; zero to_push so the same nodes are not reachable from two lists.
+  MemoryZero(to_push, sizeof(*to_push));
+  //LOG_NOT_IMPLEMENTED;
+}
+
+// Splits string at each occurrence of any of the split_count separators in splits and returns the pieces (separators excluded) as a list; empty separators are ignored.
+root_function String8List
+str8_split(Arena *arena, String8 string, int split_count, String8 *splits) {
+
+  String8List list = { 0 };
+
+  U64 split_start = 0; // Offset where the piece currently being scanned begins.
+  for (U64 i = 0; i < string.size; i += 1)
+  {
+    B32 was_split = 0;
+    for (int split_idx = 0; split_idx < split_count; split_idx += 1)
+    {
+      B32 match = 0;
+      if (splits[split_idx].size > 0 && i + splits[split_idx].size <= string.size) // a zero-size separator would match at every i and never advance
+      {
+        match = 1;
+        for (U64 split_i = 0; split_i < splits[split_idx].size && i + split_i < string.size; split_i += 1)
+        {
+          if (splits[split_idx].str[split_i] != string.str[i + split_i])
+          {
+            match = 0;
+            break;
+
+          }
+        }
+      }
+      if (match)
+      {
+        String8 split_string = str8(string.str + split_start, i - split_start); // piece before the separator, possibly empty
+        str8_list_push(arena, &list, split_string);
+        split_start = i + splits[split_idx].size;
+        i += splits[split_idx].size - 1; // skip the separator; the loop's i += 1 supplies the last step
+        was_split = 1;
+        break;
+      }
+    }
+
+    if (was_split == 0 && i == string.size - 1)
+    {
+      String8 split_string = str8(string.str + split_start, i + 1 - split_start); // trailing piece after the last separator
+      str8_list_push(arena, &list, split_string);
+      break;
+    }
+  }
+
+  return list;
+}
+
+// Joins every node of list into one null-terminated string allocated from arena, with optional pre/sep/post strings from optional_params.
+// Concatenates all strings of `list` into one newly allocated string laid out as
+//   pre + node0 + sep + node1 + ... + sep + nodeN + post
+// `optional_params` may be 0, in which case pre, sep and post are empty.
+// One extra zero byte is written after the result (not counted in result.size).
+root_function String8
+str8_list_join(Arena *arena, String8List list, StringJoin *optional_params) {
+    // rjf: setup join parameters
+    StringJoin join = { 0 };
+    if (optional_params != 0)
+    {
+        MemoryCopy(&join, optional_params, sizeof(join));
+    }
+
+    // rjf: calculate size & allocate
+    // A separator goes between nodes only, so there is one less than the node count.
+    U64 sep_count = 0;
+    if (list.node_count > 1)
+    {
+        sep_count = list.node_count - 1;
+    }
+    String8 result = { 0 };
+    result.size = (list.total_size + join.pre.size +
+                   sep_count*join.sep.size + join.post.size);
+    result.str = PushArray(arena, U8, result.size + 1);
+
+    // rjf: fill
+    U8 *ptr = result.str;
+    MemoryCopy(ptr, join.pre.str, join.pre.size);
+    ptr += join.pre.size;
+    for (String8Node *node = list.first; node; node = node->next)
+    {
+        MemoryCopy(ptr, node->string.str, node->string.size);
+        ptr += node->string.size;
+        if (node != list.last)
+        {
+            MemoryCopy(ptr, join.sep.str, join.sep.size);
+            ptr += join.sep.size;
+        }
+    }
+    MemoryCopy(ptr, join.post.str, join.post.size);
+    ptr += join.post.size;
+
+    // rjf: add null
+    result.str[result.size] = 0;
+
+    return result;
+}
+
+//~ Allocation and format strings
+// Copies `string` into memory allocated on `arena`.
+root_function String8
+str8_copy(Arena *arena, String8 string) {
+    String8 result;
+    result.size = string.size;
+    result.str = PushArray(arena, U8, string.size + 1);
+    MemoryCopy(result.str, string.str, string.size);
+    // Null terminator, stored after the copy and not counted in size, so the
+    // bytes can also be handed to code that expects a C string.
+    result.str[string.size] = 0;
+    return result;
+}
+
+// Formats `fmt` with `args` (printf style) into memory allocated on `arena`.
+// The result is null terminated; the terminator is not counted in result.size.
+// Returns an empty string if vsnprintf reports an error.
+root_function String8
+str8_pushfv(Arena *arena, char *fmt, va_list args) {
+
+    // vsnprintf consumes the va_list and we might need a second pass, so copy args.
+    va_list args2;
+    va_copy(args2, args);
+
+    // Try to build string using 1024 bytes
+    U64 buffer_size = 1024;
+    U8 *buffer = PushArray(arena, U8, buffer_size);
+    // vsnprintf returns the length the full output needs (without the terminator),
+    // or a negative value on error. Keep it signed so the error case stays visible.
+    int needed = vsnprintf((char*)buffer, buffer_size, fmt, args);
+
+    String8 result = { 0 };
+    if (needed < 0) {
+        // Formatting failed: give the buffer back and return the empty string.
+        m_arena_pop(arena, buffer_size);
+    } else if ((U64)needed < buffer_size) {
+        // The first try worked. Pop the unused part of the buffer, but keep the
+        // terminator that vsnprintf wrote (hence the -1).
+        U64 actual_size = (U64)needed;
+        m_arena_pop(arena, buffer_size - actual_size - 1);
+        result = str8(buffer, actual_size);
+    } else {
+        // The output was truncated: retry with a buffer of exactly the needed size.
+        U64 actual_size = (U64)needed;
+        m_arena_pop(arena, buffer_size);
+        U8 *fixed_buffer = PushArray(arena, U8, actual_size + 1);
+        int final_size = vsnprintf((char*)fixed_buffer, actual_size + 1, fmt, args2);
+        result = str8(fixed_buffer, final_size < 0 ? 0 : (U64)final_size);
+    }
+
+    // Every va_copy must be matched by a va_end.
+    va_end(args2);
+
+    return result;
+}
+
+// Variadic convenience wrapper around str8_pushfv.
+root_function String8
+str8_pushf(Arena *arena, char*fmt, ...) {
+    String8 result = { 0 };
+    va_list args;
+    va_start(args, fmt);
+    result = str8_pushfv(arena, fmt, args);
+    va_end(args);
+    return result;
+}
+
+// Formats a string onto `arena` and appends it to the back of `list`.
+root_function void
+str8_list_pushf(Arena *arena, String8List *list, char *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    String8 string = str8_pushfv(arena, fmt, args);
+    va_end(args);
+    str8_list_push(arena, list, string);
+}
+
+//~ Unicode conversions
+
+#define bitmask1 0x01 // Mask first bit
+#define bitmask2 0x03 // Mask 2 bits, 3 = 0x03 = 0000 0011 = 2^2 - 1
+#define bitmask3 0x07 // Mask 3 bits, 7 = 0x07 = 0000 0111 = 2^3 - 1
+#define bitmask4 0x0F // Mask 4 bits, 15 = 0x0F = 0000 1111 = 2^4 - 1
+#define bitmask5 0x1F // Mask 5 bits, 31 = 0x1F = 0001 1111 = 2^5 - 1
+#define bitmask6 0x3F // Mask 6 bits, 63 = 0x3F = 0011 1111 = 2^6 - 1
+#define bitmask7 0x7F // Mask 7 bits, 127 = 0x7F = 0111 1111 = 2^7 - 1
+#define bitmask8 0xFF // Mask 8 bits, 255 = 0xFF = 1111 1111 = 2^8 - 1
+
+// Decodes one codepoint from the UTF-8 bytes at `str`; `max` is the number of bytes available.
+// Returns the codepoint and how many bytes it occupied. When the lead byte is not a valid
+// start, a continuation byte is wrong, or fewer than the required bytes are available, the
+// result keeps its initial value: codepoint = all bits set, advance = 1.
+// str[0] is read unconditionally, so callers must pass max >= 1.
+// Note that we're only decoding well-formed cases: overlong encodings and surrogate values
+// are not rejected.
+root_function DecodeCodepoint
+decode_from_utf8(U8 *str, U64 max) {
+    // Indexed by the top five bits of a byte (byte >> 3). The value is the length of the
+    // sequence that the byte starts (1 to 4), 0 for a continuation byte (10xxxxxx), and
+    // 5 for a byte that is never valid.
+    local_persist U8 utf8_class[] = {
+        1, 1, 1, 1,
+        1, 1, 1, 1,
+        1, 1, 1, 1,
+        1, 1, 1, 1,
+        0, 0, 0, 0,
+        0, 0, 0, 0,
+        2, 2, 2, 2,
+        3, 3,
+        4,
+        5 // error
+    };
+
+    DecodeCodepoint result = { ~((U32)0), 1 };
+    // The lowest 3 bits of the lead byte do not matter for classification, so they are
+    // shifted out and the remaining 5 bits index the table.
+    U8 byte = str[0];
+    U8 byte_class = utf8_class[byte >> 3];
+
+    switch (byte_class) {
+        case 1: {
+            // Just a single byte encoding.
+            result.codepoint = byte; // Class 1 covers 0x00..0x7F only, so the 8th bit is already zero here.
+        } break;
+
+        case 2: {
+            if (2 <= max) {
+                U8 cont_byte = str[1];
+                // Check that the second byte is a continuation byte
+                if (utf8_class[cont_byte >> 3] == 0) {
+                    // The codepoint is assembled in a 32-bit value.
+                    // The case with two bytes has byte1 110xxxxx, ie encoded in the last 5 bits.
+                    // and byte2 is 10xxxxxx, encoded in the last 6 bits. So we use mask5 on first byte, shift by 6,
+                    // and mask6 on second byte.
+                    result.codepoint = (byte & bitmask5) << 6;
+                    result.codepoint |= (cont_byte & bitmask6);
+                    result.advance = 2;
+                }
+            }
+        } break;
+
+        case 3: {
+            if (3 <= max) {
+                // encoded by 3 bytes, so we have two more cont_bytes.
+                U8 cont_byte[2] = { str[1], str[2] };
+                if (utf8_class[cont_byte[0] >> 3] == 0 && utf8_class[cont_byte[1] >> 3] == 0) {
+                    // For this case the first byte is encoded in the last 4 bits, 1110xxxx
+                    // The second and third is 10xxxxxx (last 6 bits)
+                    result.codepoint = (byte & bitmask4) << 12;
+                    result.codepoint |= (cont_byte[0] & bitmask6) << 6;
+                    result.codepoint |= (cont_byte[1] & bitmask6);
+                    result.advance = 3;
+                }
+            }
+        } break;
+
+        case 4: {
+            if (4 <= max) {
+                U8 cont_byte[3] = { str[1], str[2], str[3] };
+                if (utf8_class[cont_byte[0] >> 3] == 0 &&
+                    utf8_class[cont_byte[1] >> 3] == 0 &&
+                    utf8_class[cont_byte[2] >> 3] == 0) {
+                    // Here first byte is encoded in last 3 bits, and byte 2,3,4 are encoded in last 6 bits.
+                    // Thus we shift the first byte by 3*6 = 18 bits into the 32 bit codepoint;
+                    result.codepoint = (byte & bitmask3) << 18;
+                    result.codepoint |= (cont_byte[0] & bitmask6) << 12;
+                    result.codepoint |= (cont_byte[1] & bitmask6) << 6;
+                    result.codepoint |= (cont_byte[2] & bitmask6);
+                    result.advance = 4;
+                }
+            }
+        } break;
+    }
+
+    return result;
+}
+
+// Encodes `codepoint` as UTF-8 into `out` and returns the number of bytes written (1 to 4).
+// `out` must have room for 4 bytes. Values above 0x10FFFF are written as a single '?'.
+// Surrogate values (0xD800..0xDFFF) are not rejected; they take the 3 byte path.
+root_function U32
+utf8_from_codepoint(U8* out, U32 codepoint) {
+    // Marker for a continuation byte: 10xxxxxx.
+    U8 bit8 = 0x80;
+    U32 advance = 0;
+
+    if (codepoint <= bitmask7 /* 0111 1111 */) {
+        // We know that the whole encoding is in the last 7 bits, so it's a 1 byte encoding
+        out[0] = (U8)codepoint;
+        advance = 1;
+
+    } else if (codepoint <= 0x07FF /*0000 0111 1111 1111*/) {
+        // The case with two bytes has byte1 110xxxxx, ie encoded in the last 5 bits.
+        // and byte2 is 10xxxxxx, encoded in the last 6 bits.
+        out[0] = (bitmask2 << 6) | ((codepoint >> 6) & bitmask5);
+        out[1] = bit8 | (codepoint & bitmask6);
+        advance = 2;
+
+    } else if (codepoint <= 0xFFFF /* 1111 1111 1111 1111 */) {
+        // For this case the first byte is encoded in the last 4 bits, 1110xxxx
+        // The second and third is 10xxxxxx (last 6 bits)
+        out[0] = (bitmask3 << 5) | ((codepoint >> 12) & bitmask4);
+        out[1] = bit8 | ((codepoint >> 6) & bitmask6);
+        out[2] = bit8 | ((codepoint) & bitmask6);
+        advance = 3;
+
+    } else if (codepoint <= 0x10FFFF /*0001 0000 1111 1111 1111 1111 */) {
+        // Here first byte is encoded in last 3 bits (11110xxx), and byte 2,3,4 are encoded in last 6 bits.
+        // Thus the bits for the first byte are taken from 3*6 = 18 bits up in the 32 bit codepoint.
+        out[0] = (bitmask4 << 4) | ((codepoint >> 18) & bitmask3);
+        out[1] = bit8 | ((codepoint >> 12) & bitmask6);
+        out[2] = bit8 | ((codepoint >> 6) & bitmask6);
+        out[3] = bit8 | ((codepoint) & bitmask6);
+        advance = 4;
+    } else {
+        out[0] = '?'; // Not a Unicode codepoint: emit a placeholder.
+        advance = 1;
+    }
+
+    return advance;
+}
+
+// Decodes one codepoint from the UTF-16 code units at `str`; `max` is the number of units
+// available. str[0] is read unconditionally, so callers must pass max >= 1.
+root_function DecodeCodepoint decode_from_utf16(U16 *str, U64 max) {
+    DecodeCodepoint result = { ~((U32)0), 1 };
+    result.codepoint = str[0];
+    result.advance = 1;
+    // Usually codepoints fit into a single 16 bit chunk.
+    // But when we're not in the ranges 0x0000 to 0xD7FF and 0xE000 to 0xFFFF,
+    // we need two 16 bit stores.
+    // So what we have in str[0] = W1 is the "high surrogate", and
+    // str[1] = W2 is the "low surrogate". We then get the codepoint U = U' + 0x10000,
+    // where U' is a 20-bit number with the 10 lower bits from W1 in the high bits, and 10 lower bits of W2 in the lower.
+    if (max > 1) {
+        U16 w1 = str[0];
+        U16 w2 = str[1];
+        if (0xD800 <= w1 && w1 < 0xDC00 && 0xDC00 <= w2 && w2 < 0xE000) {
+            // Get the ten payload bits of W1 (y) and of W2 (x)
+            U16 y = w1 - 0xD800;
+            U16 x = w2 - 0xDC00;
+            U32 uprim = (y << 10) | x;
+            result.codepoint = uprim + 0x10000;
+            result.advance = 2;
+        }
+    }
+    return result;
+}
+
+// Encodes `codepoint` as UTF-16 into `out` and returns the number of 16 bit units written
+// (1 or 2). `out` must have room for 2 units. Anything above 0x10FFFF, which includes the
+// decoders' error value ~0, is written as a single '?', matching utf8_from_codepoint.
+root_function U32 utf16_from_codepoint(U16* out, U32 codepoint) {
+
+    U32 advance = 1;
+    if (codepoint > 0x10FFFF) {
+        // Not a Unicode codepoint; a surrogate pair cannot represent it.
+        out[0] = (U16)'?';
+    } else if (codepoint < 0x10000) {
+        // single 16 bit code unit
+        out[0] = (U16)codepoint;
+    } else {
+        // store 20 bits in uprim
+        U32 uprim = codepoint - 0x10000;
+        // create W1 from the upper 10 bits
+        out[0] = 0xD800 + (uprim >> 10);
+        // 0x03FF = bitmask for 10 lowest bits
+        // create W2 from the lower 10 bits
+        out[1] = 0xDC00 + (uprim & 0x03FF);
+        advance = 2;
+    }
+    return advance;
+}
+
+// Converts a UTF-16 string to a null terminated UTF-8 string allocated on `arena`.
+root_function String8
+str8_from16(Arena *arena, String16 string) {
+    // Worst case: one 16 bit unit becomes 3 bytes. A surrogate pair is 2 units and becomes
+    // 4 bytes, which is below that bound.
+    U64 cap = string.size*3;
+    U8 *str = PushArray(arena, U8, cap + 1);
+    U16 *ptr = string.str;
+    U16 *one_past_last = ptr + string.size;
+    U64 size = 0;
+    DecodeCodepoint consume;
+    for (; ptr < one_past_last;) {
+        // Decode one codepoint, step over the units it used, and re-encode it as UTF-8.
+        consume = decode_from_utf16(ptr, one_past_last - ptr);
+        ptr += consume.advance;
+        size += utf8_from_codepoint(str + size, consume.codepoint);
+    }
+    str[size] = 0;
+    // Give the unused part of the worst case allocation back to the arena.
+    m_arena_pop(arena, cap - size);
+    return str8(str, size);
+}
+
+// Converts a UTF-32 string to a null terminated UTF-8 string allocated on `arena`.
+// Values that are not Unicode codepoints become '?' (see utf8_from_codepoint).
+root_function String8
+str8_from32(Arena *arena, String32 string) {
+    // Worst case: every codepoint needs 4 bytes.
+    U64 cap = string.size*4;
+    U8 *str = PushArray(arena, U8, cap + 1);
+    U64 size = 0;
+    for (U64 i = 0; i < string.size; i += 1) {
+        size += utf8_from_codepoint(str + size, string.str[i]);
+    }
+    str[size] = 0;
+    // Give the unused part of the worst case allocation back to the arena.
+    m_arena_pop(arena, cap - size);
+    return str8(str, size);
+}
+
+// Converts a UTF-8 string to a null terminated UTF-16 string allocated on `arena`.
+root_function String16
+str16_from8(Arena* arena, String8 string) {
+    // Every input byte produces at most one 16 bit unit, so this bound is generous.
+    U64 cap = string.size*2;
+    U16 *str = PushArray(arena, U16, cap + 1);
+    U8 *ptr = string.str;
+    U8 *one_past_last = ptr + string.size;
+    U64 size = 0;
+    DecodeCodepoint consume;
+    for (; ptr < one_past_last;) {
+        // Decode one codepoint, step over the bytes it used, and re-encode it as UTF-16.
+        consume = decode_from_utf8(ptr, one_past_last - ptr);
+        ptr += consume.advance;
+        size += utf16_from_codepoint(str + size, consume.codepoint);
+    }
+    str[size] = 0;
+    // The pop amount is in bytes and each unused unit is 2 bytes.
+    m_arena_pop(arena, 2*(cap - size));
+    String16 result;
+    result.str = str;
+    result.size = size;
+    return result;
+}
+
+// Converts a UTF-8 string to a null terminated UTF-32 string allocated on `arena`.
+// Input that fails to decode becomes '?', like in the other conversions.
+root_function String32
+str32_from8(Arena *arena, String8 string) {
+    // Every codepoint consumes at least one input byte.
+    U64 cap = string.size;
+    U32 *str = PushArray(arena, U32, cap + 1);
+    U8 *ptr = string.str;
+    U8 *one_past_last = ptr + string.size;
+    U64 size = 0;
+    for (; ptr < one_past_last;) {
+        DecodeCodepoint consume = decode_from_utf8(ptr, one_past_last - ptr);
+        ptr += consume.advance;
+        str[size] = (consume.codepoint == ~((U32)0)) ? (U32)'?' : consume.codepoint;
+        size += 1;
+    }
+    str[size] = 0;
+    // The pop amount is in bytes.
+    m_arena_pop(arena, sizeof(U32)*(cap - size));
+    String32 result;
+    result.str = str;
+    result.size = size;
+    return result;
+}
diff --git a/src/base/base_strings.h b/src/base/base_strings.h
new file mode 100644
index 0000000..acdaec7
--- /dev/null
+++ b/src/base/base_strings.h
@@ -0,0 +1,124 @@
+/* date = April 23rd 2023 10:47 am */
+
+#ifndef BASE_STRINGS_H
+#define BASE_STRINGS_H
+
+// We decide that the basic string handling will be immutable.
+// This means that whatever memory we got when initialising the string is what we have to live with.
+// This will give us easy interfaces and work for most cases.
+// The downside is that it might not always have the best performance.
+// In such cases we will develop special code for handling the special cases.
+
+/////////////////////////
+//~ Basic string types, lists and arrays
+
+typedef struct String8 String8;
+struct String8 {
+    U8* str;
+    U64 size;
+};
+
+typedef struct String16 String16;
+struct String16 {
+    U16 *str;
+    U64 size;
+};
+
+typedef struct String32 String32;
+struct String32 {
+    U32 *str;
+    U64 size;
+};
+
+typedef struct String8Node String8Node;
+struct String8Node {
+    String8Node *next;
+    String8 string;
+};
+
+typedef struct String8List String8List;
+struct String8List {
+    String8Node *first;
+    String8Node *last;
+    U64 node_count;
+    U64 total_size;
+};
+
+typedef struct String8Array String8Array;
+struct String8Array {
+    U64 count;
+    String8 *v;
+};
+
+/////////////////////////
+//~ String operation types
+typedef struct StringJoin StringJoin;
+struct StringJoin {
+    String8 pre;
+    String8 sep;
+    String8 post;
+};
+
+typedef struct DecodeCodepoint DecodeCodepoint;
+struct DecodeCodepoint {
+    U32 codepoint;
+    U32 advance;
+};
+
+
+/////////////////////////
+//~ String operations
+//~ String functions
+
+//- Helpers
+root_function U64 calculate_string_C_string_length(char *cstr);
+
+//- Constructors
+root_function String8 str8(U8 *str, U64 size);
+// Get a
String8 from a C-string.
+// The argument is evaluated twice, so do not pass an expression with side effects.
+#define str8_C(cstring) str8((U8 *)(cstring), calculate_string_C_string_length(cstring))
+// Get a String8 from a literal
+// -1 since we don't want the size to include the null terminator; the literal itself
+// still stores the null char, for interop with APIs that expect C strings.
+#define str8_lit(s) str8((U8*)(s), sizeof(s) - 1)
+// Specify a String8 as just its struct members (usable in static initializers)
+#define str8_lit_comp(s) {(U8*)(s), sizeof(s)-1}
+root_function String8 str8_range(U8 *first, U8 *one_past_last);
+
+// View the bytes of the object that `ptr` points to as a String8.
+#define str8_struct(ptr) str8((U8 *)(ptr), sizeof(*(ptr)))
+
+//- Substrings
+root_function String8 str8_substr(String8 string, U64 first, U64 one_past_last);
+root_function String8 str8_prefix(String8 string, U64 size);
+root_function String8 str8_chop(String8 string, U64 amount);
+root_function String8 str8_suffix(String8 string, U64 size);
+root_function String8 str8_skip(String8 string, U64 amount);
+
+// Used in format strings! Expands to the (int length, pointer) argument pair that a
+// "%.*s" conversion consumes.
+#define str8_expand(s) (int)((s).size), ((s).str)
+
+//- Lists
+root_function void str8_list_push_node(String8List *list, String8Node *n);
+root_function void str8_list_push_node_front(String8List *list, String8Node *n);
+root_function void str8_list_push(Arena *arena, String8List *list, String8 string);
+root_function void str8_list_push_front(Arena *arena, String8List *list, String8 string);
+root_function void str8_list_concat(String8List *list, String8List *to_push);
+root_function String8List str8_split(Arena *arena, String8 string, int split_count, String8 *splits);
+root_function String8 str8_list_join(Arena *arena, String8List list, StringJoin *optional_params);
+
+//- Allocation and format strings
+root_function String8 str8_copy(Arena *arena, String8 string);
+root_function String8 str8_pushfv(Arena *arena, char *fmt, va_list args);
+root_function String8 str8_pushf(Arena *arena, char* fmt, ...);
+root_function void str8_list_pushf(Arena *arena, String8List *list, char *fmt, ...);
+
+//~ Unicode conversions
+root_function DecodeCodepoint decode_from_utf8(U8 *str, U64 max);
+root_function U32 utf8_from_codepoint(U8* out, U32 codepoint);
+root_function DecodeCodepoint decode_from_utf16(U16 *str, U64 max);
+root_function U32 utf16_from_codepoint(U16* out, U32 codepoint);
+root_function String8 str8_from16(Arena *arena, String16 string);
+root_function String8 str8_from32(Arena *arena, String32 string);
+root_function String16 str16_from8(Arena* arena, String8 string);
+root_function String32 str32_from8(Arena *arena, String8 string);
+#endif //BASE_STRINGS_H
diff --git a/src/base/base_thread_context.c b/src/base/base_thread_context.c
new file mode 100644
index 0000000..124d84f
--- /dev/null
+++ b/src/base/base_thread_context.c
@@ -0,0 +1,82 @@
+
+// Creates a thread context and reserves address space for each of its arenas.
+root_function ThreadContext
+thread_context_alloc(void)
+{
+    ThreadContext result = { 0 };
+
+    for (U64 arena_index = 0; arena_index < ArrayCount(result.arenas); arena_index += 1)
+    {
+        result.arenas[arena_index] = m_make_arena_reserve(Gigabytes(8));
+    }
+    return result;
+}
+
+// Releases the arenas owned by the context.
+root_function void
+thread_context_release(ThreadContext *context)
+{
+    for (U64 arena_index = 0; arena_index < ArrayCount(context->arenas); arena_index += 1)
+    {
+        m_arena_release(context->arenas[arena_index]);
+    }
+}
+
+
+// The context of the current thread; stays 0 until thread_context_set is called.
+per_thread ThreadContext *tl_thread_context;
+
+no_name_mangle void
+thread_context_set(ThreadContext *context)
+{
+    tl_thread_context = context;
+}
+
+no_name_mangle ThreadContext *
+thread_context_get(void)
+{
+    return tl_thread_context;
+}
+
+
+// Returns nonzero only for the thread that was set up by base_main_thread_entry.
+// A thread that has no context yet is reported as not being the main thread.
+root_function B32 is_main_thread(void)
+{
+    ThreadContext *context = thread_context_get();
+    return context != 0 && context->is_main_thread;
+}
+
+// Returns a temporary scope on one of the current thread's arenas that is not listed in
+// `conflicts` (arenas the caller is already using). `conflicts` may be 0 when
+// `conflict_count` is 0. If every arena conflicts, the returned ArenaTemp is zeroed.
+// The caller ends the scope with scratch_release.
+root_function ArenaTemp
+scratch_get(Arena **conflicts, U64 conflict_count)
+{
+    ArenaTemp scratch = { 0 };
+    ThreadContext *thread_context = thread_context_get();
+    for (U64 arena_index = 0; arena_index < ArrayCount(thread_context->arenas); arena_index += 1)
+    {
+        Arena *candidate = thread_context->arenas[arena_index];
+        B32 is_conflicting = 0;
+        // Indexing instead of pointer iteration: no arithmetic is done on a null `conflicts`.
+        for (U64 conflict_index = 0; conflict_index < conflict_count; conflict_index += 1)
+        {
+            if (conflicts[conflict_index] == candidate)
+            {
+                is_conflicting = 1;
+                break;
+            }
+        }
+        if (is_conflicting == 0)
+        {
+            scratch.arena = candidate;
+            // Remember the current position of the arena.
+            scratch.pos = candidate->pos;
+            break;
+        }
+    }
+    return scratch;
+}
+
+// Sets up the thread context for the main thread, runs `entry`, and tears the context down.
+// `argument_count` and `arguments` are currently unused.
+root_function void
+base_main_thread_entry(void (*entry)(void), U64 argument_count, char **arguments)
+{
+    // Here we get memory for the thread arenas, and notify that it's a main thread.
+    ThreadContext thread_context = thread_context_alloc();
+    thread_context.is_main_thread = 1;
+    // Here we set it to the global thread context variable
+    thread_context_set(&thread_context);
+    // Then call the entry point function for our program
+    entry();
+    thread_context_release(&thread_context);
+    // thread_context lives on this stack frame; do not leave the thread local pointing at it.
+    thread_context_set(0);
+}
\ No newline at end of file
diff --git a/src/base/base_thread_context.h b/src/base/base_thread_context.h
new file mode 100644
index 0000000..42a0776
--- /dev/null
+++ b/src/base/base_thread_context.h
@@ -0,0 +1,39 @@
+/* date = April 29th 2023 9:18 pm */
+
+#ifndef BASE_THREAD_CONTEXT_H
+#define BASE_THREAD_CONTEXT_H
+
+typedef struct ThreadContext ThreadContext;
+struct ThreadContext
+{
+    Arena *arenas[2]; // More than one, so scratch_get can pick an arena that is not in the caller's conflict list.
+    char *file_name;
+    U64 line_number;
+    U8 thread_name[32];
+    U64 thread_name_size;
+    B32 is_main_thread;
+};
+
+//root_function ThreadContext make_thread_context(void);
+
+root_function ThreadContext thread_context_alloc(void);
+root_function void thread_context_release(ThreadContext *context);
+
+no_name_mangle void
+thread_context_set(ThreadContext *context);
+no_name_mangle ThreadContext *
+thread_context_get(void);
+
+
+root_function B32 is_main_thread(void);
+
+//~ scratch memory
+root_function ArenaTemp scratch_get(Arena **conflicts, U64 conflict_count);
+#define scratch_release(temp) m_arena_temp_end(temp)
+
+//~ entry
+// Takes a function pointer to the app entry function.
+root_function void
+base_main_thread_entry(void (*entry)(void), U64 argument_count, char **arguments);
+
+#endif //BASE_THREAD_CONTEXT_H
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..12f8dbb
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,453 @@
+// NOTE(review): the header name of the following #include is missing in this copy of the
+// patch. Restore it from the original commit (this file uses malloc, free, exit and zgeev).
+#include
+
+#define ENABLE_LOGGING 1
+#if ENABLE_LOGGING
+#define LOG(msg) { OutputDebugString(msg); }
+#else
+#define LOG(msg)
+#endif
+
+// ---
+// Header includes
+#include "base/base_inc.h"
+#include "os/os_inc.h"
+
+// ---
+// .C includes
+#include "base/base_inc.c"
+#include "os/os_inc.c"
+#include "os/os_entry_point.c"
+
+#define grid_file_path_bin "D:\\dev\\hf_again\\out\\grid.bin"
+#define grid_file_path "D:\\dev\\hf_again\\out\\grid.dat"
+#define knotpoints_file_path "D:\\dev\\hf_again\\out\\knotpoints.dat"
+#define bspline_array_file_path "D:\\dev\\hf_again\\out\\bsplines.dat"
+
+// Complex number with double precision
+typedef struct Z64 Z64;
+struct Z64
+{
+    F64 re;
+    F64 im;
+};
+
+// Grid of num_steps points on [start, end].
+typedef struct Grid Grid;
+struct Grid
+{
+    F64 start;
+    F64 end;
+    U32 num_steps;
+    F64 *points;
+};
+
+
+// Knot sequence and derived sizes for B-splines of the given order.
+typedef struct BSplineCtx BSplineCtx;
+struct BSplineCtx
+{
+    Arena* arena;
+    U32 order;
+    F64 *knotpoints;
+    U32 num_knotpoints;
+    U32 num_bsplines;
+    U32 num_phys_points;
+    F64 *bsplines;
+};
+
+typedef struct Orbital Orbital;
+struct Orbital
+{
+    U32 n;
+    U32 l;
+    U32 j;
+};
+
+
+typedef struct Atom Atom;
+struct Atom
+{
+    U32 N;
+    Orbital *orbitals;
+};
+
+
+//~ Globals
+global Grid g_grid = {0};
+global BSplineCtx g_bspline_ctx = {0};
+
+// Writes the raw bytes of `values` (array_size doubles) to `path_to_file` and logs the path.
+function void
+write_array_binary_F64(String8 path_to_file, F64 *values, U32 array_size)
+{
+    OS_Handle file_handle = OS_file_open(OS_AccessFlag_Write | OS_AccessFlag_CreateNew,
+                                         path_to_file);
+    {
+        ArenaTemp scratch = scratch_get(0, 0);
+        // The list node only references the caller's array; no copy of the data is made.
+        String8List list = {0};
+        String8 temp = {0};
+        temp.str = (U8*)values;
+        temp.size = sizeof(F64)*array_size;
+        str8_list_push(scratch.arena, &list, temp);
+        OS_file_write(scratch.arena, file_handle, 0, list, 0);
+
+        String8List log_list = {0};
+        str8_list_push(scratch.arena, &log_list, str8_lit("Wrote binary array data to"));
+        str8_list_push(scratch.arena, &log_list, path_to_file);
+        StringJoin join = {0};
+        join.sep = str8_lit(" ");
+        join.post = str8_lit("\n");
+        String8 log_msg = str8_list_join(scratch.arena, log_list, &join);
+        LOG(log_msg.str);
+        // Everything above is temporary; end the scratch scope so it does not accumulate.
+        scratch_release(scratch);
+    }
+    OS_file_close(file_handle);
+}
+
+// Writes the strings of `list` to the file at `path` and logs the path.
+// `arena` is used for the write call and for building the log message.
+function void
+write_string_list_to_file(Arena *arena, String8 path, String8List *list)
+{
+
+    OS_Handle file_handle = OS_file_open(OS_AccessFlag_Write | OS_AccessFlag_CreateNew,
+                                         path);
+    OS_file_write(arena, file_handle, 0, *list, 0);
+
+    U32 debug = 1;
+    if(debug)
+    {
+        String8List log_list = {0};
+        str8_list_push(arena, &log_list, str8_lit("Wrote array to"));
+        str8_list_push(arena, &log_list, path);
+        StringJoin join = {0};
+        join.sep = str8_lit(" ");
+        join.post = str8_lit("\n");
+        String8 log_msg = str8_list_join(arena, log_list, &join);
+        LOG(log_msg.str);
+    }
+    OS_file_close(file_handle);
+}
+
+
+// Formats each of the array_size values with `fmt` (a printf format that consumes one
+// double) and writes the resulting text to `path_to_file`.
+function void
+write_array_F64(String8 path_to_file, F64 *values, U32 array_size, char* fmt)
+{
+    ArenaTemp scratch = scratch_get(0, 0);
+    String8List list = {0};
+    for(U32 i = 0; i < array_size; i++)
+    {
+        str8_list_pushf(scratch.arena, &list, fmt, values[i]);
+    }
+    write_string_list_to_file(scratch.arena, path_to_file, &list);
+    // The formatted strings are only needed until the file is written.
+    scratch_release(scratch);
+}
+
+// Value at `x` of the B-spline with index `i` and order `k`, evaluated with the
+// Cox-de Boor recursion on the knots in g_bspline_ctx.
+function F64
+bspline_recursion(F64 x, U32 k, U32 i)
+{
+    F64 *t = g_bspline_ctx.knotpoints;
+
+    if(k == 1)
+    {
+        if(i == g_bspline_ctx.num_bsplines-1 && x == g_grid.end)
+        {
+            // TODO(anton):
+            // This is like a hack to get the last bspline to be 1 at the last point.
+            // The order-1 splines below use the half-open interval t[i] <= x < t[i+1], so
+            // without this case none of them is nonzero at x == g_grid.end and every spline
+            // would evaluate to 0 there. Check that the last spline is the right one to close.
+            return 1.0;
+        }
+        else if(i < g_bspline_ctx.num_bsplines && (x >= t[i] && x < t[i+1]))
+        {
+            // Order 1: indicator function of the half-open knot interval.
+            return 1.0;
+        } else {
+            return 0.0;
+        }
+    }
+    else
+    {
+        // Each term is only evaluated when its lower order spline is nonzero. This skips
+        // the quotient for the terms whose lower order spline vanishes, which is what
+        // happens on the repeated knots at the ends.
+        F64 recursion1 = bspline_recursion(x, k-1, i);
+        F64 term1_enum = (x - t[i]);
+        F64 term1_denom = (t[i+k-1] - t[i]);
+        F64 term1 = recursion1 > 0.0 ? (term1_enum/term1_denom)*recursion1 : 0.0;
+
+        F64 recursion2 = bspline_recursion(x, k-1, i+1);
+        F64 term2_enum = (t[i+k] - x);
+        F64 term2_denom = (t[i+k] - t[i+1]);
+        F64 term2 = recursion2 > 0.0 ? (term2_enum/term2_denom)*recursion2 : 0.0;
+
+        return term1 + term2;
+    }
+
+
+}
+
+
+// Value at `x_coord` of the B-spline `index`, using the order stored in g_bspline_ctx.
+function F64
+get_bspline_F64(F64 x_coord, U32 index)
+{
+    U32 k = g_bspline_ctx.order;
+    F64 out = bspline_recursion(x_coord, k, index);
+    return out;
+}
+
+// Fills g_grid with num_steps equally spaced points from start to end (both included).
+function void
+set_up_grid(Arena *arena)
+{
+    g_grid.start = 0.0;
+    g_grid.end = 10.0;
+    g_grid.num_steps = 100;
+
+    g_grid.points = PushArray(arena, F64, g_grid.num_steps);
+    // num_steps points span num_steps-1 intervals (same convention as the knot spacing in
+    // set_up_bspline_context). Dividing by num_steps would leave the last interval twice
+    // as wide as the others.
+    F64 step_size = (g_grid.end-g_grid.start)/(F64)(g_grid.num_steps-1);
+    g_grid.points[0] = g_grid.start;
+    for(U32 i = 1; i < g_grid.num_steps-1; i++)
+    {
+        // Computed from the index so rounding errors do not accumulate along the grid.
+        g_grid.points[i] = g_grid.start + (F64)i*step_size;
+    }
+    // Set last, so the final point is exactly `end`.
+    g_grid.points[g_grid.num_steps-1] = g_grid.end;
+
+}
+
+
+// Builds the knot sequence in g_bspline_ctx: k copies of the grid start, equally spaced
+// interior knots, and k copies of the grid end. Requires set_up_grid to have run.
+function void
+set_up_bspline_context(Arena* arena)
+{
+    // Create knotpoint sequence.
+    U32 k = 4;
+    U32 bspl_N = 14;
+    g_bspline_ctx.order = k;
+    g_bspline_ctx.num_knotpoints = bspl_N;
+    g_bspline_ctx.num_bsplines = bspl_N-k;
+    g_bspline_ctx.num_phys_points = bspl_N-(2*k)+2; // Remove k points at each end, and then add back the first and last points.
+    g_bspline_ctx.arena = arena;
+    g_bspline_ctx.knotpoints = PushArray(arena, F64, g_bspline_ctx.num_knotpoints);
+    // Spacing between the physical points
+    F64 delta = (g_grid.end-g_grid.start)/(g_bspline_ctx.num_phys_points-1);
+    // Set ghost points including first physical
+    U32 phys_point_last_index = g_bspline_ctx.num_phys_points + k-1;
+    for(U32 i = 0; i < k; i++)
+    {
+        g_bspline_ctx.knotpoints[i] = g_grid.start;
+    }
+    // Interior physical points, each one delta after the previous knot
+    for(U32 i = k; i < phys_point_last_index; i++)
+    {
+        g_bspline_ctx.knotpoints[i] = g_bspline_ctx.knotpoints[i-1] + delta;
+    }
+    // Set the last points
+    F64 last_physical = g_grid.end;
+    for(U32 i = phys_point_last_index; i < g_bspline_ctx.num_knotpoints; i++)
+    {
+        g_bspline_ctx.knotpoints[i] = last_physical;
+    }
+}
+
+
+function void
+write_bsplines_to_matrix_F64(Arena *arena)
+{
+
+    U64 num_bsplines = g_bspline_ctx.num_bsplines;
+    U64 k = g_bspline_ctx.order;
+    F64 *t = g_bspline_ctx.knotpoints;
+    U32 grid_size = g_grid.num_steps;
+
+    // For sanity check we make the first 4 bsplines by hand.
+    {
+        F64 *bspl0 = PushArray(arena, F64, grid_size);
+        F64 *bspl1 = PushArray(arena, F64, grid_size);
+        F64 *bspl2 = PushArray(arena, F64, grid_size);
+        F64 *bspl3 = PushArray(arena, F64, grid_size);
+        F64 *bspl9 = PushArray(arena, F64, grid_size);
+        for(U32 i = 0; i < grid_size; i++)
+        {
+            F64 x = g_grid.points[i];
+            bspl0[i] = get_bspline_F64(x, 0);
+            bspl1[i] = get_bspline_F64(x, 1);
+            bspl2[i] = get_bspline_F64(x, 2);
+            bspl3[i] = get_bspline_F64(x, 3);
+            bspl9[i] = get_bspline_F64(x, 9);
+        }
+
+        // Not used by the code; kept as a value to inspect in the debugger.
+        F64 test = get_bspline_F64(g_grid.points[grid_size-1], 9);
+
+        write_array_F64(str8_lit("D:\\dev\\eigsol_gpu\\out\\bspline0.dat"), bspl0, grid_size, "%13.6e\n");
+        write_array_F64(str8_lit("D:\\dev\\eigsol_gpu\\out\\bspline1.dat"), bspl1, grid_size, "%13.6e\n");
+        write_array_F64(str8_lit("D:\\dev\\eigsol_gpu\\out\\bspline2.dat"), bspl2, grid_size, "%13.6e\n");
+        write_array_F64(str8_lit("D:\\dev\\eigsol_gpu\\out\\bspline3.dat"), bspl3, grid_size, "%13.6e\n");
+        write_array_F64(str8_lit("D:\\dev\\eigsol_gpu\\out\\bspline9.dat"), bspl9, grid_size, "%13.6e\n");
+    }
+
+    // Row-major matrix: one row per grid point, one column per B-spline, so a row is
+    // num_bsplines entries long.
+    g_bspline_ctx.bsplines = PushArray(arena, F64, grid_size*num_bsplines);
+    for(U32 i = 0; i < g_grid.num_steps; i++)
+    {
+        for(U32 j = 0; j < num_bsplines; j++)
+        {
+            // The row stride must be the row length. Using the number of grid points as
+            // stride indexes far past the grid_size*num_bsplines allocation.
+            U64 index = num_bsplines * i + j;
+            g_bspline_ctx.bsplines[index] = get_bspline_F64(g_grid.points[i], j);
+        }
+    }
+
+    ArenaTemp scratch = scratch_get(0, 0);
+    String8 bspline_filename = str8_lit("D:\\dev\\eigsol_gpu\\out\\Bsplines.dat");
+
+    // First line is just the bspline indices.
+    // NOTE(review): first_line and bspline_filename are built but never used below; the
+    // matrix is written without a header row to bspline_array_file_path. Confirm intent.
+    String8List first_line_list = {0};
+    StringJoin join = {0};
+    join.sep = str8_lit("\t\t");
+    for(U32 i = 0; i < num_bsplines; i++)
+    {
+        str8_list_pushf(scratch.arena, &first_line_list, "%i", i);
+    }
+    String8 first_line = str8_list_join(scratch.arena, first_line_list, &join);
+
+    // One text row per grid point with the value of every B-spline at that point.
+    String8List bspline_array_list = {0};
+    for(U32 i = 0; i < g_grid.num_steps; i++)
+    {
+        String8List row = {0};
+        for(U32 j = 0; j < num_bsplines; j++)
+        {
+            U64 index = num_bsplines * i + j;
+            F64 bspline_value = g_bspline_ctx.bsplines[index];
+            str8_list_pushf(scratch.arena, &row, "%13.6e", bspline_value);
+        }
+        StringJoin bspl_join = {0};
+        bspl_join.sep = str8_lit(" ");
+        bspl_join.post = str8_lit("\n");
+        String8 row_joined = str8_list_join(scratch.arena, row, &bspl_join);
+        str8_list_push(scratch.arena, &bspline_array_list, row_joined);
+    }
+
+    write_string_list_to_file(scratch.arena, str8_lit(bspline_array_file_path), &bspline_array_list);
+    // The text rows are only needed until the file is written.
+    scratch_release(scratch);
+
+}
+
+// Sets up the grid and the B-spline context and dumps both to files for inspection.
+// The arena is intentionally not released here: g_grid and g_bspline_ctx keep pointers into it.
+function void bspline_things(void)
+{
+    Arena *arena = m_make_arena();
+
+    //- Set up grid and write to file.
+    set_up_grid(arena);
+    write_array_binary_F64(str8_lit(grid_file_path_bin), g_grid.points, g_grid.num_steps);
+    write_array_F64(str8_lit(grid_file_path), g_grid.points, g_grid.num_steps, "%13.6e\n");
+
+    //- The BSpline context is the knotpoints and the BSpline order etc.
+    set_up_bspline_context(arena);
+    write_array_F64(str8_lit(knotpoints_file_path),
+                    g_bspline_ctx.knotpoints, g_bspline_ctx.num_knotpoints,
+                    "%13.6e\n");
+
+    //- Then we generate the BSplines and save them off for reference and debugging.
+    write_bsplines_to_matrix_F64(arena);
+
+}
+
+
+
+/* Auxiliary routine: printing a matrix */
+// Logs the m x n complex matrix `a`, read as a[i + j*lda] for row i and column j,
+// one row per line, preceded by the description `desc`.
+function void
+print_matrix_Z64( char* desc, int m, int n, Z64* a, int lda ) {
+    ArenaTemp scratch = scratch_get(0,0);
+    int i, j;
+    String8 newline = str8_lit("\n");
+    String8 header = str8_pushf(scratch.arena, "\n %s\n", desc );
+    LOG(header.str);
+    for( i = 0; i < m; i++ ) {
+        for( j = 0; j < n; j++ ) {
+            String8 outstr = str8_pushf(scratch.arena, " (%6.2f,%6.2f)", a[i+j*lda].re, a[i+j*lda].im );
+            LOG(outstr.str);
+        }
+        LOG(newline.str);
+    }
+    // The formatted strings are dead once they have been logged.
+    scratch_release(scratch);
+}
+
+
+
+function void EntryPoint(void)
+{
+
+    OS_InitReceipt os_receipt = OS_init();
+    OS_InitGfxReceipt os_gfx_receipt = OS_gfx_init(os_receipt);
+
+    Arena *arena = m_make_arena();
+
+    // 4x4 complex test matrix
+    U32 N = 4;
+    Z64 *main_A = PushArray(arena, Z64, N*N);
+    main_A[0] = (Z64){-3.84, 2.25};
+    main_A[1] = (Z64){-0.66, 0.83};
+    main_A[2] = (Z64){-3.99, -4.73};
+    main_A[3] = (Z64){ 7.74, 4.18};
+    main_A[4] = (Z64){-8.94, -4.75};
+    main_A[5] = (Z64){-4.40, -3.82};
+    main_A[6] = (Z64){-5.88, -6.60};
+    main_A[7] = (Z64){ 3.66, -7.53};
+    main_A[8] = (Z64){ 8.95, -6.53};
+    main_A[9] = (Z64){-3.50, -4.26};
+    main_A[10] = (Z64){-3.36, -0.40};
+    main_A[11] = (Z64){ 2.58, 3.60};
+    main_A[12] = (Z64){-9.87, 4.82};
+    main_A[13] = (Z64){-3.15, 7.36};
+    main_A[14] = (Z64){-0.75, 5.23};
+    main_A[15] = (Z64){ 4.59, 5.41};
+
+    LOG("\n\n---- Calling Intel MKL zgeev test (Using Z64 instead of MKL_Complex16 etc) ---- \n\n");
+
+    {
+        S32 n = N, lda = N, ldvl = N, ldvr = N, info, lwork;
+        Z64 wkopt;
+        Z64 *work;
+
+        F64 *rwork = PushArray(arena, F64, 2*N);
+        Z64 *w = PushArray(arena, Z64, N);
+        Z64 *vl = PushArray(arena, Z64, N*N);
+        Z64 *vr = PushArray(arena, Z64, N*N);
+        Z64 *a = PushArray(arena, Z64, N*N);
+        // Element-wise copy into the working matrix (the same index is used on both sides).
+        for(U32 j = 0; j < N; j++)
+        {
+            for(U32 i = 0; i < N; i++)
+            {
+                U32 index = i*N+j;
+
+                a[index] = main_A[index];
+            }
+
+        }
+
+
+        /* Executable
statements */
+        LOG( " ZGEEV Example Program Results\n" );
+        /* Query and allocate the optimal workspace */
+        /* With lwork = -1 zgeev only reports the workspace size it wants, in wkopt. */
+        lwork = -1;
+        zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
+               &wkopt, &lwork, rwork, &info );
+        if( info != 0 ) {
+            /* wkopt is not valid if the query itself failed. */
+            LOG( "The workspace query failed.\n" );
+            exit( 1 );
+        }
+        lwork = (S32)wkopt.re;
+        work = (Z64*)malloc( lwork*sizeof(Z64) );
+        if( work == 0 ) {
+            LOG( "Failed to allocate the workspace.\n" );
+            exit( 1 );
+        }
+        /* Solve eigenproblem */
+        zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr,
+               work, &lwork, rwork, &info );
+        /* Check for convergence */
+        if( info > 0 ) {
+            LOG( "The algorithm failed to compute eigenvalues.\n" );
+            exit( 1 );
+        }
+        /* A negative info means an argument had an illegal value; the outputs are not valid. */
+        if( info < 0 ) {
+            LOG( "An argument passed to zgeev had an illegal value.\n" );
+            exit( 1 );
+        }
+        /* Print eigenvalues */
+        print_matrix_Z64( "Eigenvalues", 1, n, w, 1 );
+        /* Print left eigenvectors */
+        print_matrix_Z64( "Left eigenvectors", n, n, vl, ldvl );
+        /* Print right eigenvectors */
+        print_matrix_Z64( "Right eigenvectors", n, n, vr, ldvr );
+        /* Free workspace */
+        free( (void*)work );
+    } /* End of ZGEEV Example */
+
+
+
+    LOG("\n\n--- End of EntryPoint, exiting program. \n\n");
+}
+
+
+
+// NOTE(review): test_mkl_zgeev and test_mkl_dsyevd are not defined in the part of the
+// file visible here - confirm they exist before this function is used.
+function void
+testing_MKL(void)
+{
+
+    test_mkl_zgeev();
+    test_mkl_dsyevd();
+
+}
diff --git a/src/os/os_core.c b/src/os/os_core.c
new file mode 100644
index 0000000..24db847
--- /dev/null
+++ b/src/os/os_core.c
@@ -0,0 +1,5 @@
+root_function B32
+OS_handle_match(OS_Handle a, OS_Handle b)
+{
+    return a.u64[0] == b.u64[0];
+}
\ No newline at end of file
diff --git a/src/os/os_core.h b/src/os/os_core.h
new file mode 100644
index 0000000..8b66327
--- /dev/null
+++ b/src/os/os_core.h
@@ -0,0 +1,90 @@
+#ifndef OS_CORE_H
+#define OS_CORE_H
+
+typedef U32 OS_AccessFlags;
+enum {
+    OS_AccessFlag_Read = (1<<0),
+    OS_AccessFlag_Write = (1<<1),
+    OS_AccessFlag_Execute = (1<<2),
+    OS_AccessFlag_CreateNew = (1<<3),
+    OS_AccessFlag_All = 0xFFFFFFFF,
+};
+
+typedef struct OS_Handle OS_Handle;
+struct OS_Handle {
+    U64 u64[1];
+};
+
+typedef enum OS_ErrorCode
+{
+    OS_ErrorCode_Null,
+    OS_ErrorCode_COUNT
+} OS_ErrorCode;
+
+typedef struct OS_Error OS_Error;
+struct OS_Error {
+    OS_Error *next;
+    OS_ErrorCode code;
+};
+
+typedef struct OS_ErrorList OS_ErrorList;
+struct OS_ErrorList {
+    OS_Error *first;
+    OS_Error *last;
+    U64 count;
+};
+
+typedef struct OS_InitReceipt OS_InitReceipt;
+struct OS_InitReceipt
+{
+    U64 u64[1];
+};
+
+typedef U64 OS_Timestamp;
+
+typedef struct OS_FileAttributes OS_FileAttributes;
+struct OS_FileAttributes
+{
+    U64 size;
+    OS_Timestamp last_modified;
+};
+
+////////////////////////////////
+//~ Helpers
+root_function B32 OS_handle_match(OS_Handle a, OS_Handle b);
+
+////////////////////////////////
+//~ @os_per_backend Memory
+
+root_function U64 OS_page_size(void);
+root_function void* OS_reserve(U64 size);
+root_function void OS_release(void *ptr, U64 size);
+root_function void OS_commit(void *ptr, U64 size);
+root_function void OS_decommit(void *ptr, U64 size);
+root_function void OS_set_memory_access_flags(void *ptr, U64 size, OS_AccessFlags flags);
+
+////////////////////////////////
+//~ Thread and process types
+
+// Function type for thread entry points; used through a pointer (OS_Thread_Function *).
+typedef void OS_Thread_Function(void *params);
+
+root_function OS_InitReceipt OS_init(void);
+root_function void OS_thread_context_set(void *ptr);
+root_function void* OS_thread_context_get(void);
+
+////////////////////////////////
+//~ @os_per_backend File System
+root_function OS_Handle OS_file_open(OS_AccessFlags access_flags, String8 path);
+root_function void OS_file_close(OS_Handle file);
+root_function String8 OS_file_read(Arena *arena, OS_Handle file, U64 min, U64 max);
+// We supply whatever we want to write as a String8List,
+// so we can pull data from different places with no intermediate buffer.
+root_function void OS_file_write(Arena *arena, OS_Handle file, U64 off, + String8List data, OS_ErrorList *out_errors); +root_function OS_FileAttributes OS_attributes_from_file(OS_Handle file); + +/////////////////////////////// +//~ @os_per_backend Numerical value to string conversion +root_function String8List OS_to_string_list_F64(Arena *arena, F64 *values, U32 values_size, String8 format); + +#endif /* OS_CORE_H */ diff --git a/src/os/os_entry_point.c b/src/os/os_entry_point.c new file mode 100644 index 0000000..cab9923 --- /dev/null +++ b/src/os/os_entry_point.c @@ -0,0 +1,5 @@ +#if OS_WINDOWS +#include "win32/os_entry_point_win32.c" +#else +#error Entry point not defined +#endif \ No newline at end of file diff --git a/src/os/os_gfx.c b/src/os/os_gfx.c new file mode 100644 index 0000000..4cb7229 --- /dev/null +++ b/src/os/os_gfx.c @@ -0,0 +1,20 @@ +#include "os_gfx_meta.c" + + +root_function B32 +OS_key_press(OS_EventList *events, OS_Handle window, OS_Key key) +{ + B32 result = 0; + for(OS_Event *e = events->first; e != 0; e = e->next) + { + + if(e->kind == OS_EventKind_Press && OS_handle_match(window, e->window) && e->key == key) + // TODO(anton): modifiers + { + OS_consume_event(events, e); + result = 1; + break; + } + } + return result; +} \ No newline at end of file diff --git a/src/os/os_gfx.h b/src/os/os_gfx.h new file mode 100644 index 0000000..6eea74e --- /dev/null +++ b/src/os/os_gfx.h @@ -0,0 +1,94 @@ +#ifndef OS_GFX_H +#define OS_GFX_H + +typedef U32 OS_Window_Flags; + +typedef struct OS_InitGfxReceipt OS_InitGfxReceipt; +struct OS_InitGfxReceipt +{ + U64 u64[1]; +}; + +#include "os_gfx_meta.h" + +//////////////////////////////// +//~ Cursor Types + +typedef enum OS_Cursor +{ + OS_Cursor_Pointer, + OS_Cursor_IBar, + OS_Cursor_LeftRight, + OS_Cursor_UpDown, + OS_Cursor_DownRight, + OS_Cursor_UpRight, + OS_Cursor_UpDownLeftRight, + OS_Cursor_HandPoint, + OS_Cursor_Disabled, + OS_Cursor_COUNT, +} +OS_Cursor; + +//////////////////////////////// +//~ 
Events +typedef enum OS_EventKind +{ + OS_EventKind_Null, + OS_EventKind_Press, + OS_EventKind_Release, + OS_EventKind_MouseMove, + OS_EventKind_Text, + OS_EventKind_Scroll, + OS_EventKind_WindowLoseFocus, + OS_EventKind_WindowClose, + OS_EventKind_FileDrop, + OS_EventKind_Wakeup, + OS_EventKind_COUNT +} +OS_EventKind; + + +typedef struct OS_Event OS_Event; +struct OS_Event +{ + OS_Event *next; + OS_Event *prev; + OS_Handle window; + OS_EventKind kind; + //OS_Modifiers modifiers; + OS_Key key; + U32 character; + Vec2_F32 position; + Vec2_F32 scroll; + String8 path; +}; + +typedef struct OS_EventList OS_EventList; +struct OS_EventList +{ + OS_Event *first; + OS_Event *last; + U64 count; +}; + +//////////////////////////////// +//~ Event Helpers +root_function U64 OS_character_from_key(OS_Key key); +root_function String8 OS_string_from_event(Arena *arena, OS_Event *event); +root_function B32 OS_key_press(OS_EventList *events, OS_Handle window, OS_Key key); +root_function B32 OS_key_release(OS_EventList *events, OS_Handle window); +root_function B32 OS_text_codepoint(OS_EventList *events, OS_Handle window, U32 codepoint); +root_function Vec2_F32 OS_mouse_from_window(OS_Handle handle); + +//////////////////////////////// +//~ @os_per_backend Init and windowing +root_function OS_InitGfxReceipt OS_gfx_init(OS_InitReceipt os_init_receipt); +root_function OS_Handle OS_window_open(OS_Window_Flags flags, Vec2_S64 size, String8 title); +root_function Rng2_F32 OS_client_rect_from_window(OS_Handle window_handle); + +//////////////////////////////// +//~ @os_per_backend Events +root_function OS_EventList OS_get_events(Arena *arena); +root_function void OS_consume_event(OS_EventList *events, OS_Event *event); + +#endif /* OS_GFX_H */ diff --git a/src/os/os_gfx_meta.c b/src/os/os_gfx_meta.c new file mode 100644 index 0000000..5df8d70 --- /dev/null +++ b/src/os/os_gfx_meta.c @@ -0,0 +1,95 @@ +String8 os_g_key_string_table[92] = +{ + str8_lit_comp("Null"), + str8_lit_comp("Escape"), 
+ str8_lit_comp("F1"), + str8_lit_comp("F2"), + str8_lit_comp("F3"), + str8_lit_comp("F4"), + str8_lit_comp("F5"), + str8_lit_comp("F6"), + str8_lit_comp("F7"), + str8_lit_comp("F8"), + str8_lit_comp("F9"), + str8_lit_comp("F10"), + str8_lit_comp("F11"), + str8_lit_comp("F12"), + str8_lit_comp("F13"), + str8_lit_comp("F14"), + str8_lit_comp("F15"), + str8_lit_comp("F16"), + str8_lit_comp("F17"), + str8_lit_comp("F18"), + str8_lit_comp("F19"), + str8_lit_comp("F20"), + str8_lit_comp("F21"), + str8_lit_comp("F22"), + str8_lit_comp("F23"), + str8_lit_comp("F24"), + str8_lit_comp("Grave Accent"), + str8_lit_comp("0"), + str8_lit_comp("1"), + str8_lit_comp("2"), + str8_lit_comp("3"), + str8_lit_comp("4"), + str8_lit_comp("5"), + str8_lit_comp("6"), + str8_lit_comp("7"), + str8_lit_comp("8"), + str8_lit_comp("9"), + str8_lit_comp("Minus"), + str8_lit_comp("Equal"), + str8_lit_comp("Backspace"), + str8_lit_comp("Delete"), + str8_lit_comp("Tab"), + str8_lit_comp("A"), + str8_lit_comp("B"), + str8_lit_comp("C"), + str8_lit_comp("D"), + str8_lit_comp("E"), + str8_lit_comp("F"), + str8_lit_comp("G"), + str8_lit_comp("H"), + str8_lit_comp("I"), + str8_lit_comp("J"), + str8_lit_comp("K"), + str8_lit_comp("L"), + str8_lit_comp("M"), + str8_lit_comp("N"), + str8_lit_comp("O"), + str8_lit_comp("P"), + str8_lit_comp("Q"), + str8_lit_comp("R"), + str8_lit_comp("S"), + str8_lit_comp("T"), + str8_lit_comp("U"), + str8_lit_comp("V"), + str8_lit_comp("W"), + str8_lit_comp("X"), + str8_lit_comp("Y"), + str8_lit_comp("Z"), + str8_lit_comp("Space"), + str8_lit_comp("Enter"), + str8_lit_comp("Ctrl"), + str8_lit_comp("Shift"), + str8_lit_comp("Alt"), + str8_lit_comp("Up"), + str8_lit_comp("Left"), + str8_lit_comp("Down"), + str8_lit_comp("Right"), + str8_lit_comp("Page Up"), + str8_lit_comp("Page Down"), + str8_lit_comp("Home"), + str8_lit_comp("End"), + str8_lit_comp("Forward Slash"), + str8_lit_comp("Period"), + str8_lit_comp("Comma"), + str8_lit_comp("Quote"), + str8_lit_comp("Left 
Bracket"), + str8_lit_comp("Right Bracket"), + str8_lit_comp("Insert"), + str8_lit_comp("Left Mouse Button"), + str8_lit_comp("Middle Mouse Button"), + str8_lit_comp("Right Mouse Button"), + str8_lit_comp("Semicolon"), +}; diff --git a/src/os/os_gfx_meta.h b/src/os/os_gfx_meta.h new file mode 100644 index 0000000..1cc6b07 --- /dev/null +++ b/src/os/os_gfx_meta.h @@ -0,0 +1,114 @@ +/* date = April 5th 2024 7:56 pm */ + +#ifndef OS_GFX_META_H +#define OS_GFX_META_H + +// TODO(anton): This is generated code in Ryans codebase. I just copy it here now and make some generation +// myself later. + + +typedef enum OS_Key +{ + OS_Key_Null, + OS_Key_Esc, + OS_Key_F1, + OS_Key_F2, + OS_Key_F3, + OS_Key_F4, + OS_Key_F5, + OS_Key_F6, + OS_Key_F7, + OS_Key_F8, + OS_Key_F9, + OS_Key_F10, + OS_Key_F11, + OS_Key_F12, + OS_Key_F13, + OS_Key_F14, + OS_Key_F15, + OS_Key_F16, + OS_Key_F17, + OS_Key_F18, + OS_Key_F19, + OS_Key_F20, + OS_Key_F21, + OS_Key_F22, + OS_Key_F23, + OS_Key_F24, + OS_Key_GraveAccent, + OS_Key_0, + OS_Key_1, + OS_Key_2, + OS_Key_3, + OS_Key_4, + OS_Key_5, + OS_Key_6, + OS_Key_7, + OS_Key_8, + OS_Key_9, + OS_Key_Minus, + OS_Key_Equal, + OS_Key_Backspace, + OS_Key_Delete, + OS_Key_Tab, + OS_Key_A, + OS_Key_B, + OS_Key_C, + OS_Key_D, + OS_Key_E, + OS_Key_F, + OS_Key_G, + OS_Key_H, + OS_Key_I, + OS_Key_J, + OS_Key_K, + OS_Key_L, + OS_Key_M, + OS_Key_N, + OS_Key_O, + OS_Key_P, + OS_Key_Q, + OS_Key_R, + OS_Key_S, + OS_Key_T, + OS_Key_U, + OS_Key_V, + OS_Key_W, + OS_Key_X, + OS_Key_Y, + OS_Key_Z, + OS_Key_Space, + OS_Key_Enter, + OS_Key_Ctrl, + OS_Key_Shift, + OS_Key_Alt, + OS_Key_Up, + OS_Key_Left, + OS_Key_Down, + OS_Key_Right, + OS_Key_PageUp, + OS_Key_PageDown, + OS_Key_Home, + OS_Key_End, + OS_Key_ForwardSlash, + OS_Key_Period, + OS_Key_Comma, + OS_Key_Quote, + OS_Key_LeftBracket, + OS_Key_RightBracket, + OS_Key_Insert, + OS_Key_MouseLeft, + OS_Key_MouseMiddle, + OS_Key_MouseRight, + OS_Key_Semicolon, + OS_Key_COUNT, +} +OS_Key; + +root_global String8 
os_g_key_string_table[92]; + + + + + +#endif //OS_GFX_META_H diff --git a/src/os/os_inc.c b/src/os/os_inc.c new file mode 100644 index 0000000..9fa5346 --- /dev/null +++ b/src/os/os_inc.c @@ -0,0 +1,9 @@ +#include "os/os_core.c" +#include "os/os_gfx.c" + +#if OS_WINDOWS +#include "win32/os_core_win32.c" +#include "win32/os_gfx_win32.c" +#else +# error OS layer for this platform not implemented yet +#endif \ No newline at end of file diff --git a/src/os/os_inc.h b/src/os/os_inc.h new file mode 100644 index 0000000..876d7b4 --- /dev/null +++ b/src/os/os_inc.h @@ -0,0 +1,28 @@ +#ifndef OS_INC_H +#define OS_INC_H + +// TODO(anton): Change this to OS implementations of the memory +#if !defined(m_reserve) +#define m_reserve OS_reserve +#endif +#if !defined(m_commit) +#define m_commit OS_commit +#endif +#if !defined(m_decommit) +#define m_decommit OS_decommit +#endif +#if !defined(m_release) +#define m_release OS_release +#endif + +#include "os/os_core.h" +#include "os/os_gfx.h" + +#if OS_WINDOWS +# include "win32/os_core_win32.h" +# include "win32/os_gfx_win32.h" +#else +# error OS layer for this platform not implemented yet +#endif + +#endif //OS_INC_H diff --git a/src/os/win32/os_core_win32.c b/src/os/win32/os_core_win32.c new file mode 100644 index 0000000..f295ceb --- /dev/null +++ b/src/os/win32/os_core_win32.c @@ -0,0 +1,299 @@ +#pragma comment(lib, "user32") +#pragma comment(lib, "winmm") +#pragma comment(lib, "shell32") +#pragma comment(lib, "Shlwapi.lib") + +//~ Memory +root_function U64 +OS_page_size(void) { + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwPageSize; +} + +root_function void +*OS_reserve(U64 size) { + U64 gb_snapped_size = size; + // Align the reserved memory to nearest gigabyte? 
+ gb_snapped_size += M_DEFAULT_RESERVE_SIZE - 1; + gb_snapped_size -= gb_snapped_size % M_DEFAULT_RESERVE_SIZE; + void *ptr = VirtualAlloc(0, gb_snapped_size, MEM_RESERVE, PAGE_NOACCESS); + return ptr; +} + +// Releases a whole reservation. `size` is unused: VirtualFree is called with length 0 and MEM_RELEASE. +root_function void +OS_release(void *ptr, U64 size) { + VirtualFree(ptr, 0, MEM_RELEASE); +} + +// Commits `size` bytes at `ptr` as read/write memory; the size is rounded up to a multiple of the page size. +root_function void +OS_commit(void *ptr, U64 size) { + U64 page_snapped_size = size; + page_snapped_size += OS_page_size() - 1; + page_snapped_size -= page_snapped_size%OS_page_size(); + VirtualAlloc(ptr, page_snapped_size, MEM_COMMIT, PAGE_READWRITE); +} + +// Decommits `size` bytes at `ptr` (MEM_DECOMMIT); the reservation itself is not released. +root_function void +OS_decommit(void *ptr, U64 size){ + VirtualFree(ptr, size, MEM_DECOMMIT); +} + +//~ Thread +// Creates the global win32 OS state the first time it is called on the main thread. +// Every call, including repeated ones, returns a receipt with u64[0] == 1. +root_function OS_InitReceipt OS_init(void) +{ + if (is_main_thread() && os_g_w32_state == 0) + { + Arena *arena = m_make_arena_reserve(Gigabytes(1)); + os_g_w32_state = PushArray(arena, OS_W32_State, 1); + os_g_w32_state->arena = arena; + + os_g_w32_state->thread_arena = m_make_arena_reserve(Kilobytes(256)); + + // Allocate the TLS slot that OS_thread_context_set/get index with. + // Without this the index is never assigned before TlsSetValue/TlsGetValue use it. + os_g_w32_state->thread_context_index = TlsAlloc(); + if(os_g_w32_state->thread_context_index == TLS_OUT_OF_INDEXES) + { + break_debugger(); + } + } + + OS_InitReceipt out; + out.u64[0] = 1; + return out; +} + + +// Stores `ptr` in the calling thread's TLS slot. Requires OS_init to have run (reads os_g_w32_state). +root_function void OS_thread_context_set(void *ptr) { + TlsSetValue(os_g_w32_state->thread_context_index, ptr); +} + +// Returns the pointer previously stored in the calling thread's TLS slot. +root_function void* OS_thread_context_get(void) { + void *result = TlsGetValue(os_g_w32_state->thread_context_index); + return result; +} + +/* // TODO(anton): This is an interesting function to protect virtual allocs, but it doesn't look like it's +used in the app template right now. I'll add it when I need it. 
+root_function void +OS_set_memory_access_flags(void *ptr, U64 size, OS_AccessFlags flags) { + + U64 page_snapped_size = size; + page_snapped_size += OS_page_size() - 1; + page_snapped_size -= page_snapped_size%OS_page_size(); + + + DWORD new_flags = 0; + { + switch(flags) + { + default: + { + new_flags = PAGE_NOACCESS; + }break; +#define Map(win32_code, bitflags) case bitflags:{new_flags = win32_code;}break + Map(PAGE_EXECUTE, OS_AccessFlag_Execute); + Map(PAGE_EXECUTE_READ, OS_AccessFlag_Execute|OS_AccessFlag_Read); + Map(PAGE_EXECUTE_READWRITE, OS_AccessFlag_Execute|OS_AccessFlag_Read|OS_AccessFlag_Write); + Map(PAGE_EXECUTE_WRITECOPY, OS_AccessFlag_Execute|OS_AccessFlag_Write); + Map(PAGE_READONLY, OS_AccessFlag_Read); + Map(PAGE_READWRITE, OS_AccessFlag_Read|OS_AccessFlag_Write); +#undef Map + } + } + + + DWORD old_flags = 0; + VirtualProtect(ptr, page_snapped_size, new_flags, &old_flags); +} +*/ + + + +//~ @os_per_backend File System +root_function OS_Handle +OS_file_open(OS_AccessFlags access_flags, String8 path) { + ArenaTemp scratch = scratch_get(0, 0); + String16 path16 = str16_from8(scratch.arena, path); + + // Map to win32 access flags + DWORD desired_access = 0; + if(access_flags & OS_AccessFlag_Read) { desired_access |= GENERIC_READ; } + if(access_flags & OS_AccessFlag_Write) { desired_access |= GENERIC_WRITE; } + + DWORD share_mode = 0; + + SECURITY_ATTRIBUTES security_attributes = { + (DWORD)sizeof(SECURITY_ATTRIBUTES), + 0, + 0, + }; + + // Map to win32 creation disposition + DWORD creation_disposition = 0; + if(!(access_flags & OS_AccessFlag_CreateNew)) { + creation_disposition = OPEN_EXISTING; + } else { + creation_disposition = CREATE_ALWAYS; + } + + + DWORD flags_and_attribs = 0; + HANDLE template_file = 0; + + HANDLE file = CreateFileW((WCHAR*)path16.str, + desired_access, + share_mode, + &security_attributes, + creation_disposition, + flags_and_attribs, + template_file); + + if(file == INVALID_HANDLE_VALUE) { + // TODO(anton): Append to 
errors + // + DWORD error = GetLastError(); + break_debugger(); + } + + // Map to abstract handle + OS_Handle handle = {0}; + handle.u64[0] = (U64)file; + + scratch_release(scratch); + return handle; +} + +// Closes the win32 handle stored in `file`. Null and INVALID_HANDLE_VALUE are ignored, +// matching the validity check in OS_file_write. +root_function void +OS_file_close(OS_Handle file) { + HANDLE handle = (HANDLE)file.u64[0]; + if(handle != 0 && handle != INVALID_HANDLE_VALUE) { + CloseHandle(handle); + } +} + +// Reads up to |max - min| bytes from the file's current position into memory pushed on `arena`. +// `min` is only used to compute the byte count; no seek is performed here. +// Returns the bytes actually read: result.size is smaller than requested when the file ends early +// or a read fails, and the result is empty for an invalid handle. +root_function String8 +OS_file_read(Arena *arena, OS_Handle file, U64 min, U64 max) { + String8 result = {0}; + + HANDLE handle = (HANDLE)file.u64[0]; + if(handle == 0 || handle == INVALID_HANDLE_VALUE) { + // TODO(anton): accumulate errors + } else { + U64 bytes_to_read = AbsoluteValueU64(max - min); + result.str = PushArray(arena, U8, bytes_to_read); + result.size = 0; + U8 *ptr = result.str; + U8 *one_past_last = result.str + bytes_to_read; + + for(;;) { + U64 unread = (U64)(one_past_last - ptr); + // ReadFile takes a 32-bit count, so large reads are split into chunks of at most U32Max bytes. + DWORD to_read = (DWORD)(ClampTop(unread, U32Max)); + DWORD did_read = 0; + // A synchronous ReadFile at end-of-file returns TRUE with did_read == 0. + // Treat that as "done", otherwise a file shorter than the request loops forever. + if(!ReadFile(handle, ptr, to_read, &did_read, 0) || did_read == 0) { + break; + } + ptr += did_read; + result.size += did_read; + if(ptr >= one_past_last) { + break; + } + + } + } + return result; +} + +// Returns the file's size (the 64-bit value assembled from GetFileSize's low/high halves) and its +// last-write FILETIME packed into a U64. +root_function OS_FileAttributes +OS_attributes_from_file(OS_Handle file) +{ + HANDLE handle = (HANDLE)file.u64[0]; + OS_FileAttributes attrs = {0}; + U32 high_bits = 0; + U32 low_bits = GetFileSize(handle, (DWORD *)&high_bits); + FILETIME last_write_time = {0}; + GetFileTime(handle, 0, 0, &last_write_time); + attrs.size = (U64)low_bits | (((U64)high_bits) << 32); + attrs.last_modified = ((U64)last_write_time.dwLowDateTime) | + (((U64)last_write_time.dwHighDateTime) << 32); + return attrs; +} + +root_function void +OS_file_write(Arena *arena, OS_Handle file, U64 off, String8List data, OS_ErrorList *out_errors) { + HANDLE handle = (HANDLE)file.u64[0]; + if(handle == 0 || handle == INVALID_HANDLE_VALUE) + { + // TODO(anton): accumulate errors + } + else for(String8Node *node = data.first; node != 0; 
node = node->next) + { + U8 *ptr = node->string.str; + U8 *opl = ptr + node->string.size; + for(;;) + { + U64 unwritten = (U64)(opl - ptr); + DWORD to_write = (DWORD)(ClampTop(unwritten, U32Max)); + DWORD did_write = 0; + // TODO(anton): understand winapi + if(!WriteFile(handle, ptr, to_write, &did_write, 0)) + { + goto fail_out; + } + ptr += did_write; + if(ptr >= opl) + { + break; + } + } + } + fail_out:; +} + +root_function Rng2_F32 +OS_client_rect_from_window(OS_Handle window_handle) +{ + Rng2_F32 rect = {0}; + OS_W32_Window *window = (OS_W32_Window *)window_handle.u64[0]; + if(window != 0) + { + RECT w32_rect = {0}; + if(GetClientRect(window->hwnd, &w32_rect)) + { + rect.x0 = (F32)w32_rect.left; + rect.y0 = (F32)w32_rect.top; + rect.x1 = (F32)w32_rect.right; + rect.y1 = (F32)w32_rect.bottom; + } + } + return rect; +} + +root_function String8List +OS_to_string_list_F64(Arena *arena, F64 *values, U32 values_size, String8 format) +{ + // Parse the format string, assume it is of the form + // numchars.precisione\n, for example "13.6e" + if(format.size < 3 || format.str[2] != '.') + { + break_debugger(); + OutputDebugStringA("Wrong format when converting values to string list"); + exit(1); + } + U8 first_parse[3]; + first_parse[0] = format.str[0]; + first_parse[1] = format.str[1]; + first_parse[2] = '\0'; + // We can add space for the dot right away since this is assumed in the formatting. 
+ U32 character_length = (U32)StrToIntA(first_parse) + 1; + + + String8List out = {0}; + for(U32 i = 0; i < values_size; i++) + { + String8 value_string = {0}; + value_string.size = character_length; + value_string.str = PushArray(arena, U8, character_length); + StringCchPrintfA(value_string.str, character_length, "%e\n", values[i]); + str8_list_push(arena, &out, value_string); + } + + return out; +} + diff --git a/src/os/win32/os_core_win32.h b/src/os/win32/os_core_win32.h new file mode 100644 index 0000000..e7b1e26 --- /dev/null +++ b/src/os/win32/os_core_win32.h @@ -0,0 +1,51 @@ +/* date = April 25th 2023 9:46 pm */ + +#ifndef OS_CORE_WIN32_H +#define OS_CORE_WIN32_H + +// To avoid C4042 error when including windows we use some +// preprocessor praga macro things.. So when including the windows headers we +// won't have defined the "function" keyword (we define it in base_core.h), +// and then we redefine it after when popping. +#pragma push_macro("function") +#undef function +#define WIN32_LEAN_AND_MEAN +#include +#include +#include +#pragma pop_macro("function") + + +// We have a nice debugbreak assert macro, but it is nice to have one also specifically for windows HRESULT +#define AssertHR(hr) Assert(SUCCEEDED(hr)) + +///////////////////////////////////// + +// Processes and threads +typedef struct OS_W32_Thread OS_W32_Thread; +struct OS_W32_Thread +{ + OS_W32_Thread *next; + HANDLE handle; + DWORD thread_id; + void *params; + OS_Thread_Function *func; +}; + + +///////////////////////////////////// +// Global state +typedef struct OS_W32_State OS_W32_State; +struct OS_W32_State +{ + Arena *arena; + + Arena *thread_arena; + DWORD thread_context_index; +}; + +global HINSTANCE os_g_w32_hinstance; +global OS_W32_State *os_g_w32_state; + +//root_function OS_W32_Window* OS_W32_window_from_handle(OS_Handle handle); +#endif //OS_CORE_WIN32_H diff --git a/src/os/win32/os_entry_point_win32.c b/src/os/win32/os_entry_point_win32.c new file mode 100644 index 
0000000..fa4078e --- /dev/null +++ b/src/os/win32/os_entry_point_win32.c @@ -0,0 +1,9 @@ +function void EntryPoint(void); + + +int WinMain(HINSTANCE instance, HINSTANCE prev_instance, LPSTR lp_cmd_line, int n_show_cmd) +{ + os_g_w32_hinstance = instance; + base_main_thread_entry(EntryPoint, (U64)__argc, __argv); + return 0; +} \ No newline at end of file diff --git a/src/os/win32/os_gfx_win32.c b/src/os/win32/os_gfx_win32.c new file mode 100644 index 0000000..5d0a28f --- /dev/null +++ b/src/os/win32/os_gfx_win32.c @@ -0,0 +1,328 @@ +#pragma comment(lib, "gdi32") + +#define OS_W32_GraphicalWindowClassName L"ApplicationWindowClass" + +root_function OS_InitGfxReceipt +OS_gfx_init(OS_InitReceipt os_init_receipt) +{ + if (is_main_thread() && os_g_w32_gfx_state == 0) + { + + { + // Global state + Arena *arena = m_make_arena_reserve(Gigabytes(1)); + os_g_w32_gfx_state = PushArray(arena, OS_W32_Gfx_State, 1); + os_g_w32_gfx_state->arena = arena; + os_g_w32_gfx_state->window_arena = m_make_arena_reserve(Gigabytes(1)); + } + // TODO(antonl) DPI awareness + + // Register window class + { + /* WNDCLASSW window_class = { 0 }; */ + /* window_class.style = CS_HREDRAW | CS_VREDRAW; */ + /* window_class.lpfnWndProc = OS_W32_WindowProc; */ + /* window_class.hInstance = g_os_w32_hinstance; */ + /* window_class.lpszClassName = OS_W32_GraphicalWindowClassName; */ + /* window_class.hCursor = LoadCursor(0, IDC_ARROW); */ + /* RegisterClassW(&window_class); */ + WNDCLASSEXW window_class = {0}; + window_class.cbSize = sizeof(WNDCLASSEXW); + window_class.lpfnWndProc = OS_W32_window_proc; + window_class.hInstance = os_g_w32_hinstance; + window_class.lpszClassName = OS_W32_GraphicalWindowClassName; + if(!RegisterClassExW(&window_class)) + { + break_debugger(); + } + } + + // Rjf makes a "global invisible window", but why? 
+ { + os_g_w32_gfx_state->global_hwnd = CreateWindowExW(0, + OS_W32_GraphicalWindowClassName, + L"", + WS_OVERLAPPEDWINDOW, + 100,100, + 0,0, + 0,0, + os_g_w32_hinstance, 0); + os_g_w32_gfx_state->global_hdc = GetDC(os_g_w32_gfx_state->global_hwnd); + } + } + + OS_InitGfxReceipt out; + out.u64[0] = 1; + return out; +} + +root_function OS_Handle +OS_W32_handle_from_window(OS_W32_Window *window) +{ + OS_Handle handle = { 0 }; + handle.u64[0] = (U64)window; + return handle; +} + +root_function OS_W32_Window* +OS_W32_window_from_handle(OS_Handle handle) +{ + OS_W32_Window *window = (OS_W32_Window *)handle.u64[0]; + return window; +} + +root_function OS_Handle +OS_window_open(OS_Window_Flags flags, Vec2_S64 size, String8 title) +{ + OS_Handle handle = { 0 }; + + { + // Window allocation + OS_W32_Window *window = os_g_w32_gfx_state->free_window; + { + // Windows are stored in a stack on the gfx state + if (window != 0) + { + StackPop(os_g_w32_gfx_state->free_window); + } + else + { + window = PushArray(os_g_w32_gfx_state->window_arena, OS_W32_Window, 1); + } + MemoryZeroStruct(window); + DLLPushBack(os_g_w32_gfx_state->first_window, os_g_w32_gfx_state->last_window, window); + } + + // Open window + HWND hwnd = 0; + HDC hdc = 0; + { + ArenaTemp scratch = scratch_get(0, 0); + String16 title16 = str16_from8(scratch.arena, title); + hwnd = CreateWindowExW(0, + OS_W32_GraphicalWindowClassName, (LPCWSTR)title16.str, + WS_OVERLAPPEDWINDOW | WS_VISIBLE, + 100, 100, + size.x, size.y, 0, 0, os_g_w32_hinstance, 0); + hdc = GetDC(hwnd); + SetWindowLongPtr(hwnd, GWLP_USERDATA, (LONG_PTR)window); + scratch_release(scratch); + } + + { + window->hwnd = hwnd; + window->hdc = hdc; + } + + handle = OS_W32_handle_from_window(window); + } + return handle; +} + +function LRESULT +OS_W32_window_proc(HWND hwnd, UINT message, WPARAM w_param, LPARAM l_param) +{ + LRESULT result = 0; + + OS_Event *event = 0; + OS_W32_Window *window = (OS_W32_Window *)GetWindowLongPtr(hwnd, GWLP_USERDATA); + 
OS_Handle window_handle = OS_W32_handle_from_window(window); + ArenaTemp scratch = scratch_get(&os_w32_tl_events_arena, 1); + OS_EventList fallback_event_list = {0}; + if(os_w32_tl_events_arena == 0) + { + os_w32_tl_events_arena = scratch.arena; + os_w32_tl_events_list = &fallback_event_list; + } + + B32 is_release = 0; + Axis2 scroll_axis = Axis2_Y; + switch(message) + { + default: + { + result = DefWindowProcW(hwnd, message, w_param, l_param); + } break; + + //- General window events + case WM_CLOSE: + { + event = PushArray(os_w32_tl_events_arena, OS_Event, 1); + event->kind = OS_EventKind_WindowClose; + event->window = window_handle; + } break; + + //- Mouse buttons + case WM_LBUTTONUP: + case WM_MBUTTONUP: + case WM_RBUTTONUP: + { + ReleaseCapture(); + is_release = 1; + } fallthrough; + case WM_LBUTTONDOWN: + case WM_MBUTTONDOWN: + case WM_RBUTTONDOWN: + { + if(is_release == 0) + { + SetCapture(hwnd); + } + OS_EventKind kind = is_release ? OS_EventKind_Release : OS_EventKind_Press; + OS_Key key = OS_Key_MouseLeft; + switch(message) + { + case WM_MBUTTONUP: case WM_MBUTTONDOWN: key = OS_Key_MouseMiddle; break; + case WM_RBUTTONUP: case WM_RBUTTONDOWN: key = OS_Key_MouseRight; break; + } + event = PushArray(os_w32_tl_events_arena, OS_Event, 1); + event->kind = kind; + event->window = window_handle; + event->key = key; + event->position = OS_mouse_from_window(window_handle); + } break; + + //- Keyboard events + case WM_SYSKEYDOWN: case WM_SYSKEYUP: + { + result = DefWindowProcW(hwnd, message, w_param, l_param); + } fallthrough; + case WM_KEYDOWN: + case WM_KEYUP: + { + // TODO(anton): Just check this thing with was down, is down.., WINAPI crap + B32 was_down = !!(l_param & (1 << 30)); + B32 is_down = !(l_param & (1 << 31)); + OS_EventKind kind = is_down ? OS_EventKind_Press : OS_EventKind_Release; + + // TODO(anton): Here we use statics but maybe we should not... + // Could just move this out and pre-init or generate it and include or whatever... 
+ // probably should just be in some meta header. + local_persist OS_Key key_table[256] = {0}; + local_persist B32 key_table_initialised = 0; + if(!key_table_initialised) + { + key_table_initialised = 1; + + for (U32 i = 'A', j = OS_Key_A; i <= 'Z'; i += 1, j += 1) + { + key_table[i] = (OS_Key)j; + } + for (U32 i = '0', j = OS_Key_0; i <= '9'; i += 1, j += 1) + { + key_table[i] = (OS_Key)j; + } + for (U32 i = VK_F1, j = OS_Key_F1; i <= VK_F24; i += 1, j += 1) + { + key_table[i] = (OS_Key)j; + } + + key_table[VK_ESCAPE] = OS_Key_Esc; + key_table[VK_OEM_3] = OS_Key_GraveAccent; + key_table[VK_OEM_MINUS] = OS_Key_Minus; + key_table[VK_OEM_PLUS] = OS_Key_Equal; + key_table[VK_BACK] = OS_Key_Backspace; + key_table[VK_TAB] = OS_Key_Tab; + key_table[VK_SPACE] = OS_Key_Space; + key_table[VK_RETURN] = OS_Key_Enter; + key_table[VK_CONTROL] = OS_Key_Ctrl; + key_table[VK_SHIFT] = OS_Key_Shift; + key_table[VK_MENU] = OS_Key_Alt; + key_table[VK_UP] = OS_Key_Up; + key_table[VK_LEFT] = OS_Key_Left; + key_table[VK_DOWN] = OS_Key_Down; + key_table[VK_RIGHT] = OS_Key_Right; + key_table[VK_DELETE] = OS_Key_Delete; + key_table[VK_PRIOR] = OS_Key_PageUp; + key_table[VK_NEXT] = OS_Key_PageDown; + key_table[VK_HOME] = OS_Key_Home; + key_table[VK_END] = OS_Key_End; + key_table[VK_OEM_2] = OS_Key_ForwardSlash; + key_table[VK_OEM_PERIOD] = OS_Key_Period; + key_table[VK_OEM_COMMA] = OS_Key_Comma; + key_table[VK_OEM_7] = OS_Key_Quote; + key_table[VK_OEM_4] = OS_Key_LeftBracket; + key_table[VK_OEM_6] = OS_Key_RightBracket; + key_table[VK_INSERT] = OS_Key_Insert; + key_table[VK_OEM_1] = OS_Key_Semicolon; + } + + OS_Key key = OS_Key_Null; + if(w_param < ArrayCount(key_table)) + { + key = key_table[w_param]; + } + + event = PushArray(os_w32_tl_events_arena, OS_Event, 1); + event->kind = kind; + event->window = window_handle; + event->key = key; + } break; + + + } + // If we registered an event we push it to the event list. 
+ if(event) + { + DLLPushBack(os_w32_tl_events_list->first, os_w32_tl_events_list->last, event); + os_w32_tl_events_list->count += 1; + } + + // If this call installed the stack-local fallback list (no OS_get_events was active), uninstall it + // before returning. Otherwise the thread-local pointers would keep referring to this dead stack + // frame and to a scratch arena that is released on the next line. + if(os_w32_tl_events_list == &fallback_event_list) + { + os_w32_tl_events_arena = 0; + os_w32_tl_events_list = 0; + } + + scratch_release(scratch); + return result; +} + +// Returns the cursor position in the client coordinates of the window behind `handle`. +// Returns (-100, -100) when the handle is null or either win32 query fails. +root_function Vec2_F32 +OS_mouse_from_window(OS_Handle handle) +{ + Vec2_F32 result = vec2_F32(-100, -100); + OS_W32_Window *window = OS_W32_window_from_handle(handle); + if(window != 0) + { + POINT point; + if(GetCursorPos(&point)) + { + if(ScreenToClient(window->hwnd, &point)) + { + result = vec2_F32(point.x, point.y); + } + } + } + + return result; +} + +// Dispatches all messages currently queued for the calling thread. While dispatching, the window +// procedure allocates events on `arena` and appends them to the list returned here. +root_function OS_EventList +OS_get_events(Arena* arena) +{ + OS_EventList list = {0}; + os_w32_tl_events_arena = arena; + os_w32_tl_events_list = &list; + for(MSG message; PeekMessage(&message, 0, 0, 0, PM_REMOVE);) + { + TranslateMessage(&message); + DispatchMessage(&message); + } + os_w32_tl_events_arena = 0; + os_w32_tl_events_list = 0; + + return list; +} + +// Unlinks `event` from `events`, decrements the count and marks the event as OS_EventKind_Null. +root_function void +OS_consume_event(OS_EventList *events, OS_Event *event) +{ + DLLRemove(events->first, events->last, event); + events->count -= 1; + event->kind = OS_EventKind_Null; +} + +// Shows the window behind `handle` and forces an immediate paint. A null handle is ignored, +// consistent with OS_mouse_from_window. +root_function void +OS_window_first_paint(OS_Handle handle) +{ + OS_W32_Window *window = OS_W32_window_from_handle(handle); + if(window != 0) + { + ShowWindow(window->hwnd, SW_SHOW); + UpdateWindow(window->hwnd); + } +} \ No newline at end of file diff --git a/src/os/win32/os_gfx_win32.h b/src/os/win32/os_gfx_win32.h new file mode 100644 index 0000000..76a0f06 --- /dev/null +++ b/src/os/win32/os_gfx_win32.h @@ -0,0 +1,36 @@ +#ifndef OS_GFX_WIN32_H +#define OS_GFX_WIN32_H + +typedef struct OS_W32_Window OS_W32_Window; +struct OS_W32_Window +{ + OS_W32_Window *next; + OS_W32_Window *prev; + HWND hwnd; + HDC hdc; +}; + +typedef struct OS_W32_Gfx_State OS_W32_Gfx_State; +struct OS_W32_Gfx_State +{ + Arena *arena; + HWND global_hwnd; + HDC global_hdc; + + Arena *window_arena; + OS_W32_Window *first_window; + OS_W32_Window *last_window; + 
OS_W32_Window *free_window; +}; + +root_global OS_W32_Gfx_State *os_g_w32_gfx_state = 0; +extern per_thread Arena *os_w32_tl_events_arena = 0; +extern per_thread OS_EventList *os_w32_tl_events_list = 0; + +root_function OS_Handle OS_W32_handle_from_window(OS_W32_Window *window); +root_function OS_W32_Window *OS_W32_window_from_handle(OS_Handle handle); +function LRESULT OS_W32_window_proc(HWND hwnd, UINT message, WPARAM w_param, LPARAM l_param); + + + +#endif // OS_GFX_WIN32_H diff --git a/src/test_mkl.c b/src/test_mkl.c new file mode 100644 index 0000000..41a9bbb --- /dev/null +++ b/src/test_mkl.c @@ -0,0 +1,272 @@ + +/******************************************************************************* + * Copyright 2009-2021 Intel Corporation. + * + * This software and the related documents are Intel copyrighted materials, and + * your use of them is governed by the express license under which they were + * provided to you (License). Unless the License provides otherwise, you may not + * use, modify, copy, publish, distribute, disclose or transmit this software or + * the related documents without Intel's prior written permission. + * + * This software and the related documents are provided as is, with no express + * or implied warranties, other than those that are expressly stated in the + * License. + *******************************************************************************/ + +/* + ZGEEV Example. + ============== + + Program computes the eigenvalues and left and right eigenvectors of a general + rectangular matrix A: + + ( -3.84, 2.25) ( -8.94, -4.75) ( 8.95, -6.53) ( -9.87, 4.82) + ( -0.66, 0.83) ( -4.40, -3.82) ( -3.50, -4.26) ( -3.15, 7.36) + ( -3.99, -4.73) ( -5.88, -6.60) ( -3.36, -0.40) ( -0.75, 5.23) + ( 7.74, 4.18) ( 3.66, -7.53) ( 2.58, 3.60) ( 4.59, 5.41) + + Description. + ============ + + The routine computes for an n-by-n complex nonsymmetric matrix A, the + eigenvalues and, optionally, the left and/or right eigenvectors. 
The right + eigenvector v(j) of A satisfies + + A*v(j)= lambda(j)*v(j) + + where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies + + u(j)H*A = lambda(j)*u(j)H + + where u(j)H denotes the conjugate transpose of u(j). The computed + eigenvectors are normalized to have Euclidean norm equal to 1 and + largest component real. + + Example Program Results. + ======================== + + ZGEEV Example Program Results + + Eigenvalues + ( -9.43,-12.98) ( -3.44, 12.69) ( 0.11, -3.40) ( 5.76, 7.13) + + Left eigenvectors + ( 0.24, -0.18) ( 0.61, 0.00) ( -0.18, -0.33) ( 0.28, 0.09) + ( 0.79, 0.00) ( -0.05, -0.27) ( 0.82, 0.00) ( -0.55, 0.16) + ( 0.22, -0.27) ( -0.21, 0.53) ( -0.37, 0.15) ( 0.45, 0.09) + ( -0.02, 0.41) ( 0.40, -0.24) ( 0.06, 0.12) ( 0.62, 0.00) + + Right eigenvectors + ( 0.43, 0.33) ( 0.83, 0.00) ( 0.60, 0.00) ( -0.31, 0.03) + ( 0.51, -0.03) ( 0.08, -0.25) ( -0.40, -0.20) ( 0.04, 0.34) + ( 0.62, 0.00) ( -0.25, 0.28) ( -0.09, -0.48) ( 0.36, 0.06) + ( -0.23, 0.11) ( -0.10, -0.32) ( -0.43, 0.13) ( 0.81, 0.00) + */ +#include +#include +#include + +/* Auxiliary routines prototypes */ +function void +print_matrix_cmplx16( char* desc, int m, int n, MKL_Complex16* a, int lda ); + +#ifndef TEST_ZGEEV_N +#define TEST_ZGEEV_N 4 +#endif + +/* Main program */ +static void +test_mkl_zgeev(void) { + /* Locals */ + int N = TEST_ZGEEV_N; + MKL_INT n = N, lda = N, ldvl = N, ldvr = N, info, lwork; + MKL_Complex16 wkopt; + MKL_Complex16* work; + /* Local arrays */ + /* rwork dimension should be at least 2*n */ + double rwork[2*TEST_ZGEEV_N]; + MKL_Complex16 w[TEST_ZGEEV_N], vl[TEST_ZGEEV_N*TEST_ZGEEV_N], vr[TEST_ZGEEV_N*TEST_ZGEEV_N]; + MKL_Complex16 a[TEST_ZGEEV_N*TEST_ZGEEV_N] = { + {-3.84, 2.25}, {-0.66, 0.83}, {-3.99, -4.73}, { 7.74, 4.18}, + {-8.94, -4.75}, {-4.40, -3.82}, {-5.88, -6.60}, { 3.66, -7.53}, + { 8.95, -6.53}, {-3.50, -4.26}, {-3.36, -0.40}, { 2.58, 3.60}, + {-9.87, 4.82}, {-3.15, 7.36}, {-0.75, 5.23}, { 4.59, 5.41} + }; + /* Executable 
statements */ + LOG( " ZGEEV Example Program Results\n" ); + /* Query and allocate the optimal workspace */ + lwork = -1; + zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr, + &wkopt, &lwork, rwork, &info ); + lwork = (MKL_INT)wkopt.real; + work = (MKL_Complex16*)malloc( lwork*sizeof(MKL_Complex16) ); + /* Solve eigenproblem */ + zgeev( "Vectors", "Vectors", &n, a, &lda, w, vl, &ldvl, vr, &ldvr, + work, &lwork, rwork, &info ); + /* Check for convergence */ + if( info > 0 ) { + LOG( "The algorithm failed to compute eigenvalues.\n" ); + exit( 1 ); + } + /* Print eigenvalues */ + print_matrix_cmplx16( "Eigenvalues", 1, n, w, 1 ); + /* Print left eigenvectors */ + print_matrix_cmplx16( "Left eigenvectors", n, n, vl, ldvl ); + /* Print right eigenvectors */ + print_matrix_cmplx16( "Right eigenvectors", n, n, vr, ldvr ); + /* Free workspace */ + free( (void*)work ); + //exit( 0 ); +} /* End of ZGEEV Example */ + +/* Auxiliary routine: printing a matrix */ +function void +print_matrix_cmplx16( char* desc, int m, int n, MKL_Complex16* a, int lda ) { + ArenaTemp scratch = scratch_get(0,0); + int i, j; + String8 newline = str8_lit("\n"); + String8 header = str8_pushf(scratch.arena, "\n %s\n", desc ); + LOG(header.str); + //printf("\n %s \n", desc); + for( i = 0; i < m; i++ ) { + for( j = 0; j < n; j++ ) { + String8 outstr = str8_pushf(scratch.arena, " (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); + LOG(outstr.str); + //printf(" (%6.2f,%6.2f)", a[i+j*lda].real, a[i+j*lda].imag ); + } + LOG(newline.str); + //printf("\n"); + } +} + +/******************************************************************************* + * Copyright 2009-2021 Intel Corporation. + * + * This software and the related documents are Intel copyrighted materials, and + * your use of them is governed by the express license under which they were + * provided to you (License). 
Unless the License provides otherwise, you may not + * use, modify, copy, publish, distribute, disclose or transmit this software or + * the related documents without Intel's prior written permission. + * + * This software and the related documents are provided as is, with no express + * or implied warranties, other than those that are expressly stated in the + * License. + *******************************************************************************/ + +/* + DSYEVD Example. + ============== + + Program computes all eigenvalues and eigenvectors of a real symmetric + matrix A using divide and conquer algorithm, where A is: + + 6.39 0.13 -8.23 5.71 -3.18 + 0.13 8.37 -4.46 -6.10 7.21 + -8.23 -4.46 -9.58 -9.25 -7.42 + 5.71 -6.10 -9.25 3.72 8.54 + -3.18 7.21 -7.42 8.54 2.51 + + Description. + ============ + + The routine computes all eigenvalues and, optionally, eigenvectors of an + n-by-n real symmetric matrix A. The eigenvector v(j) of A satisfies + + A*v(j) = lambda(j)*v(j) + + where lambda(j) is its eigenvalue. The computed eigenvectors are + orthonormal. + If the eigenvectors are requested, then this routine uses a divide and + conquer algorithm to compute eigenvalues and eigenvectors. + + Example Program Results. 
+ ======================== + + DSYEVD Example Program Results + + Eigenvalues + -17.44 -11.96 6.72 14.25 19.84 + + Eigenvectors (stored columnwise) + -0.26 0.31 -0.74 0.33 0.42 + -0.17 -0.39 -0.38 -0.80 0.16 + -0.89 0.04 0.09 0.03 -0.45 + -0.29 -0.59 0.34 0.31 0.60 + -0.19 0.63 0.44 -0.38 0.48 + */ +#include +#include +#include + +/* Auxiliary routines prototypes */ +extern void print_matrix( char* desc, int m, int n, double* a, int lda ); + +/* Parameters */ +#define TEST_DSYEVD_N 5 +#define LDA TEST_DSYEVD_N + +/* Main program */ +function void +test_mkl_dsyevd(void) { + /* Locals */ + MKL_INT n = TEST_DSYEVD_N, lda = LDA, info, lwork, liwork; + MKL_INT iwkopt; + MKL_INT* iwork; + double wkopt; + double* work; + /* Local arrays */ + double w[TEST_DSYEVD_N]; + double a[LDA*TEST_DSYEVD_N] = { + 6.39, 0.00, 0.00, 0.00, 0.00, + 0.13, 8.37, 0.00, 0.00, 0.00, + -8.23, -4.46, -9.58, 0.00, 0.00, + 5.71, -6.10, -9.25, 3.72, 0.00, + -3.18, 7.21, -7.42, 8.54, 2.51 + }; + /* Executable statements */ + LOG( " DSYEVD Example Program Results\n" ); + /* Query and allocate the optimal workspace */ + lwork = -1; + liwork = -1; + dsyevd( "Vectors", "Upper", &n, a, &lda, w, &wkopt, &lwork, &iwkopt, + &liwork, &info ); + lwork = (MKL_INT)wkopt; + work = (double*)malloc( lwork*sizeof(double) ); + liwork = iwkopt; + iwork = (MKL_INT*)malloc( liwork*sizeof(MKL_INT) ); + /* Solve eigenproblem */ + dsyevd( "Vectors", "Upper", &n, a, &lda, w, work, &lwork, iwork, + &liwork, &info ); + /* Check for convergence */ + if( info > 0 ) { + LOG( "The algorithm failed to compute eigenvalues.\n" ); + exit( 1 ); + } + /* Print eigenvalues */ + print_matrix( "Eigenvalues", 1, n, w, 1 ); + /* Print eigenvectors */ + print_matrix( "Eigenvectors (stored columnwise)", n, n, a, lda ); + /* Free workspace */ + free( (void*)iwork ); + free( (void*)work ); +} /* End of DSYEVD Example */ + +/* Auxiliary routine: printing a matrix */ +void print_matrix( char* desc, int m, int n, double* a, int lda ) { + 
ArenaTemp scratch = scratch_get(0, 0); + int i, j; + //printf( "\n %s\n", desc ); + String8 header = str8_pushf(scratch.arena, "\n %s\n", desc); + LOG(header.str); + for( i = 0; i < m; i++ ) { + for( j = 0; j < n; j++ ) + { + String8 out = str8_pushf(scratch.arena, " %6.2f", a[i+j*lda] ); + LOG(out.str); + //printf( " %6.2f", a[i+j*lda] ); + } + LOG(str8_lit("\n").str); + } +} + +