|
1 | | -#pragma once |
| 1 | +#pragma once |
2 | 2 |
|
3 | | -#include <cpuid.h> |
4 | 3 | #include <cstring> |
5 | | -#include <string> |
6 | 4 | #include <iostream> |
| 5 | +#include <string> |
| 6 | + |
| 7 | +#if defined(__APPLE__) |
| 8 | +# include <sys/sysctl.h> |
| 9 | +#endif |
| 10 | +#if defined(__x86_64__) || defined(_M_X64) |
| 11 | +# include <cpuid.h> |
| 12 | +# define TENSORIUM_X86 1 |
| 13 | +#elif defined(__aarch64__) || defined(__arm64__) |
| 14 | +# define TENSORIUM_ARM 1 |
| 15 | +#else |
| 16 | +# define TENSORIUM_FALLBACK 1 |
| 17 | +#endif |
7 | 18 |
|
| 19 | +// ─────────────────────────────── CPU Brand ─────────────────────────────── |
/// @brief Returns a human-readable CPU brand string for the current machine.
///
/// - x86-64: reads the processor brand string from CPUID leaves
///   0x80000002..0x80000004 (3 leaves x 16 bytes = 48 bytes).
/// - ARM64 on Apple platforms: queries sysctl ("machdep.cpu.brand_string",
///   then "hw.model" as a fallback).
/// - Other ARM64 targets: returns "Generic ARM CPU".
/// - Anything else (or an x86 CPU without the brand-string leaves):
///   returns "Unknown CPU".
///
/// @return A non-empty, NUL-free brand/description string.
inline std::string get_cpu_brand() {
#if defined(TENSORIUM_X86)
    constexpr unsigned int kExtendedBase = 0x80000000u;
    constexpr unsigned int kBrandFirstLeaf = 0x80000002u;
    constexpr unsigned int kBrandLastLeaf = 0x80000004u;

    // The brand-string leaves are optional: make sure the CPU reports them
    // before reading, otherwise the registers hold unrelated data.
    if (__get_cpuid_max(kExtendedBase, nullptr) < kBrandLastLeaf)
        return "Unknown CPU";

    char brand[0x40] = {0}; // 48 bytes of payload + guaranteed NUL terminator
    unsigned int regs[4] = {0};
    for (unsigned int i = 0; i < 3; ++i) {
        __cpuid(kBrandFirstLeaf + i, regs[0], regs[1], regs[2], regs[3]);
        std::memcpy(brand + i * sizeof(regs), regs, sizeof(regs));
    }
    if (brand[0] == '\0')
        return "Unknown CPU";
    return std::string(brand);
#elif defined(TENSORIUM_ARM)
// Apple Silicon / ARM64 fallback
// cf. /proc/cpuinfo (Linux) or sysctl hw.model (macOS)
#  if defined(__APPLE__)
    const char *const keys[] = {"machdep.cpu.brand_string", "hw.model"};
    for (const char *key : keys) {
        // Fresh, zeroed buffer and size for every query: sysctlbyname
        // overwrites `size`, so reusing it after a failed call would pass a
        // stale length. Reserving the last byte keeps the result terminated.
        char buffer[128] = {0};
        size_t size = sizeof(buffer) - 1;
        if (sysctlbyname(key, buffer, &size, nullptr, 0) == 0 && buffer[0] != '\0')
            return std::string(buffer);
    }
    return "Apple ARM CPU";
#  else
    return "Generic ARM CPU";
#  endif
#else
    return "Unknown CPU";
#endif
}
17 | 47 |
|
| 48 | +// ─────────────────────────────── Block size heuristic ─────────────────────────────── |
18 | 49 | inline size_t detect_optimal_block_size() { |
19 | | - std::string brand = get_cpu_brand(); |
| 50 | + std::string brand = get_cpu_brand(); |
20 | 51 |
|
21 | | - if (brand.find("Xeon Phi") != std::string::npos) return 256; |
22 | | - if (brand.find("Xeon") != std::string::npos) return 128; |
23 | | - if (brand.find("Ryzen") != std::string::npos) return 96; |
24 | | - if (brand.find("Apple") != std::string::npos) return 64; |
25 | | - if (brand.find("Core(TM)") != std::string::npos) return 128; |
26 | | - std::cout << "Unknown CPU brand. Defaulting to 64." << std::endl; |
27 | | - return 64; |
28 | | -} |
| 52 | + if (brand.find("Xeon Phi") != std::string::npos) |
| 53 | + return 256; |
| 54 | + if (brand.find("Xeon") != std::string::npos) |
| 55 | + return 128; |
| 56 | + if (brand.find("Ryzen") != std::string::npos) |
| 57 | + return 96; |
| 58 | + if (brand.find("Apple") != std::string::npos) |
| 59 | + return 64; |
| 60 | + if (brand.find("Core(TM)") != std::string::npos) |
| 61 | + return 128; |
29 | 62 |
|
| 63 | +#if defined(TENSORIUM_ARM) |
| 64 | + return 64; // safe default for M1/M2 |
| 65 | +#else |
| 66 | + std::cout << "[detect_optimal_block_size] Unknown CPU brand. Defaulting to 64.\n"; |
| 67 | + return 64; |
| 68 | +#endif |
| 69 | +} |
0 commit comments