|
| 1 | +/** |
| 2 | + * bmw-256 MDT |
| 3 | + * tpruvot - 2015 |
| 4 | + */ |
| 5 | +extern "C" { |
| 6 | +#include "sph/sph_bmw.h" |
| 7 | +} |
| 8 | + |
| 9 | +#include <miner.h> |
| 10 | +#include <cuda_helper.h> |
| 11 | + |
| 12 | +static uint32_t *d_hash[MAX_GPUS]; |
| 13 | + |
| 14 | +extern void bmw256_midstate_init(int thr_id, uint32_t threads); |
| 15 | +extern void bmw256_midstate_free(int thr_id); |
| 16 | +extern void bmw256_setBlock_80(int thr_id, void *pdata); |
| 17 | +extern void bmw256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int swap); |
| 18 | + |
| 19 | +extern uint32_t cuda_check_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash); |
| 20 | + |
| 21 | +// CPU Hash |
| 22 | +extern "C" void bmw_hash(void *state, const void *input) |
| 23 | +{ |
| 24 | + uint32_t _ALIGN(64) hash[16]; |
| 25 | + sph_bmw256_context ctx; |
| 26 | + |
| 27 | + sph_bmw256_init(&ctx); |
| 28 | + sph_bmw256(&ctx, input, 80); |
| 29 | + sph_bmw256_close(&ctx, (void*) hash); |
| 30 | + |
| 31 | + memcpy(state, hash, 32); |
| 32 | +} |
| 33 | + |
| 34 | +static bool init[MAX_GPUS] = { 0 }; |
| 35 | + |
| 36 | +extern "C" int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done) |
| 37 | +{ |
| 38 | + uint32_t _ALIGN(64) endiandata[20]; |
| 39 | + uint32_t *pdata = work->data; |
| 40 | + uint32_t *ptarget = work->target; |
| 41 | + const uint32_t first_nonce = pdata[19]; |
| 42 | + uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21); |
| 43 | + if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce); |
| 44 | + |
| 45 | + if (opt_benchmark) |
| 46 | + ptarget[7] = 0x0005; |
| 47 | + |
| 48 | + if (!init[thr_id]) { |
| 49 | + cudaSetDevice(device_map[thr_id]); |
| 50 | + if (opt_cudaschedule == -1 && gpu_threads == 1) { |
| 51 | + cudaDeviceReset(); |
| 52 | + // reduce cpu usage |
| 53 | + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); |
| 54 | + } |
| 55 | + gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); |
| 56 | + |
| 57 | + cuda_check_cpu_init(thr_id, throughput); |
| 58 | + bmw256_midstate_init(thr_id, throughput); |
| 59 | + |
| 60 | + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput)); |
| 61 | + |
| 62 | + init[thr_id] = true; |
| 63 | + } |
| 64 | + |
| 65 | + for (int k=0; k < 20; k++) { |
| 66 | + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); |
| 67 | + } |
| 68 | + |
| 69 | + cudaGetLastError(); |
| 70 | + bmw256_setBlock_80(thr_id, (void*)endiandata); |
| 71 | + |
| 72 | + cuda_check_cpu_setTarget(ptarget); |
| 73 | + |
| 74 | + do { |
| 75 | + bmw256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], 1); |
| 76 | + |
| 77 | + *hashes_done = pdata[19] - first_nonce + throughput; |
| 78 | + |
| 79 | + work->nonces[0] = cuda_check_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]); |
| 80 | + if (work->nonces[0] != UINT32_MAX) |
| 81 | + { |
| 82 | + const uint32_t Htarg = ptarget[7]; |
| 83 | + uint32_t _ALIGN(64) vhash[8]; |
| 84 | + be32enc(&endiandata[19], work->nonces[0]); |
| 85 | + bmw_hash(vhash, endiandata); |
| 86 | + |
| 87 | + if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) { |
| 88 | + work->valid_nonces = 1; |
| 89 | + work_set_target_ratio(work, vhash); |
| 90 | + work->nonces[1] = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1); |
| 91 | + if (work->nonces[1] != 0) { |
| 92 | + be32enc(&endiandata[19], work->nonces[1]); |
| 93 | + bmw_hash(vhash, endiandata); |
| 94 | + bn_set_target_ratio(work, vhash, 1); |
| 95 | + work->valid_nonces++; |
| 96 | + pdata[19] = max(work->nonces[0], work->nonces[1]) + 1; |
| 97 | + } else { |
| 98 | + pdata[19] = work->nonces[0] + 1; // cursor |
| 99 | + } |
| 100 | + return work->valid_nonces; |
| 101 | + } |
| 102 | + else if (vhash[7] > Htarg) { |
| 103 | + gpu_increment_reject(thr_id); |
| 104 | + if (!opt_quiet) |
| 105 | + gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", work->nonces[0]); |
| 106 | + pdata[19] = work->nonces[0] + 1; |
| 107 | + continue; |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + if ((uint64_t) throughput + pdata[19] >= max_nonce) { |
| 112 | + pdata[19] = max_nonce; |
| 113 | + break; |
| 114 | + } |
| 115 | + |
| 116 | + pdata[19] += throughput; |
| 117 | + |
| 118 | + } while (!work_restart[thr_id].restart); |
| 119 | + |
| 120 | + *hashes_done = pdata[19] - first_nonce; |
| 121 | + return 0; |
| 122 | +} |
| 123 | + |
| 124 | +// cleanup |
| 125 | +extern "C" void free_bmw(int thr_id) |
| 126 | +{ |
| 127 | + if (!init[thr_id]) |
| 128 | + return; |
| 129 | + |
| 130 | + cudaThreadSynchronize(); |
| 131 | + |
| 132 | + cudaFree(d_hash[thr_id]); |
| 133 | + bmw256_midstate_free(thr_id); |
| 134 | + cuda_check_cpu_free(thr_id); |
| 135 | + |
| 136 | + cudaDeviceSynchronize(); |
| 137 | + init[thr_id] = false; |
| 138 | +} |
0 commit comments