From 999167004668855fa5821915dd5bcca721016b8a Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 25 Jan 2025 14:05:36 +0200 Subject: [PATCH 01/39] Added HW sha256 Added volatile task management Added sha256 validation Added tiny nerdSha256 optimization Added client mutex --- src/NerdMinerV2.ino.cpp | 225 +++++++++++++++++++++ src/ShaTests/nerdSHA256plus.cpp | 9 +- src/mining.cpp | 336 +++++++++++++++++++++++++------- src/mining.h | 9 +- src/utils.cpp | 2 - 5 files changed, 505 insertions(+), 76 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 9f369cb..af88135 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -19,6 +19,8 @@ #include "TouchHandler.h" #endif +//#define HW_SHA256_TEST + //3 seconds WDT #define WDT_TIMEOUT 3 //15 minutes WDT for miner task @@ -51,6 +53,222 @@ const char* ntpServer = "pool.ntp.org"; //void runMonitor(void *name); +#ifdef HW_SHA256_TEST + +#include "mbedtls/sha256.h" +#include +#include +#include + +static const uint8_t s_test_buffer[128] = +{ + 0x00, 0x00, 0x00, 0x22, 0x99, 0x44, 0xbb, 0xff, 0xbb, 0x00, 0x00, 0x77, 0x44, 0xcc, 0x11, 0x77, + 0x88, 0x55, 0xbb, 0x44, 0x55, 0x00, 0x77, 0x88, 0x99, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbb, 0xbb, 0x66, 0x11, 0x88, 0x33, 0x44, 0x99, 0xcc, 0x33, 0xff, 0x22, + 0x11, 0xaa, 0x77, 0xee, 0xbb, 0x66, 0xee, 0xcc, 0xee, 0x66, 0xee, 0xdd, 0x77, 0x55, 0x22, 0x22, + 0xcc, 0xcc, 0x66, 0xee, 0x22, 0xdd, 0x99, 0x66, 0x66, 0x88, 0x00, 0x11, 0x2e, 0x33, 0x41, 0x19, + + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80 +}; + +static const uint8_t s_test_buffer_aligned[128] __attribute__((aligned(256))) = +{ + 0x00, 0x00, 0x00, 0x22, 0x99, 0x44, 0xbb, 0xff, 0xbb, 0x00, 0x00, 0x77, 0x44, 0xcc, 
0x11, 0x77, + 0x88, 0x55, 0xbb, 0x44, 0x55, 0x00, 0x77, 0x88, 0x99, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbb, 0xbb, 0x66, 0x11, 0x88, 0x33, 0x44, 0x99, 0xcc, 0x33, 0xff, 0x22, + 0x11, 0xaa, 0x77, 0xee, 0xbb, 0x66, 0xee, 0xcc, 0xee, 0x66, 0xee, 0xdd, 0x77, 0x55, 0x22, 0x22, + 0xcc, 0xcc, 0x66, 0xee, 0x22, 0xdd, 0x99, 0x66, 0x66, 0x88, 0x00, 0x11, 0x2e, 0x33, 0x41, 0x19, + + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80 +}; + +static uint8_t interResult_aligned[64] __attribute__((aligned(256))); +static uint8_t midstate_aligned[32] __attribute__((aligned(256))); +static uint8_t hash_aligned[64] __attribute__((aligned(256))); + +IRAM_ATTR void nerd_sha_hal_wait_idle() +{ + while (sha_ll_busy()) + {} +} + +IRAM_ATTR void HwShaTest() +{ + uint8_t interResult[64]; + uint8_t midstate[32]; + uint8_t hash[64]; + memset(interResult, 0, sizeof(interResult)); + interResult[32] = 0x80; + interResult[62] = 0x01; + interResult[63] = 0x00; + + memset(interResult_aligned, 0, sizeof(interResult_aligned)); + interResult_aligned[32] = 0x80; + interResult_aligned[62] = 0x01; + interResult_aligned[63] = 0x00; + + uint32_t time_start = micros(); + const int test_count = 1000000; + +#if 0 + //Generic software 16KH/s + mbedtls_sha256_context ctx; + mbedtls_sha256_init(&ctx); + for (int i = 0; i < test_count; ++i) + { + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, s_test_buffer, 80); + mbedtls_sha256_finish_ret(&ctx, interResult); + + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, interResult, 32); + mbedtls_sha256_finish_ret(&ctx, hash); + } + mbedtls_sha256_free(&ctx); +#endif + +#if 0 + //Hardware high level 62KH/s + esp_sha_acquire_hardware(); + for (int i = 0; i < 
test_count; ++i) + { + esp_sha_dma(SHA2_256, s_test_buffer+64, 64, s_test_buffer, 64, true); + esp_sha_read_digest_state(SHA2_256, interResult); + esp_sha_dma(SHA2_256, 0, 0, interResult, 64, true); + esp_sha_read_digest_state(SHA2_256, hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //Hardware block + //NOT avaliable + esp_sha_lock_engine(SHA2_256); + for (int i = 0; i < test_count; ++i) + { + esp_sha_block(SHA2_256, s_test_buffer, true); + esp_sha_block(SHA2_256, s_test_buffer+64, false); + esp_sha_read_digest_state(SHA2_256, interResult); + esp_sha_block(SHA2_256, interResult, true); + esp_sha_read_digest_state(SHA2_256, hash); + } + esp_sha_unlock_engine(SHA2_256); +#endif + +#if 0 + //Hardware low level 132KH/s + esp_sha_acquire_hardware(); + for (int i = 0; i < test_count; ++i) + { + sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + sha_hal_read_digest(SHA2_256, interResult); + sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + sha_hal_read_digest(SHA2_256, hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //Hardware low level + midstate 156KH/s + esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + sha_hal_read_digest(SHA2_256, midstate); + for (int i = 0; i < test_count; ++i) + { + sha_hal_write_digest(SHA2_256, midstate); + sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + sha_hal_read_digest(SHA2_256, interResult); + sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + sha_hal_read_digest(SHA2_256, hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //Hardware low level + midstate + aligned 156KH/s (No sense) + esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, s_test_buffer_aligned, 64/4, true); + sha_hal_read_digest(SHA2_256, midstate_aligned); + for (int i = 0; i < test_count; ++i) + { + sha_hal_write_digest(SHA2_256, midstate_aligned); + sha_hal_hash_block(SHA2_256, 
s_test_buffer_aligned+64, 64/4, false); + sha_hal_read_digest(SHA2_256, interResult_aligned); + sha_hal_hash_block(SHA2_256, interResult_aligned, 64/4, true); + sha_hal_read_digest(SHA2_256, hash_aligned); + } + esp_sha_release_hardware(); + memcpy(hash, hash_aligned, sizeof(hash_aligned)); +#endif + +#if 1 + //Hardware LL 161KH/s + esp_sha_acquire_hardware(); + //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + sha_hal_wait_idle(); + sha_ll_fill_text_block(s_test_buffer, 64/4); + sha_ll_start_block(SHA2_256); + + //sha_hal_read_digest(SHA2_256, midstate); + sha_ll_load(SHA2_256); + sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, midstate, 256 / 32); + + for (int i = 0; i < test_count; ++i) + { + //sha_hal_write_digest(SHA2_256, midstate); + sha_ll_write_digest(SHA2_256, midstate, 256 / 32); + + //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + sha_hal_wait_idle(); + sha_ll_fill_text_block(s_test_buffer+64, 64/4); + sha_ll_continue_block(SHA2_256); + + //sha_hal_read_digest(SHA2_256, interResult); + sha_ll_load(SHA2_256); + sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + + //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + sha_hal_wait_idle(); + sha_ll_fill_text_block(interResult, 64/4); + sha_ll_start_block(SHA2_256); + + //sha_hal_read_digest(SHA2_256, hash); + sha_ll_load(SHA2_256); + sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, hash, 256 / 32); + } + esp_sha_release_hardware(); +#endif + + uint32_t time_end = micros(); + double hash_rate = ((double)test_count * 1000000) / (double)(time_end - time_start); + Serial.print("DmaHashrate="); + Serial.print((int)hash_rate/1000); + Serial.println("KH/s"); + + Serial.print("interResult: "); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x", interResult[i]); + Serial.println(""); + + Serial.print("hash: "); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x", hash[i]); + Serial.println(""); + + //should be + 
//6fa464b007f2d577edfa5dfe9dfc3f9209f36d1a6711d314ea68ccdd03000000 +} + +#endif + /********* INIT *****/ void setup() { @@ -74,6 +292,13 @@ void setup() disableCore0WDT(); //disableCore1WDT(); +#ifdef HW_SHA256_TEST + while (1) + { + HwShaTest(); + } +#endif + // Setup the buttons #if defined(PIN_BUTTON_1) && !defined(PIN_BUTTON_2) //One button device button1.setPressMs(5*SECOND_MS); diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index fd457b4..7ad54cb 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -208,7 +208,7 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8 uint32_t* buffer32; //*********** Init 1rst SHA *********** - uint32_t W[16] = { GET_UINT32_BE(dataIn, 0), GET_UINT32_BE(dataIn, 4), + uint32_t W[64] = { GET_UINT32_BE(dataIn, 0), GET_UINT32_BE(dataIn, 4), GET_UINT32_BE(dataIn, 8), GET_UINT32_BE(dataIn, 12), 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640}; @@ -380,12 +380,17 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8 P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(58), K[58]); P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(59), K[59]); P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(60), K[60]); + if ((uint32_t)(A[7] & 0xFFFF) != 0x32E7) + { + doubleHash[30] = 0xFF; + return false; + } P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(61), K[61]); P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(62), K[62]); P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(63), K[63]); PUT_UINT32_BE(0x5BE0CD19 + A[7], doubleHash, 28); - if(doubleHash[31] !=0 || doubleHash[30] !=0) return false; + //if(doubleHash[31] !=0 || doubleHash[30] !=0) return false; PUT_UINT32_BE(0x6A09E667 + A[0], doubleHash, 0); PUT_UINT32_BE(0xBB67AE85 + A[1], doubleHash, 4); PUT_UINT32_BE(0x3C6EF372 + A[2], doubleHash, 8); diff --git a/src/mining.cpp b/src/mining.cpp index 8e529a9..9f9a653 100644 --- a/src/mining.cpp +++ 
b/src/mining.cpp @@ -13,6 +13,17 @@ #include "timeconst.h" #include "drivers/displays/display.h" #include "drivers/storage/storage.h" +#include +#include "mbedtls/sha256.h" + +//#define SHA256_VALIDATE +#define HARDWARE_SHA265 + +#ifdef HARDWARE_SHA265 +#include +#include +#include +#endif nvs_handle_t stat_handle; @@ -23,8 +34,15 @@ uint32_t totalKHashes = 0; uint32_t elapsedKHs = 0; uint64_t upTime = 0; -uint32_t shares; // increase if blockhash has 32 bits of zeroes -uint32_t valids; // increased if blockhash <= target +volatile uint32_t shares; // increase if blockhash has 32 bits of zeroes +volatile uint32_t valids; // increased if blockhash <= target + +static std::mutex s_nonce_batch_mutex; +static volatile uint32_t s_nonce_batch = 0; + +static volatile uint8_t s_thread_busy[2] = {0, 0}; +static volatile uint32_t s_thread_task_id = 0; +static volatile uint32_t s_thread_task_aborted_id = 0; // Track best diff double best_diff = 0.0; @@ -37,6 +55,7 @@ IPAddress serverIP(1, 1, 1, 1); //Temporally save poolIPaddres //Global work data static WiFiClient client; +static std::mutex s_client_mutex; static miner_data mMiner; //Global miner data (Create a miner class TODO) mining_subscribe mWorker; mining_job mJob; @@ -91,7 +110,10 @@ bool checkPoolInactivity(unsigned int keepAliveTime, unsigned long inactivityTim mLastTXtoPool = millis(); Serial.println(" Sending : KeepAlive suggest_difficulty"); //if (client.print("{}\n") == 0) { + { + std::lock_guard lock(s_client_mutex); tx_suggest_difficulty(client, DEFAULT_DIFFICULTY); + } /*if(tx_suggest_difficulty(client, DEFAULT_DIFFICULTY)){ Serial.println(" Sending keepAlive to pool -> Detected client disconnected"); return true; @@ -144,7 +166,7 @@ void runStratumWorker(void *name) { if(!isMinerSuscribed){ //Stop miner current jobs - mMiner.inRun = false; + s_thread_task_aborted_id = s_thread_task_id; mWorker = init_mining_subscribe(); // STEP 1: Pool server connection (SUBSCRIBE) @@ -170,32 +192,50 @@ void 
runStratumWorker(void *name) { if(checkPoolInactivity(KEEPALIVE_TIME_ms, POOLINACTIVITY_TIME_ms)){ //Restart connection Serial.println(" Detected more than 2 min without data form stratum server. Closing socket and reopening..."); - client.stop(); + { + std::lock_guard lock(s_client_mutex); + client.stop(); + } + s_thread_task_aborted_id = s_thread_task_id; isMinerSuscribed=false; continue; } //Read pending messages from pool - while(client.connected() && client.available()){ + while(true) + { + String line; + { + std::lock_guard lock(s_client_mutex); + if (!client.connected() || !client.available()) + break; + line = client.readStringUntil('\n'); + } - Serial.println(" Received message from pool"); - String line = client.readStringUntil('\n'); + Serial.println(" Received message from pool"); stratum_method result = parse_mining_method(line); switch (result) { case STRATUM_PARSE_ERROR: Serial.println(" Parsed JSON: error on JSON"); break; - case MINING_NOTIFY: if(parse_mining_notify(line, mJob)){ + case MINING_NOTIFY: if(parse_mining_notify(line, mJob)) + { //Increse templates readed templates++; //Stop miner current jobs - mMiner.inRun = false; + //mMiner.inRun = false; + s_thread_task_aborted_id = s_thread_task_id; + while (s_thread_busy[0] || s_thread_busy[1]) + {} + //Prepare data for new jobs mMiner=calculateMiningData(mWorker,mJob); mMiner.poolDifficulty = currentPoolDifficulty; - mMiner.newJob = true; - mMiner.newJob2 = true; + { + std::lock_guard lock(s_nonce_batch_mutex); + s_nonce_batch = TARGET_NONCE - MAX_NONCE; + } + s_thread_task_id++; //Give new job to miner - } break; case MINING_SET_DIFFICULTY: parse_mining_set_difficulty(line, currentPoolDifficulty); @@ -225,58 +265,220 @@ void runMiner(void * task_id) { Serial.printf("[MINER] %d Started runMiner Task!\n", miner_id); - while(1){ + uint32_t task_current_id = 0; + while(1) + { //Wait new job - while(1){ - if(mMiner.newJob==true || mMiner.newJob2==true) break; - vTaskDelay(100 / portTICK_PERIOD_MS); 
//Small delay - } - vTaskDelay(10 / portTICK_PERIOD_MS); //Small delay to join both mining threads + s_thread_busy[miner_id] = 0; + while (task_current_id == s_thread_task_id) + vTaskDelay(1 / portTICK_PERIOD_MS); //Small delay to join both mining threads - if(mMiner.newJob) - mMiner.newJob = false; //Clear newJob flag - else if(mMiner.newJob2) - mMiner.newJob2 = false; //Clear newJob flag - mMiner.inRun = true; //Set inRun flag + task_current_id = s_thread_task_id; + s_thread_busy[miner_id] = 1; + Serial.printf("[MINER] %d Task=%d\n", miner_id, task_current_id); + mMonitor.NerdStatus = NM_hashing; //Prepare Premining data nerdSHA256_context nerdMidstate; //NerdShaplus uint8_t hash[32]; + uint8_t interResult[64]; + uint8_t hash_validate[32]; + uint8_t midstate[32]; - //Calcular midstate - nerd_mids(&nerdMidstate, mMiner.bytearray_blockheader); //NerdShaplus - - - // search a valid nonce - unsigned long nonce = TARGET_NONCE - MAX_NONCE; - // split up odd/even nonces between miner tasks - nonce += miner_id; - uint32_t startT = micros(); unsigned char *header64; - // each miner thread needs to track its own blockheader template - uint8_t temp; - - memcpy(mMiner.bytearray_blockheader2, &mMiner.bytearray_blockheader, 80); + //Calcular midstate if (miner_id == 0) + { + #ifdef HARDWARE_SHA265 + esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, mMiner.bytearray_blockheader, 64/4, true); + sha_hal_read_digest(SHA2_256, midstate); + esp_sha_release_hardware(); + + memset(mMiner.bytearray_blockheader+80, 0, 128-80); + mMiner.bytearray_blockheader[80] = 0x80; + mMiner.bytearray_blockheader[126] = 0x02; + mMiner.bytearray_blockheader[127] = 0x80; + + memset(interResult, 0, sizeof(interResult)); + interResult[32] = 0x80; + interResult[62] = 0x01; + interResult[63] = 0x00; + + #else + nerd_mids(&nerdMidstate, mMiner.bytearray_blockheader); //NerdShaplus + #endif header64 = mMiner.bytearray_blockheader + 64; - else + } else + { + memcpy(mMiner.bytearray_blockheader2, 
&mMiner.bytearray_blockheader, 80); + nerd_mids(&nerdMidstate, mMiner.bytearray_blockheader2); //NerdShaplus header64 = mMiner.bytearray_blockheader2 + 64; + } + + uint32_t nonce = 0; + uint32_t nonce_end = 0; + uint32_t startT = micros(); + + // each miner thread needs to track its own blockheader template + uint8_t temp; bool is16BitShare=true; Serial.println(">>> STARTING TO HASH NONCES"); - while(true) { + while(true) + { + if (nonce >= nonce_end) + { + std::lock_guard lock(s_nonce_batch_mutex); + nonce = s_nonce_batch; +#ifdef HARDWARE_SHA265 + if (miner_id == 0) + nonce_end = nonce + 512; + else +#endif + nonce_end = nonce + 128; + if (nonce_end > TARGET_NONCE) + nonce_end = TARGET_NONCE; + s_nonce_batch = nonce_end; + } + +#ifdef HARDWARE_SHA265 if (miner_id == 0) - memcpy(mMiner.bytearray_blockheader + 76, &nonce, 4); - else - memcpy(mMiner.bytearray_blockheader2 + 76, &nonce, 4); + { + //Hardware + esp_sha_acquire_hardware(); + while (nonce < nonce_end) + { + memcpy(header64+12, &nonce, 4); + + sha_ll_write_digest(SHA2_256, midstate, 256 / 32); + sha_hal_wait_idle(); + sha_ll_fill_text_block(header64, 64/4); + sha_ll_continue_block(SHA2_256); + + sha_ll_load(SHA2_256); + sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + + sha_hal_wait_idle(); + sha_ll_fill_text_block(interResult, 64/4); + sha_ll_start_block(SHA2_256); + sha_ll_load(SHA2_256); + sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, hash, 256 / 32); + #ifdef SHA256_VALIDATE + mbedtls_sha256_context ctx; + mbedtls_sha256_init(&ctx); + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, header64-64, 80); + mbedtls_sha256_finish_ret(&ctx, interResult); - //nerd_double_sha2(&nerdMidstate, header64, hash); - is16BitShare=nerd_sha256d(&nerdMidstate, header64, hash); //Boosted 80Khs sha + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, interResult, 32); + mbedtls_sha256_finish_ret(&ctx, hash_validate); + mbedtls_sha256_free(&ctx); + + 
bool failed = false; + for (size_t i = 0; i < 32; i++) + { + if (hash[i] != hash_validate[i]) + failed = true; + } + if (failed) + { + Serial.println("Hardware SHA256 Failed"); + Serial.println("HwSha256:"); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x,", hash[i]); + Serial.println(""); + + Serial.println("mbedtls Sha256:"); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x,", hash_validate[i]); + Serial.println(""); + vTaskDelay(500 / portTICK_PERIOD_MS); + return; //Crash Here + } + #endif + + hashes++; + nonce++; + + if(hash[31] == 0 && hash[30] == 0) + break; + } + esp_sha_release_hardware(); + } else +#endif + { + while (nonce < nonce_end) + { + memcpy(header64+12, &nonce, 4); + nerd_sha256d(&nerdMidstate, header64, hash); //Boosted 80Khs sha + + #ifdef SHA256_VALIDATE + //Important - Remove Return optimization + mbedtls_sha256_context ctx; + mbedtls_sha256_init(&ctx); + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, header64-64, 80); + mbedtls_sha256_finish_ret(&ctx, interResult); + + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, interResult, 32); + mbedtls_sha256_finish_ret(&ctx, hash_validate); + mbedtls_sha256_free(&ctx); + + bool failed = false; + for (size_t i = 0; i < 32; i++) + { + if (hash[i] != hash_validate[i]) + failed = true; + } + if (failed) + { + Serial.println("SHA256 Failed"); + Serial.println("Input:"); + for (size_t i = 0; i < 80; i++) + { + Serial.printf("0x%02x,", (header64-64)[i]); + if (i % 16 == 15) + Serial.println(""); + } + Serial.println(""); + + Serial.println("Midstate:"); + for (size_t i = 0; i < 8; i++) + { + Serial.printf("0x%08x,", nerdMidstate.digest[i]); + Serial.println(""); + } + + Serial.println("NerdSha256:"); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x,", hash[i]); + Serial.println(""); + + Serial.println("mbedtls Sha256:"); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x,", hash_validate[i]); + Serial.println(""); + vTaskDelay(500 / 
portTICK_PERIOD_MS); + return; //Crash Here + } + #endif + hashes++; + nonce++; + + if(hash[31] == 0 && hash[30] == 0) + break; + } + } /*Serial.print("hash1: "); for (size_t i = 0; i < 32; i++) @@ -287,17 +489,19 @@ void runMiner(void * task_id) { Serial.printf("%02x", hash2[i]); Serial.println(""); */ - hashes++; - if (nonce > TARGET_NONCE) break; //exit - if(!mMiner.inRun) { Serial.println ("MINER WORK ABORTED >> waiting new job"); break;} + if (nonce >= TARGET_NONCE) + break; //exit + + + if(task_current_id <= s_thread_task_aborted_id) + { + Serial.printf("MINER %d WORK ABORTED Task=%d\n", miner_id); + break; + } // check if 16bit share - if(hash[31] !=0 || hash[30] !=0) { - //if(!is16BitShare){ - // increment nonce - nonce += 2; + if(hash[31] !=0 || hash[30] !=0) continue; - } //Check target to submit //Difficulty of 1 > 0x00000000FFFF0000000000000000000000000000000000000000000000000000 @@ -312,7 +516,10 @@ void runMiner(void * task_id) { if(diff_hash > mMiner.poolDifficulty)//(hash[29] <= 0x3B)//(diff_hash > 1e-9) { - tx_mining_submit(client, mWorker, mJob, nonce); + { + std::lock_guard lock(s_client_mutex); + tx_mining_submit(client, mWorker, mJob, nonce-1); + } Serial.print(" - Current diff share: "); Serial.println(diff_hash,12); Serial.print(" - Current pool diff : "); Serial.println(mMiner.poolDifficulty,12); Serial.print(" - TX SHARE: "); @@ -331,11 +538,8 @@ void runMiner(void * task_id) { } // check if 32bit share - if(hash[29] !=0 || hash[28] !=0) { - // increment nonce - nonce += 2; + if(hash[29] !=0 || hash[28] !=0) continue; - } shares++; // check if valid header @@ -346,17 +550,14 @@ void runMiner(void * task_id) { // wait for new job break; } - // increment nonce - nonce += 2; } // exit if found a valid result or nonce > MAX_NONCE //wc_Sha256Free(&sha256); - //wc_Sha256Free(midstate); - - mMiner.inRun = false; + //wc_Sha256Free(midstate); Serial.print(">>> Finished job waiting new data from pool"); - if(hashes>=MAX_NONCE_STEP) { + 
if(hashes>=MAX_NONCE_STEP) + { Mhashes=Mhashes+MAX_NONCE_STEP/1000000; hashes=hashes-MAX_NONCE_STEP; } @@ -364,7 +565,7 @@ void runMiner(void * task_id) { uint32_t duration = micros() - startT; if (esp_task_wdt_reset() == ESP_OK) Serial.print(">>> Resetting watchdog timer"); - } + } //while (1) } #define DELAY 100 @@ -383,8 +584,11 @@ void restoreStat() { size_t required_size = sizeof(double); nvs_get_blob(stat_handle, "best_diff", &best_diff, &required_size); nvs_get_u32(stat_handle, "Mhashes", &Mhashes); - nvs_get_u32(stat_handle, "shares", &shares); - nvs_get_u32(stat_handle, "valids", &valids); + uint32_t nv_shares = shares, nv_valids = valids; /* nvs_get_u32 leaves its output untouched on error (e.g. key not found), so seed the temporaries with the current counters instead of leaving them indeterminate */ + nvs_get_u32(stat_handle, "shares", &nv_shares); + nvs_get_u32(stat_handle, "valids", &nv_valids); + shares = nv_shares; + valids = nv_valids; nvs_get_u32(stat_handle, "templates", &templates); nvs_get_u64(stat_handle, "upTime", &upTime); } @@ -439,7 +643,7 @@ void runMonitor(void *name) if (elapsedKHs == 0) { Serial.printf(">>> [i] Miner: newJob>%s / inRun>%s) - Client: connected>%s / subscribed>%s / wificonnected>%s\n", - mMiner.newJob ? "true" : "false", mMiner.inRun ? "true" : "false", + (s_thread_task_id != s_thread_task_aborted_id) ? "true" : "false", s_thread_busy[0] ? "true" : "false", client.connected() ? "true" : "false", isMinerSuscribed ? "true" : "false", WiFi.status() == WL_CONNECTED ? 
"true" : "false"); } diff --git a/src/mining.h b/src/mining.h index a1adf14..c45a68e 100644 --- a/src/mining.h +++ b/src/mining.h @@ -6,7 +6,7 @@ #define MAX_NONCE_STEP 5000000U #define MAX_NONCE 25000000U #define TARGET_NONCE 471136297U -#define DEFAULT_DIFFICULTY 1e-4 +#define DEFAULT_DIFFICULTY 0.0002 #define KEEPALIVE_TIME_ms 30000 #define POOLINACTIVITY_TIME_ms 60000 @@ -23,13 +23,10 @@ typedef struct{ uint8_t bytearray_target[32]; uint8_t bytearray_pooltarget[32]; uint8_t merkle_result[32]; - uint8_t bytearray_blockheader[80]; + uint8_t bytearray_blockheader[128]; uint8_t bytearray_blockheader2[80]; double poolDifficulty; - bool inRun; - bool newJob; - bool newJob2; -}miner_data; +} miner_data; #endif // UTILS_API_H \ No newline at end of file diff --git a/src/utils.cpp b/src/utils.cpp index 20120be..afd305f 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -174,8 +174,6 @@ miner_data init_miner_data(void){ miner_data newMinerData; newMinerData.poolDifficulty = DEFAULT_DIFFICULTY; - newMinerData.inRun = false; - newMinerData.newJob = false; return newMinerData; } From 1095645366de1424baae05b0a42b38386fb667c3 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 25 Jan 2025 14:19:10 +0200 Subject: [PATCH 02/39] Log fix --- src/mining.cpp | 2 +- src/mining.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index 9f9a653..205b21a 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -495,7 +495,7 @@ void runMiner(void * task_id) { if(task_current_id <= s_thread_task_aborted_id) { - Serial.printf("MINER %d WORK ABORTED Task=%d\n", miner_id); + Serial.printf("MINER %d WORK ABORTED Task=%d Abort=%d\n", miner_id, task_current_id, s_thread_task_aborted_id); break; } diff --git a/src/mining.h b/src/mining.h index c45a68e..11b27cb 100644 --- a/src/mining.h +++ b/src/mining.h @@ -6,7 +6,7 @@ #define MAX_NONCE_STEP 5000000U #define MAX_NONCE 25000000U #define TARGET_NONCE 471136297U -#define DEFAULT_DIFFICULTY 0.0002 
+#define DEFAULT_DIFFICULTY 0.00015 #define KEEPALIVE_TIME_ms 30000 #define POOLINACTIVITY_TIME_ms 60000 From c44a115b8ba6711bfc354473f44b4d6e1470001a Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 25 Jan 2025 17:34:36 +0200 Subject: [PATCH 03/39] nerd sha256 bake optimization --- src/ShaTests/nerdSHA256plus.cpp | 290 +++++++++++++++++++++++++++++++- src/ShaTests/nerdSHA256plus.h | 3 + src/mining.cpp | 32 +++- 3 files changed, 323 insertions(+), 2 deletions(-) diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index 7ad54cb..fb50d68 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -400,4 +400,292 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8 PUT_UINT32_BE(0x1F83D9AB + A[6], doubleHash, 24); return true; -} \ No newline at end of file +} + + +IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, uint8_t* dataIn, uint32_t* bake) //15 words +{ + bake[0] = GET_UINT32_BE(dataIn, 0); + bake[1] = GET_UINT32_BE(dataIn, 4); + bake[2] = GET_UINT32_BE(dataIn, 8); + //w[3] = GET_UINT32_BE(dataIn, 12); + + bake[3] = S1( 0) + 0 + S0(bake[1]) + bake[0]; + bake[4] = S1(640) + 0 + S0(bake[2]) + bake[1]; + + uint32_t* a = bake + 5; + a[0] = digest[0]; + a[1] = digest[1]; + a[2] = digest[2]; + a[3] = digest[3]; + a[4] = digest[4]; + a[5] = digest[5]; + a[6] = digest[6]; + a[7] = digest[7]; + + uint32_t temp1, temp2; + P(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], bake[0], K[0]); + P(a[7], a[0], a[1], a[2], a[3], a[4], a[5], a[6], bake[1], K[1]); + P(a[6], a[7], a[0], a[1], a[2], a[3], a[4], a[5], bake[2], K[2]); + + //P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[3], K[3]); + //P(a, b, c, d, e, f, g, h, x, K) + bake[13] = a[4] + S3(a[1]) + F1(a[1], a[2], a[3]) + K[3];// + x; + bake[14] = S2(a[5]) + F0(a[5], a[6], a[7]); +} + + +IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash) +{ + uint32_t temp1, 
temp2; + uint8_t temp3, temp4; + uint32_t* buffer32; + //*********** Init 1rst SHA *********** + + //W0 W1 W2 is same !! + uint32_t W[64] = { bake[0], bake[1], bake[2], GET_UINT32_BE(dataIn, 12), + 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 640 }; + W[16] = bake[3]; + W[17] = bake[4]; + + const uint32_t* a = bake + 5; + uint32_t A[8] = { a[0], a[1], a[2], a[3], + a[4], a[5], a[6], a[7] }; + + union { + uint32_t num; + uint8_t b[4]; + } u; + uint8_t* p = NULL; + + uint8_t i; + + //P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[0], K[0]); + //P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[1], K[1]); + //P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[2], K[2]); + + //P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[3], K[3]); + //P(a, b, c, d, e, f, g, h, x, K) + temp1 = bake[13] + W[3]; + temp2 = bake[14]; + A[0] += temp1; + A[4] = temp1 + temp2; + + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[4], K[4]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[5], K[5]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[6], K[6]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], W[7], K[7]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[8], K[8]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[9], K[9]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[10], K[10]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[11], K[11]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[12], K[12]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[13], K[13]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[14], K[14]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], W[15], K[15]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[16], K[16]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[17], K[17]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(18), K[18]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(19), K[19]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(20), 
K[20]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(21), K[21]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(22), K[22]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(23), K[23]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(24), K[24]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(25), K[25]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(26), K[26]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(27), K[27]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(28), K[28]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(29), K[29]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(30), K[30]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(31), K[31]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(32), K[32]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(33), K[33]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(34), K[34]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(35), K[35]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(36), K[36]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(37), K[37]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(38), K[38]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(39), K[39]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(40), K[40]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(41), K[41]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(42), K[42]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(43), K[43]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(44), K[44]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(45), K[45]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(46), K[46]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(47), K[47]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(48), K[48]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(49), K[49]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], 
A[5], R(50), K[50]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(51), K[51]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(52), K[52]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(53), K[53]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(54), K[54]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(55), K[55]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(56), K[56]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(57), K[57]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(58), K[58]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(59), K[59]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(60), K[60]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(61), K[61]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(62), K[62]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(63), K[63]); + + //*********** end SHA_finish *********** + + /* Calculate the second hash (double SHA-256) */ + + W[0] = A[0] + digest[0]; + W[1] = A[1] + digest[1]; + W[2] = A[2] + digest[2]; + W[3] = A[3] + digest[3]; + W[4] = A[4] + digest[4]; + W[5] = A[5] + digest[5]; + W[6] = A[6] + digest[6]; + W[7] = A[7] + digest[7]; + W[8] = 0x80000000; + W[9] = 0; + W[10] = 0; + W[11] = 0; + W[12] = 0; + W[13] = 0; + W[14] = 0; + W[15] = 256; + + + A[0] = 0x6A09E667; + A[1] = 0xBB67AE85; + A[2] = 0x3C6EF372; + A[3] = 0xA54FF53A; + A[4] = 0x510E527F; + A[5] = 0x9B05688C; + A[6] = 0x1F83D9AB; + A[7] = 0x5BE0CD19; + + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[0], K[0]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[1], K[1]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[2], K[2]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[3], K[3]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[4], K[4]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[5], K[5]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[6], K[6]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], 
A[0], W[7], K[7]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[8], K[8]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[9], K[9]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[10], K[10]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[11], K[11]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[12], K[12]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[13], K[13]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[14], K[14]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], W[15], K[15]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(16), K[16]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(17), K[17]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(18), K[18]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(19), K[19]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(20), K[20]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(21), K[21]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(22), K[22]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(23), K[23]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(24), K[24]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(25), K[25]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(26), K[26]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(27), K[27]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(28), K[28]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(29), K[29]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(30), K[30]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(31), K[31]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(32), K[32]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(33), K[33]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(34), K[34]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(35), K[35]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(36), K[36]); + P(A[3], A[4], A[5], A[6], A[7], A[0], 
A[1], A[2], R(37), K[37]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(38), K[38]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(39), K[39]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(40), K[40]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(41), K[41]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(42), K[42]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(43), K[43]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(44), K[44]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(45), K[45]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(46), K[46]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(47), K[47]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(48), K[48]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(49), K[49]); + P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(50), K[50]); + P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(51), K[51]); + P(A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(52), K[52]); + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(53), K[53]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(54), K[54]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(55), K[55]); + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(56), K[56]); + P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(57), K[57]); + + //Unroll 58 + //P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(58), K[58]); + //P(a, b, c, d, e, f, g, h, x, K) + uint32_t z1 = A[5] + S3(A[2]) + F1(A[2], A[3], A[4]) + K[58] + R(58); + //uint32_t z2 = S2(A[6]) + F0(A[6], A[7], A[0]); + uint32_t z0 = A[0]; + A[1] += z1; + //A[5] = z1 + z2; + + //Unroll 59 + //P(A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(59), K[59]); + //P(a, b, c, d, e, f, g, h, x, K) + uint32_t t1 = A[4] + S3(A[1]) + F1(A[1], A[2], A[3]) + K[59] + R(59); + //uint32_t t2 = S2(A[5]) + F0(A[5], A[6], A[7]); + A[0] += t1; + //A[4] = t1 + t2; + + //Unroll 60 + //P(A[4], A[5], A[6], A[7], A[0], 
A[1], A[2], A[3], R(60), K[60]); + //P(a, b, c, d, e, f, g, h, x, K) + temp1 = A[3] + S3(A[0]) + F1(A[0], A[1], A[2]) + K[60] + R(60); + uint32_t a7 = A[7] + temp1; + if ((uint32_t)(a7 & 0xFFFF) != 0x32E7) + { + doubleHash[30] = 0xFF; + return; + } + + //Post 58 + uint32_t z2 = S2(A[6]) + F0(A[6], A[7], z0); + A[5] = z1 + z2; + + //Post 59 + uint32_t t2 = S2(A[5]) + F0(A[5], A[6], A[7]); + //uint32_t t2 = S2(A[5]) + F0(A[5], A[6], a7); + A[4] = t1 + t2; + + //Post 60 + A[7] = a7; + temp2 = S2(A[4]) + F0(A[4], A[5], A[6]); + A[3] = temp1 + temp2; + + P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(61), K[61]); + P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(62), K[62]); + P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(63), K[63]); + +#if 1 + temp1 = 0x6A09E667 + A[0]; ((uint32_t*)doubleHash)[0] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0xBB67AE85 + A[1]; ((uint32_t*)doubleHash)[1] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x3C6EF372 + A[2]; ((uint32_t*)doubleHash)[2] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0xA54FF53A + A[3]; ((uint32_t*)doubleHash)[3] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x510E527F + A[4]; ((uint32_t*)doubleHash)[4] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x9B05688C + A[5]; ((uint32_t*)doubleHash)[5] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x1F83D9AB + A[6]; ((uint32_t*)doubleHash)[6] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x5BE0CD19 + A[7]; ((uint32_t*)doubleHash)[7] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); +#else + PUT_UINT32_BE(0x6A09E667 + A[0], 
doubleHash, 0); + PUT_UINT32_BE(0xBB67AE85 + A[1], doubleHash, 4); + PUT_UINT32_BE(0x3C6EF372 + A[2], doubleHash, 8); + PUT_UINT32_BE(0xA54FF53A + A[3], doubleHash, 12); + PUT_UINT32_BE(0x510E527F + A[4], doubleHash, 16); + PUT_UINT32_BE(0x9B05688C + A[5], doubleHash, 20); + PUT_UINT32_BE(0x1F83D9AB + A[6], doubleHash, 24); + PUT_UINT32_BE(0x5BE0CD19 + A[7], doubleHash, 28); +#endif +} diff --git a/src/ShaTests/nerdSHA256plus.h b/src/ShaTests/nerdSHA256plus.h index d3e1774..fccf463 100644 --- a/src/ShaTests/nerdSHA256plus.h +++ b/src/ShaTests/nerdSHA256plus.h @@ -29,6 +29,9 @@ IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, uint8_t* dataIn); IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8_t* doubleHash); +IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, uint8_t* dataIn, uint32_t* bake); //15 words +IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash); + void ByteReverseWords(uint32_t* out, const uint32_t* in, uint32_t byteCount); #endif /* nerdSHA256plus_H_ */ \ No newline at end of file diff --git a/src/mining.cpp b/src/mining.cpp index 205b21a..4b75bb1 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -287,6 +287,7 @@ void runMiner(void * task_id) { uint8_t interResult[64]; uint8_t hash_validate[32]; uint8_t midstate[32]; + uint32_t bake[16]; unsigned char *header64; @@ -318,6 +319,7 @@ void runMiner(void * task_id) { memcpy(mMiner.bytearray_blockheader2, &mMiner.bytearray_blockheader, 80); nerd_mids(&nerdMidstate, mMiner.bytearray_blockheader2); //NerdShaplus header64 = mMiner.bytearray_blockheader2 + 64; + nerd_sha256_bake(nerdMidstate.digest, header64, bake); } uint32_t nonce = 0; @@ -419,7 +421,8 @@ void runMiner(void * task_id) { while (nonce < nonce_end) { memcpy(header64+12, &nonce, 4); - nerd_sha256d(&nerdMidstate, header64, hash); //Boosted 80Khs sha + //nerd_sha256d(&nerdMidstate, header64, hash); //Boosted 80Khs sha + 
nerd_sha256d_baked(nerdMidstate.digest, header64, bake, hash); #ifdef SHA256_VALIDATE //Important - Remove Return optimization @@ -503,6 +506,33 @@ void runMiner(void * task_id) { if(hash[31] !=0 || hash[30] !=0) continue; +#if 0 + if (miner_id == 1) + { + //validate + mbedtls_sha256_context ctx; + mbedtls_sha256_init(&ctx); + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, header64-64, 80); + mbedtls_sha256_finish_ret(&ctx, interResult); + + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, interResult, 32); + mbedtls_sha256_finish_ret(&ctx, hash_validate); + mbedtls_sha256_free(&ctx); + + bool failed = false; + for (size_t i = 0; i < 32; i++) + { + if (hash[i] != hash_validate[i]) + failed = true; + } + if (failed) + Serial.printf("MINER %d Sha256 Fail\n", miner_id); + else + Serial.printf("MINER %d Sha256 Good\n", miner_id); + } +#endif //Check target to submit //Difficulty of 1 > 0x00000000FFFF0000000000000000000000000000000000000000000000000000 //NM2 pool diff 1e-9 > Target = diff_1 / diff_pool > 0x00003B9ACA00....00 From 1f3326d4f4c6a1a1b5ae4b5dc4c9572d143f0041 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 25 Jan 2025 21:27:23 +0200 Subject: [PATCH 04/39] ESP32-S2 fixes, 150KH/s --- src/NerdMinerV2.ino.cpp | 7 +++++-- src/mining.cpp | 8 ++++++-- src/utils.cpp | 18 ++++++++++++++---- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index af88135..585820b 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -19,6 +19,7 @@ #include "TouchHandler.h" #endif +#include //#define HW_SHA256_TEST //3 seconds WDT @@ -368,10 +369,12 @@ void setup() //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; xTaskCreate(runMiner, "Miner0", 6000, (void*)0, 1, &minerTask1); - xTaskCreate(runMiner, "Miner1", 6000, (void*)1, 1, &minerTask2); - esp_task_wdt_add(minerTask1); + +#if 
(SOC_CPU_CORES_NUM >= 2) + xTaskCreate(runMiner, "Miner1", 6000, (void*)1, 1, &minerTask2); esp_task_wdt_add(minerTask2); +#endif /******** MONITOR SETUP *****/ setup_monitor(); diff --git a/src/mining.cpp b/src/mining.cpp index 4b75bb1..be4d1a8 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -224,8 +224,12 @@ void runStratumWorker(void *name) { //Stop miner current jobs //mMiner.inRun = false; s_thread_task_aborted_id = s_thread_task_id; - while (s_thread_busy[0] || s_thread_busy[1]) - {} + + #if (SOC_CPU_CORES_NUM >= 2) + while (s_thread_busy[0] || s_thread_busy[1]) { vTaskDelay(1 / portTICK_PERIOD_MS); } + #else + while (s_thread_busy[0]) { vTaskDelay(1 / portTICK_PERIOD_MS); } + #endif //Prepare data for new jobs mMiner=calculateMiningData(mWorker,mJob); diff --git a/src/utils.cpp b/src/utils.cpp index afd305f..40699af 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -202,10 +202,20 @@ miner_data calculateMiningData(mining_subscribe& mWorker, mining_job mJob){ // get extranonce2 - extranonce2 = hex(random.randint(0,2**32-1))[2:].zfill(2*extranonce2_size) //To review - char extranonce2_char[2 * mWorker.extranonce2_size+1]; - mWorker.extranonce2.toCharArray(extranonce2_char, 2 * mWorker.extranonce2_size + 1); - getNextExtranonce2(mWorker.extranonce2_size, extranonce2_char); - mWorker.extranonce2 = String(extranonce2_char); + //char extranonce2_char[2 * mWorker.extranonce2_size+1]; + //mWorker.extranonce2.toCharArray(extranonce2_char, 2 * mWorker.extranonce2_size + 1); + //getNextExtranonce2(mWorker.extranonce2_size, extranonce2_char); + if (mWorker.extranonce2_size == 2) + mWorker.extranonce2 = "0001"; + else if (mWorker.extranonce2_size == 4) + mWorker.extranonce2 = "00000001"; + else if (mWorker.extranonce2_size == 8) + mWorker.extranonce2 = "0000000000000001"; + else + { + Serial.println("Unknown extranonce2"); + mWorker.extranonce2 = "00000001"; + } //mWorker.extranonce2 = "00000002"; //get coinbase - coinbase_hash_bin = 
hashlib.sha256(hashlib.sha256(binascii.unhexlify(coinbase)).digest()).digest() From f71b57f10e9ba49902186f8b1fdf7be0d3fe0add Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 25 Jan 2025 22:19:57 +0200 Subject: [PATCH 05/39] const fixes, disable hardware sha256 for esp32D --- src/NerdMinerV2.ino.cpp | 24 ++++++++++++++++++++++++ src/ShaTests/nerdSHA256plus.cpp | 8 ++++---- src/ShaTests/nerdSHA256plus.h | 8 ++++---- src/mining.cpp | 2 ++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 585820b..a5013e6 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -56,6 +56,7 @@ const char* ntpServer = "pool.ntp.org"; #ifdef HW_SHA256_TEST +#include "ShaTests/nerdSHA256plus.h" #include "mbedtls/sha256.h" #include #include @@ -112,6 +113,8 @@ IRAM_ATTR void HwShaTest() interResult_aligned[62] = 0x01; interResult_aligned[63] = 0x00; + uint32_t bake[16]; + uint32_t time_start = micros(); const int test_count = 1000000; @@ -132,6 +135,27 @@ IRAM_ATTR void HwShaTest() mbedtls_sha256_free(&ctx); #endif +#if 0 + //nerdSha256 (ESP32 39KH/s) + nerdSHA256_context ctx; + nerd_mids(&ctx, s_test_buffer); + for (int i = 0; i < test_count; ++i) + { + nerd_sha256d(&ctx, s_test_buffer+64, hash); + } +#endif + +#if 0 + //nerdSha256 bake (ESP32 41KH/s) + nerdSHA256_context ctx; + nerd_mids(&ctx, s_test_buffer); + nerd_sha256_bake(ctx.digest, s_test_buffer+64, bake); //15 words + for (int i = 0; i < test_count; ++i) + { + nerd_sha256d_baked(ctx.digest, s_test_buffer+64, bake, hash); + } +#endif + #if 0 //Hardware high level 62KH/s esp_sha_acquire_hardware(); diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index fb50d68..287c1e0 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -101,7 +101,7 @@ void ByteReverseWords(uint32_t* out, const uint32_t* in, uint32_t byteCount) } -IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, uint8_t* dataIn) 
+IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, const uint8_t* dataIn) { uint32_t A[8] = { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; @@ -201,7 +201,7 @@ IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, uint8_t* dataIn) } -IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8_t* doubleHash) +IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, uint8_t* doubleHash) { uint32_t temp1, temp2; uint8_t temp3, temp4; @@ -403,7 +403,7 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8 } -IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, uint8_t* dataIn, uint32_t* bake) //15 words +IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, const uint8_t* dataIn, uint32_t* bake) //15 words { bake[0] = GET_UINT32_BE(dataIn, 0); bake[1] = GET_UINT32_BE(dataIn, 4); @@ -435,7 +435,7 @@ IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, uint8_t* dataIn, uint32_ } -IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash) +IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash) { uint32_t temp1, temp2; uint8_t temp3, temp4; diff --git a/src/ShaTests/nerdSHA256plus.h b/src/ShaTests/nerdSHA256plus.h index fccf463..da53033 100644 --- a/src/ShaTests/nerdSHA256plus.h +++ b/src/ShaTests/nerdSHA256plus.h @@ -25,12 +25,12 @@ struct nerdSHA256_context { }; /* Calculate midstate */ -IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, uint8_t* dataIn); +IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, const uint8_t* dataIn); -IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, uint8_t* dataIn, uint8_t* doubleHash); +IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, uint8_t* doubleHash); -IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, uint8_t* dataIn, 
uint32_t* bake); //15 words -IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash); +IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, const uint8_t* dataIn, uint32_t* bake); //15 words +IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash); void ByteReverseWords(uint32_t* out, const uint32_t* in, uint32_t byteCount); diff --git a/src/mining.cpp b/src/mining.cpp index be4d1a8..d38c1ec 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -17,7 +17,9 @@ #include "mbedtls/sha256.h" //#define SHA256_VALIDATE +#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) #define HARDWARE_SHA265 +#endif #ifdef HARDWARE_SHA265 #include From 81c53224d7d30e1cbb0714ef69fcc5882a201ee2 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Mon, 27 Jan 2025 01:50:35 +0200 Subject: [PATCH 06/39] tiny performance increase stable 200KH/s --- src/NerdMinerV2.ino.cpp | 71 ++++++++++++++++++++++++++++----- src/ShaTests/nerdSHA256plus.cpp | 37 ++++++++++------- src/mining.cpp | 56 +++++++++++++++++++++----- 3 files changed, 129 insertions(+), 35 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index a5013e6..466ab29 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -92,12 +92,51 @@ static uint8_t interResult_aligned[64] __attribute__((aligned(256))); static uint8_t midstate_aligned[32] __attribute__((aligned(256))); static uint8_t hash_aligned[64] __attribute__((aligned(256))); -IRAM_ATTR void nerd_sha_hal_wait_idle() +static inline void nerd_sha_hal_wait_idle() { while (sha_ll_busy()) {} } +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + REG_WRITE(&reg_addr_buf[0], data_words[0]); + REG_WRITE(&reg_addr_buf[1], 
data_words[1]); + REG_WRITE(&reg_addr_buf[2], data_words[2]); + REG_WRITE(&reg_addr_buf[3], data_words[3]); + REG_WRITE(&reg_addr_buf[4], data_words[4]); + REG_WRITE(&reg_addr_buf[5], data_words[5]); + REG_WRITE(&reg_addr_buf[6], data_words[6]); + REG_WRITE(&reg_addr_buf[7], data_words[7]); + REG_WRITE(&reg_addr_buf[8], data_words[8]); + REG_WRITE(&reg_addr_buf[9], data_words[9]); + REG_WRITE(&reg_addr_buf[10], data_words[10]); + REG_WRITE(&reg_addr_buf[11], data_words[11]); + REG_WRITE(&reg_addr_buf[12], data_words[12]); + REG_WRITE(&reg_addr_buf[13], data_words[13]); + REG_WRITE(&reg_addr_buf[14], data_words[14]); + REG_WRITE(&reg_addr_buf[15], data_words[15]); +} + +static inline void nerd_sha_ll_write_digest_sha256(void *digest_state) +{ + uint32_t *digest_state_words = (uint32_t *)digest_state; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_H_BASE); + + REG_WRITE(&reg_addr_buf[0], digest_state_words[0]); + REG_WRITE(&reg_addr_buf[1], digest_state_words[1]); + REG_WRITE(&reg_addr_buf[2], digest_state_words[2]); + REG_WRITE(&reg_addr_buf[3], digest_state_words[3]); + REG_WRITE(&reg_addr_buf[4], digest_state_words[4]); + REG_WRITE(&reg_addr_buf[5], digest_state_words[5]); + REG_WRITE(&reg_addr_buf[6], digest_state_words[6]); + REG_WRITE(&reg_addr_buf[7], digest_state_words[7]); + REG_WRITE(&reg_addr_buf[8], digest_state_words[8]); +} + IRAM_ATTR void HwShaTest() { uint8_t interResult[64]; @@ -116,7 +155,7 @@ IRAM_ATTR void HwShaTest() uint32_t bake[16]; uint32_t time_start = micros(); - const int test_count = 1000000; + int test_count = 1000000; #if 0 //Generic software 16KH/s @@ -146,7 +185,10 @@ IRAM_ATTR void HwShaTest() #endif #if 0 - //nerdSha256 bake (ESP32 41KH/s) + //nerdSha256 bake + //ESP32 : 41KH/s + //ESP32S3 : 42.32KH/s + test_count = 100000; nerdSHA256_context ctx; nerd_mids(&ctx, s_test_buffer); nerd_sha256_bake(ctx.digest, s_test_buffer+64, bake); //15 words @@ -232,7 +274,7 @@ IRAM_ATTR void HwShaTest() #endif #if 1 - //Hardware LL 161KH/s + //Hardware LL 162.43KH/s esp_sha_acquire_hardware(); 
//sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); sha_hal_wait_idle(); @@ -248,25 +290,32 @@ IRAM_ATTR void HwShaTest() { //sha_hal_write_digest(SHA2_256, midstate); sha_ll_write_digest(SHA2_256, midstate, 256 / 32); + //nerd_sha_ll_write_digest_sha256(midstate); //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); - sha_hal_wait_idle(); - sha_ll_fill_text_block(s_test_buffer+64, 64/4); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(s_test_buffer+64, 64/4); + nerd_sha_ll_fill_text_block_sha256(s_test_buffer+64); sha_ll_continue_block(SHA2_256); //sha_hal_read_digest(SHA2_256, interResult); sha_ll_load(SHA2_256); - sha_hal_wait_idle(); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); sha_ll_read_digest(SHA2_256, interResult, 256 / 32); //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); - sha_hal_wait_idle(); - sha_ll_fill_text_block(interResult, 64/4); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(interResult, 64/4); + nerd_sha_ll_fill_text_block_sha256(interResult); sha_ll_start_block(SHA2_256); //sha_hal_read_digest(SHA2_256, hash); sha_ll_load(SHA2_256); - sha_hal_wait_idle(); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); sha_ll_read_digest(SHA2_256, hash, 256 / 32); } esp_sha_release_hardware(); @@ -275,7 +324,7 @@ IRAM_ATTR void HwShaTest() uint32_t time_end = micros(); double hash_rate = ((double)test_count * 1000000) / (double)(time_end - time_start); Serial.print("DmaHashrate="); - Serial.print((int)hash_rate/1000); + Serial.print(hash_rate/1000); Serial.println("KH/s"); Serial.print("interResult: "); diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index 287c1e0..8997d83 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -24,6 +24,11 @@ #include #include +//#pragma GCC optimize ("O2") +//#pragma GCC optimize ("jump-tables") +//#pragma GCC optimize ("tree-switch-conversion") +//#pragma GCC 
optimize ("no-stack-check") + #define ROTR(x, n) ((x >> n) | (x << ((sizeof(x) << 3) - n))) #ifndef PUT_UINT32_BE @@ -438,8 +443,6 @@ IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, const uint8_t* dataIn, u IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash) { uint32_t temp1, temp2; - uint8_t temp3, temp4; - uint32_t* buffer32; //*********** Init 1rst SHA *********** //W0 W1 W2 is same !! @@ -453,14 +456,6 @@ IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, uint32_t A[8] = { a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7] }; - union { - uint32_t num; - uint8_t b[4]; - } u; - uint8_t* p = NULL; - - uint8_t i; - //P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[0], K[0]); //P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[1], K[1]); //P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[2], K[2]); @@ -620,15 +615,25 @@ IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(53), K[53]); P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(54), K[54]); P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(55), K[55]); - P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(56), K[56]); - P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(57), K[57]); + //Unroll 56 - worse performace + P(A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(56), K[56]); + + //Unroll 57 + //P(A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(57), K[57]); + //P(a, b, c, d, e, f, g, h, x, K) + uint32_t m1 = A[6] + S3(A[3]) + F1(A[3], A[4], A[5]) + K[57] + R(57); + //uint32_t m2 = S2(A[7]) + F0(A[7], A[0], A[1]); + A[2] += m1; + //A[6] = m1 + m2; + uint32_t d57_a1 = A[1]; + //Unroll 58 //P(A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(58), K[58]); //P(a, b, c, d, e, f, g, h, x, K) uint32_t z1 = A[5] + S3(A[2]) + F1(A[2], A[3], A[4]) + K[58] + R(58); //uint32_t z2 = S2(A[6]) + F0(A[6], A[7], A[0]); - 
uint32_t z0 = A[0]; + uint32_t d58_a0 = A[0]; A[1] += z1; //A[5] = z1 + z2; @@ -651,8 +656,12 @@ IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, return; } + //Post 57 + uint32_t m2 = S2(A[7]) + F0(A[7], d58_a0, d57_a1); + A[6] = m1 + m2; + //Post 58 - uint32_t z2 = S2(A[6]) + F0(A[6], A[7], z0); + uint32_t z2 = S2(A[6]) + F0(A[6], A[7], d58_a0); A[5] = z1 + z2; //Post 59 diff --git a/src/mining.cpp b/src/mining.cpp index d38c1ec..d358ea5 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -259,7 +259,37 @@ void runStratumWorker(void *name) { } +#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + REG_WRITE(&reg_addr_buf[0], data_words[0]); + REG_WRITE(&reg_addr_buf[1], data_words[1]); + REG_WRITE(&reg_addr_buf[2], data_words[2]); + REG_WRITE(&reg_addr_buf[3], data_words[3]); + REG_WRITE(&reg_addr_buf[4], data_words[4]); + REG_WRITE(&reg_addr_buf[5], data_words[5]); + REG_WRITE(&reg_addr_buf[6], data_words[6]); + REG_WRITE(&reg_addr_buf[7], data_words[7]); + REG_WRITE(&reg_addr_buf[8], data_words[8]); + REG_WRITE(&reg_addr_buf[9], data_words[9]); + REG_WRITE(&reg_addr_buf[10], data_words[10]); + REG_WRITE(&reg_addr_buf[11], data_words[11]); + REG_WRITE(&reg_addr_buf[12], data_words[12]); + REG_WRITE(&reg_addr_buf[13], data_words[13]); + REG_WRITE(&reg_addr_buf[14], data_words[14]); + REG_WRITE(&reg_addr_buf[15], data_words[15]); +} + +static inline void nerd_sha_hal_wait_idle() +{ + while (sha_ll_busy()) + {} +} + +#endif //////////////////THREAD CALLS/////////////////// //This works only with one thread, TODO -> Class or miner_data for each thread @@ -358,26 +388,34 @@ void runMiner(void * task_id) { if (miner_id == 0) { //Hardware + uint32_t nonce_start = nonce; esp_sha_acquire_hardware(); while (nonce < nonce_end) { - 
memcpy(header64+12, &nonce, 4); + //memcpy(header64+12, &nonce, 4); + ((uint32_t*)(header64+12))[0] = nonce; sha_ll_write_digest(SHA2_256, midstate, 256 / 32); - sha_hal_wait_idle(); - sha_ll_fill_text_block(header64, 64/4); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(header64, 64/4); + nerd_sha_ll_fill_text_block_sha256(header64); sha_ll_continue_block(SHA2_256); sha_ll_load(SHA2_256); - sha_hal_wait_idle(); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); sha_ll_read_digest(SHA2_256, interResult, 256 / 32); - sha_hal_wait_idle(); - sha_ll_fill_text_block(interResult, 64/4); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(interResult, 64/4); + nerd_sha_ll_fill_text_block_sha256(interResult); sha_ll_start_block(SHA2_256); sha_ll_load(SHA2_256); - sha_hal_wait_idle(); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); sha_ll_read_digest(SHA2_256, hash, 256 / 32); #ifdef SHA256_VALIDATE mbedtls_sha256_context ctx; @@ -413,14 +451,12 @@ void runMiner(void * task_id) { return; //Crash Here } #endif - - hashes++; nonce++; - if(hash[31] == 0 && hash[30] == 0) break; } esp_sha_release_hardware(); + hashes += nonce - nonce_start; } else #endif { From 589682d20907e62133841716452f56d326824154 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Mon, 27 Jan 2025 01:57:39 +0200 Subject: [PATCH 07/39] Other tiny optimization --- src/mining.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index d358ea5..bc17bb7 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -395,7 +395,7 @@ void runMiner(void * task_id) { //memcpy(header64+12, &nonce, 4); ((uint32_t*)(header64+12))[0] = nonce; - sha_ll_write_digest(SHA2_256, midstate, 256 / 32); + sha_ll_write_digest(SHA2_256, midstate, 256 / 32); //no need to unroll //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); //sha_ll_fill_text_block(header64, 64/4); @@ -460,9 +460,11 @@ void runMiner(void * task_id) { } 
else #endif { + uint32_t nonce_start = nonce; while (nonce < nonce_end) { - memcpy(header64+12, &nonce, 4); + //memcpy(header64+12, &nonce, 4); + ((uint32_t*)(header64+12))[0] = nonce; //nerd_sha256d(&nerdMidstate, header64, hash); //Boosted 80Khs sha nerd_sha256d_baked(nerdMidstate.digest, header64, bake, hash); @@ -517,12 +519,11 @@ void runMiner(void * task_id) { return; //Crash Here } #endif - hashes++; nonce++; - if(hash[31] == 0 && hash[30] == 0) break; } + hashes += nonce - nonce_start; } /*Serial.print("hash1: "); From 04b200f171216c718b585084085ea300d4932105 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Wed, 29 Jan 2025 18:00:17 +0200 Subject: [PATCH 08/39] Another Job model --- src/NerdMinerV2.ino.cpp | 8 +- src/ShaTests/nerdSHA256plus.cpp | 19 +- src/ShaTests/nerdSHA256plus.h | 2 +- src/mining.cpp | 352 ++++++++++++++++++++++++++------ src/mining.h | 7 +- src/utils.cpp | 5 +- 6 files changed, 317 insertions(+), 76 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 466ab29..79ac35f 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -441,11 +441,15 @@ void setup() // Start mining tasks //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; - xTaskCreate(runMiner, "Miner0", 6000, (void*)0, 1, &minerTask1); + xTaskCreate(minerWorkerSw, "Miner0", 6000, (void*)0, 1, &minerTask1); esp_task_wdt_add(minerTask1); #if (SOC_CPU_CORES_NUM >= 2) - xTaskCreate(runMiner, "Miner1", 6000, (void*)1, 1, &minerTask2); + #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) + xTaskCreate(minerWorkerHw, "Miner1", 6000, (void*)1, 1, &minerTask2); + #else + xTaskCreate(minerWorkerSw, "Miner1", 6000, (void*)0, 1, &minerTask2); + #endif esp_task_wdt_add(minerTask2); #endif diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index 8997d83..e113c2d 100644 --- 
a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -106,7 +106,7 @@ void ByteReverseWords(uint32_t* out, const uint32_t* in, uint32_t byteCount) } -IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, const uint8_t* dataIn) +IRAM_ATTR void nerd_mids(uint32_t* digest, const uint8_t* dataIn) { uint32_t A[8] = { 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; @@ -195,15 +195,14 @@ IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, const uint8_t* dataIn) P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(62), K[62]); P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(63), K[63]); - midstate->digest[0] = 0x6A09E667 + A[0]; - midstate->digest[1] = 0xBB67AE85 + A[1]; - midstate->digest[2] = 0x3C6EF372 + A[2]; - midstate->digest[3] = 0xA54FF53A + A[3]; - midstate->digest[4] = 0x510E527F + A[4]; - midstate->digest[5] = 0x9B05688C + A[5]; - midstate->digest[6] = 0x1F83D9AB + A[6]; - midstate->digest[7] = 0x5BE0CD19 + A[7]; - + digest[0] = 0x6A09E667 + A[0]; + digest[1] = 0xBB67AE85 + A[1]; + digest[2] = 0x3C6EF372 + A[2]; + digest[3] = 0xA54FF53A + A[3]; + digest[4] = 0x510E527F + A[4]; + digest[5] = 0x9B05688C + A[5]; + digest[6] = 0x1F83D9AB + A[6]; + digest[7] = 0x5BE0CD19 + A[7]; } IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, uint8_t* doubleHash) diff --git a/src/ShaTests/nerdSHA256plus.h b/src/ShaTests/nerdSHA256plus.h index da53033..5cdce78 100644 --- a/src/ShaTests/nerdSHA256plus.h +++ b/src/ShaTests/nerdSHA256plus.h @@ -25,7 +25,7 @@ struct nerdSHA256_context { }; /* Calculate midstate */ -IRAM_ATTR void nerd_mids(nerdSHA256_context* midstate, const uint8_t* dataIn); +IRAM_ATTR void nerd_mids(uint32_t* digest, const uint8_t* dataIn); IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, uint8_t* doubleHash); diff --git a/src/mining.cpp b/src/mining.cpp index bc17bb7..8122a14 100644 --- a/src/mining.cpp +++ b/src/mining.cpp 
@@ -14,8 +14,14 @@ #include "drivers/displays/display.h" #include "drivers/storage/storage.h" #include +#include #include "mbedtls/sha256.h" +//10 Jobs per second +#define NONCE_PER_JOB_SW 4096 +#define NONCE_PER_JOB_HW 16*1024 + + //#define SHA256_VALIDATE #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) #define HARDWARE_SHA265 @@ -39,13 +45,6 @@ uint64_t upTime = 0; volatile uint32_t shares; // increase if blockhash has 32 bits of zeroes volatile uint32_t valids; // increased if blockhash <= target -static std::mutex s_nonce_batch_mutex; -static volatile uint32_t s_nonce_batch = 0; - -static volatile uint8_t s_thread_busy[2] = {0, 0}; -static volatile uint32_t s_thread_task_id = 0; -static volatile uint32_t s_thread_task_aborted_id = 0; - // Track best diff double best_diff = 0.0; @@ -57,7 +56,6 @@ IPAddress serverIP(1, 1, 1, 1); //Temporally save poolIPaddres //Global work data static WiFiClient client; -static std::mutex s_client_mutex; static miner_data mMiner; //Global miner data (Create a miner class TODO) mining_subscribe mWorker; mining_job mJob; @@ -112,10 +110,7 @@ bool checkPoolInactivity(unsigned int keepAliveTime, unsigned long inactivityTim mLastTXtoPool = millis(); Serial.println(" Sending : KeepAlive suggest_difficulty"); //if (client.print("{}\n") == 0) { - { - std::lock_guard lock(s_client_mutex); - tx_suggest_difficulty(client, DEFAULT_DIFFICULTY); - } + tx_suggest_difficulty(client, DEFAULT_DIFFICULTY); /*if(tx_suggest_difficulty(client, DEFAULT_DIFFICULTY)){ Serial.println(" Sending keepAlive to pool -> Detected client disconnected"); return true; @@ -133,6 +128,47 @@ bool checkPoolInactivity(unsigned int keepAliveTime, unsigned long inactivityTim return false; } +struct JobRequest +{ + uint32_t id; + uint32_t nonce_start; + uint32_t nonce_count; + double difficulty; + uint8_t buffer_upper[64]; + uint32_t midstate[8]; + uint32_t bake[16]; +}; + +struct JobResult +{ + uint32_t 
id; + uint32_t nonce; + uint32_t nonce_count; + double difficulty; + uint8_t hash[32]; +}; + +static std::mutex s_job_mutex; +std::list> s_job_request_list_sw; +#ifdef HARDWARE_SHA265 +std::list> s_job_request_list_hw; +#endif +std::list> s_job_result_list; + +static void JobPush(std::list> &job_list, uint32_t id, uint32_t nonce_start, uint32_t nonce_count, double difficulty, + const uint8_t* buffer_upper, const uint32_t* midstate, const uint32_t* bake) +{ + std::shared_ptr job = std::make_shared(); + job->id = id; + job->nonce_start = nonce_start; + job->nonce_count = nonce_count; + job->difficulty = difficulty; + memcpy(job->buffer_upper, buffer_upper, sizeof(job->buffer_upper)); + memcpy(job->midstate, midstate, sizeof(job->midstate)); + memcpy(job->bake, bake, sizeof(job->bake)); + job_list.push_back(job); +} + void runStratumWorker(void *name) { // TEST: https://bitcoin.stackexchange.com/questions/22929/full-example-data-for-scrypt-stratum-client @@ -147,6 +183,8 @@ void runStratumWorker(void *name) { // connect to pool double currentPoolDifficulty = DEFAULT_DIFFICULTY; + uint32_t nonce_pool = 0; + uint32_t job_pool = 0; while(true) { @@ -165,10 +203,9 @@ void runStratumWorker(void *name) { vTaskDelay(((1 + rand() % 120) * 1000) / portTICK_PERIOD_MS); } - if(!isMinerSuscribed){ - + if(!isMinerSuscribed) + { //Stop miner current jobs - s_thread_task_aborted_id = s_thread_task_id; mWorker = init_mining_subscribe(); // STEP 1: Pool server connection (SUBSCRIBE) @@ -184,7 +221,7 @@ void runStratumWorker(void *name) { //tx_mining_auth2(client, mWorker.wName, mWorker.wPass); //Don't verifies authoritzation, TODO // STEP 3: Suggest pool difficulty - tx_suggest_difficulty(client, DEFAULT_DIFFICULTY); + tx_suggest_difficulty(client, currentPoolDifficulty); isMinerSuscribed=true; mLastTXtoPool = millis(); @@ -194,72 +231,190 @@ void runStratumWorker(void *name) { if(checkPoolInactivity(KEEPALIVE_TIME_ms, POOLINACTIVITY_TIME_ms)){ //Restart connection Serial.println(" 
Detected more than 2 min without data form stratum server. Closing socket and reopening..."); - { - std::lock_guard lock(s_client_mutex); - client.stop(); - } - s_thread_task_aborted_id = s_thread_task_id; + client.stop(); isMinerSuscribed=false; continue; } - //Read pending messages from pool - while(true) - { - String line; - { - std::lock_guard lock(s_client_mutex); - if (!client.connected() || !client.available()) - break; - line = client.readStringUntil('\n'); - } + uint32_t hw_midstate[8]; + uint32_t diget_mid[8]; + uint32_t bake[16]; - Serial.println(" Received message from pool"); + //Read pending messages from pool + while(client.connected() && client.available()) + { + String line = client.readStringUntil('\n'); + //Serial.println(" Received message from pool"); stratum_method result = parse_mining_method(line); switch (result) { case STRATUM_PARSE_ERROR: Serial.println(" Parsed JSON: error on JSON"); break; case MINING_NOTIFY: if(parse_mining_notify(line, mJob)) { + { + std::lock_guard lock(s_job_mutex); + s_job_request_list_sw.clear(); + #ifdef HARDWARE_SHA265 + s_job_request_list_hw.clear(); + #endif + } //Increse templates readed templates++; - //Stop miner current jobs - //mMiner.inRun = false; - s_thread_task_aborted_id = s_thread_task_id; - - #if (SOC_CPU_CORES_NUM >= 2) - while (s_thread_busy[0] || s_thread_busy[1]) { vTaskDelay(1 / portTICK_PERIOD_MS); } - #else - while (s_thread_busy[0]) { vTaskDelay(1 / portTICK_PERIOD_MS); } - #endif + job_pool++; //Prepare data for new jobs - mMiner=calculateMiningData(mWorker,mJob); - mMiner.poolDifficulty = currentPoolDifficulty; + mMiner=calculateMiningData(mWorker, mJob); + + memset(mMiner.bytearray_blockheader+80, 0, 128-80); + mMiner.bytearray_blockheader[80] = 0x80; + mMiner.bytearray_blockheader[126] = 0x02; + mMiner.bytearray_blockheader[127] = 0x80; + + nerd_mids(diget_mid, mMiner.bytearray_blockheader); + nerd_sha256_bake(diget_mid, mMiner.bytearray_blockheader+64, bake); + + #ifdef HARDWARE_SHA265 
+ esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, mMiner.bytearray_blockheader, 64/4, true); + sha_hal_read_digest(SHA2_256, hw_midstate); + esp_sha_release_hardware(); + #endif + { - std::lock_guard lock(s_nonce_batch_mutex); - s_nonce_batch = TARGET_NONCE - MAX_NONCE; + std::lock_guard lock(s_job_mutex); + for (int i = 0; i < 4; ++ i) + { + JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, diget_mid, bake); + nonce_pool += NONCE_PER_JOB_SW; + #ifdef HARDWARE_SHA265 + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, hw_midstate, bake); + nonce_pool += NONCE_PER_JOB_HW; + #endif + } } - s_thread_task_id++; - //Give new job to miner } break; case MINING_SET_DIFFICULTY: parse_mining_set_difficulty(line, currentPoolDifficulty); - mMiner.poolDifficulty = currentPoolDifficulty; break; case STRATUM_SUCCESS: Serial.println(" Parsed JSON: Success"); break; default: Serial.println(" Parsed JSON: unknown"); break; } } + vTaskDelay(50 / portTICK_PERIOD_MS); //Small delay - vTaskDelay(500 / portTICK_PERIOD_MS); //Small delay - + std::list> job_result_list; + { + std::lock_guard lock(s_job_mutex); + job_result_list = s_job_result_list; + s_job_result_list.clear(); + + while (s_job_request_list_sw.size() < 4) + { + JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, diget_mid, bake); + nonce_pool += NONCE_PER_JOB_SW; + } + + #ifdef HARDWARE_SHA265 + while (s_job_request_list_hw.size() < 4) + { + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, hw_midstate, bake); + nonce_pool += NONCE_PER_JOB_HW; + } + #endif + } + + while (!job_result_list.empty()) + { + std::shared_ptr res = job_result_list.front(); + job_result_list.pop_front(); + + hashes += 
res->nonce_count; + if (res->difficulty > currentPoolDifficulty && job_pool == res->id) + { + tx_mining_submit(client, mWorker, mJob, res->nonce); + Serial.print(" - Current diff share: "); Serial.println(res->difficulty,12); + Serial.print(" - Current pool diff : "); Serial.println(currentPoolDifficulty,12); + Serial.print(" - TX SHARE: "); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x", res->hash[i]); + Serial.println(""); + mLastTXtoPool = millis(); + + if (res->difficulty > best_diff) + best_diff = res->difficulty; + + // check if 32bit share + if(res->hash[29] !=0 || res->hash[28] !=0) + shares++; + + // check if valid header + if(checkValid(res->hash, mMiner.bytearray_target)) + { + Serial.printf("CONGRATULATIONS! Valid block found with nonce: %d | 0x%x\n", res->nonce); + valids++; + } + } + } } - } -#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) +//////////////////THREAD CALLS/////////////////// + +void minerWorkerSw(void * task_id) +{ + unsigned int miner_id = (uint32_t)task_id; + Serial.printf("[MINER] %d Started minerWorkerSw Task!\n", miner_id); + + std::shared_ptr job; + std::shared_ptr result; + uint8_t hash[32]; + while (1) + { + { + std::lock_guard lock(s_job_mutex); + if (result) + { + s_job_result_list.push_back(result); + result.reset(); + } + if (!s_job_request_list_sw.empty()) + { + job = s_job_request_list_sw.front(); + s_job_request_list_sw.pop_front(); + } else + job.reset(); + } + if (job) + { + result = std::make_shared(); + result->difficulty = 0.0; + result->nonce = 0xFFFFFFFF; + result->id = job->id; + result->nonce_count = job->nonce_count; + for (uint32_t n = 0; n < job->nonce_count; ++n) + { + ((uint32_t*)(job->buffer_upper+12))[0] = job->nonce_start+n; + nerd_sha256d_baked(job->midstate, job->buffer_upper, job->bake, hash); + if(hash[31] == 0 && hash[30] == 0) + { + double diff_hash = diff_from_target(hash); + if (diff_hash > best_diff) + { + 
result->difficulty = diff_hash; + result->nonce = job->nonce_start+n; + memcpy(result->hash, hash, 32); + } + } + } + memcpy(result->hash, hash, sizeof(hash)); + } else + vTaskDelay(2 / portTICK_PERIOD_MS); + } +} + +#ifdef HARDWARE_SHA265 + static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) { uint32_t *data_words = (uint32_t *)input_text; @@ -289,12 +444,92 @@ static inline void nerd_sha_hal_wait_idle() {} } +void minerWorkerHw(void * task_id) +{ + unsigned int miner_id = (uint32_t)task_id; + Serial.printf("[MINER] %d Started minerWorkerHw Task!\n", miner_id); + + std::shared_ptr job; + std::shared_ptr result; + uint8_t interResult[64]; + uint8_t hash[32]; + + memset(interResult, 0, sizeof(interResult)); + interResult[32] = 0x80; + interResult[62] = 0x01; + interResult[63] = 0x00; + while (1) + { + { + std::lock_guard lock(s_job_mutex); + if (result) + { + s_job_result_list.push_back(result); + result.reset(); + } + if (!s_job_request_list_hw.empty()) + { + job = s_job_request_list_hw.front(); + s_job_request_list_hw.pop_front(); + } else + job.reset(); + } + if (job) + { + result = std::make_shared(); + result->id = job->id; + result->nonce = 0xFFFFFF; + result->nonce_count = job->nonce_count; + result->difficulty = 0.0; + + uint8_t* sha_buffer = job->buffer_upper; + esp_sha_acquire_hardware(); + for (uint32_t n = 0; n < job->nonce_count; ++n) + { + ((uint32_t*)(sha_buffer+12))[0] = job->nonce_start+n; + + sha_ll_write_digest(SHA2_256, job->midstate, 256 / 32); //no need to unroll + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(header64, 64/4); + nerd_sha_ll_fill_text_block_sha256(sha_buffer); + sha_ll_continue_block(SHA2_256); + + sha_ll_load(SHA2_256); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(interResult, 64/4); + 
nerd_sha_ll_fill_text_block_sha256(interResult); + sha_ll_start_block(SHA2_256); + + sha_ll_load(SHA2_256); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, hash, 256 / 32); + + if(hash[31] == 0 && hash[30] == 0) + { + double diff_hash = diff_from_target(hash); + if (diff_hash > result->difficulty) + { + result->difficulty = diff_hash; + result->nonce = job->nonce_start+n; + memcpy(result->hash, hash, sizeof(hash)); + } + } + } + esp_sha_release_hardware(); + } else + vTaskDelay(2 / portTICK_PERIOD_MS); + } +} #endif -//////////////////THREAD CALLS/////////////////// -//This works only with one thread, TODO -> Class or miner_data for each thread - - +#if 0 void runMiner(void * task_id) { unsigned int miner_id = (uint32_t)task_id; @@ -641,6 +876,8 @@ void runMiner(void * task_id) { } //while (1) } +#endif + #define DELAY 100 #define REDRAW_EVERY 10 @@ -716,7 +953,8 @@ void runMonitor(void *name) if (elapsedKHs == 0) { Serial.printf(">>> [i] Miner: newJob>%s / inRun>%s) - Client: connected>%s / subscribed>%s / wificonnected>%s\n", - (s_thread_task_id != s_thread_task_aborted_id) ? "true" : "false", s_thread_busy[0] ? "true" : "false", + //(1) ? "true" : "false", 1 ? "true" : "false", + "true", "true", client.connected() ? "true" : "false", isMinerSuscribed ? "true" : "false", WiFi.status() == WL_CONNECTED ? 
"true" : "false"); } diff --git a/src/mining.h b/src/mining.h index 11b27cb..d7e3dd0 100644 --- a/src/mining.h +++ b/src/mining.h @@ -13,8 +13,13 @@ #define TARGET_BUFFER_SIZE 64 void runMonitor(void *name); + void runStratumWorker(void *name); void runMiner(void *name); + +void minerWorkerSw(void * task_id); +void minerWorkerHw(void * task_id); + String printLocalTime(void); void resetStat(); @@ -24,8 +29,6 @@ typedef struct{ uint8_t bytearray_pooltarget[32]; uint8_t merkle_result[32]; uint8_t bytearray_blockheader[128]; - uint8_t bytearray_blockheader2[80]; - double poolDifficulty; } miner_data; diff --git a/src/utils.cpp b/src/utils.cpp index 40699af..5943d54 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -171,10 +171,7 @@ void getNextExtranonce2(int extranonce2_size, char *extranonce2) { miner_data init_miner_data(void){ - miner_data newMinerData; - - newMinerData.poolDifficulty = DEFAULT_DIFFICULTY; - + miner_data newMinerData; return newMinerData; } From e4c839fcd320ba578746dfad461d1bbecf2ae824 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Wed, 29 Jan 2025 18:10:03 +0200 Subject: [PATCH 09/39] Fixes --- src/NerdMinerV2.ino.cpp | 12 ++++++------ src/mining.cpp | 4 ---- src/mining.h | 4 ++++ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 79ac35f..0b6c8bd 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -441,15 +441,15 @@ void setup() // Start mining tasks //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; - xTaskCreate(minerWorkerSw, "Miner0", 6000, (void*)0, 1, &minerTask1); + #ifdef HARDWARE_SHA265 + xTaskCreate(minerWorkerHw, "MinerHw-0", 6000, (void*)0, 1, &minerTask1); + #else + xTaskCreate(minerWorkerSw, "MinerSw-0", 6000, (void*)0, 1, &minerTask1); + #endif esp_task_wdt_add(minerTask1); #if (SOC_CPU_CORES_NUM >= 2) - #if defined(CONFIG_IDF_TARGET_ESP32S2) || 
defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) - xTaskCreate(minerWorkerHw, "Miner1", 6000, (void*)1, 1, &minerTask2); - #else - xTaskCreate(minerWorkerSw, "Miner1", 6000, (void*)0, 1, &minerTask2); - #endif + xTaskCreate(minerWorkerSw, "MinerSw-1", 6000, (void*)1, 1, &minerTask2); esp_task_wdt_add(minerTask2); #endif diff --git a/src/mining.cpp b/src/mining.cpp index 8122a14..0dfba08 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -23,9 +23,6 @@ //#define SHA256_VALIDATE -#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) -#define HARDWARE_SHA265 -#endif #ifdef HARDWARE_SHA265 #include @@ -407,7 +404,6 @@ void minerWorkerSw(void * task_id) } } } - memcpy(result->hash, hash, sizeof(hash)); } else vTaskDelay(2 / portTICK_PERIOD_MS); } diff --git a/src/mining.h b/src/mining.h index d7e3dd0..7e424c1 100644 --- a/src/mining.h +++ b/src/mining.h @@ -10,6 +10,10 @@ #define KEEPALIVE_TIME_ms 30000 #define POOLINACTIVITY_TIME_ms 60000 +#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) +#define HARDWARE_SHA265 +#endif + #define TARGET_BUFFER_SIZE 64 void runMonitor(void *name); From fc9bc9eac19ea0f8ca70006e45886e69426b9e13 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Fri, 31 Jan 2025 12:36:14 +0200 Subject: [PATCH 10/39] Added i2c master, Another job thread distribution --- platformio.ini | 2 +- src/ShaTests/nerdSHA256plus.cpp | 2 +- src/i2c_master.cpp | 184 +++++++++++++ src/i2c_master.h | 9 + src/mining.cpp | 464 +++++++------------------------- src/monitor.cpp | 41 ++- src/utils.cpp | 10 +- 7 files changed, 334 insertions(+), 378 deletions(-) create mode 100644 src/i2c_master.cpp create mode 100644 src/i2c_master.h diff --git a/platformio.ini b/platformio.ini index babee0e..b8c1fa9 100644 --- a/platformio.ini +++ b/platformio.ini @@ -616,7 +616,7 @@ monitor_filters = # Commenting out 
'board_build.arduino.memory_type' fixes missing 'sdkconfig.h' errors: ;board_build.arduino.memory_type = qio_opi monitor_speed = 115200 -upload_speed = 115200 +upload_speed = 921600 # 2 x 4.5MB app, 6.875MB SPIFFS board_build.partitions = huge_app.csv build_flags = diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index e113c2d..5e14e08 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -24,7 +24,7 @@ #include #include -//#pragma GCC optimize ("O2") +//#pragma GCC optimize ("Ofast") //#pragma GCC optimize ("jump-tables") //#pragma GCC optimize ("tree-switch-conversion") //#pragma GCC optimize ("no-stack-check") diff --git a/src/i2c_master.cpp b/src/i2c_master.cpp new file mode 100644 index 0000000..08a95d1 --- /dev/null +++ b/src/i2c_master.cpp @@ -0,0 +1,184 @@ +#include "i2c_master.h" +#include +#include + +#define I2C_MASTER_NUM_PORT 0 +#define PIN_I2C_SDA 21 +#define PIN_I2C_SCL 22 +#define I2C_MASTER_TX_BUF_LEN 1024 +#define I2C_MASTER_RX_BUF_LEN 1024 + +static i2c_config_t s_i2c_config; + +#define I2C_CMD_FEED 0xA1 +#define I2C_CMD_REQUEST_RESULT 0xA9 +#define I2C_CMD_SLAVE_RESULT 0xAA + +struct __attribute__((__packed__)) JobI2cRequest +{ + //84 bytes + uint8_t cmd; + uint8_t crc; + uint8_t id; + uint8_t nonce_start; + float difficulty; + uint8_t buffer[76]; +}; + +struct __attribute__((__packed__)) JobI2cResult +{ + //11 bytes + uint8_t cmd; + uint8_t crc; + uint8_t id; + uint32_t nonce; + uint32_t processed_nonce; +}; + +const uint8_t s_crc8_table[256] = +{ + 0x00, 0x31, 0x62, 0x53, 0xC4, 0xF5, 0xA6, 0x97, + 0xB9, 0x88, 0xDB, 0xEA, 0x7D, 0x4C, 0x1F, 0x2E, + 0x43, 0x72, 0x21, 0x10, 0x87, 0xB6, 0xE5, 0xD4, + 0xFA, 0xCB, 0x98, 0xA9, 0x3E, 0x0F, 0x5C, 0x6D, + 0x86, 0xB7, 0xE4, 0xD5, 0x42, 0x73, 0x20, 0x11, + 0x3F, 0x0E, 0x5D, 0x6C, 0xFB, 0xCA, 0x99, 0xA8, + 0xC5, 0xF4, 0xA7, 0x96, 0x01, 0x30, 0x63, 0x52, + 0x7C, 0x4D, 0x1E, 0x2F, 0xB8, 0x89, 0xDA, 0xEB, + 0x3D, 0x0C, 0x5F, 0x6E, 0xF9, 0xC8, 0x9B, 
0xAA, + 0x84, 0xB5, 0xE6, 0xD7, 0x40, 0x71, 0x22, 0x13, + 0x7E, 0x4F, 0x1C, 0x2D, 0xBA, 0x8B, 0xD8, 0xE9, + 0xC7, 0xF6, 0xA5, 0x94, 0x03, 0x32, 0x61, 0x50, + 0xBB, 0x8A, 0xD9, 0xE8, 0x7F, 0x4E, 0x1D, 0x2C, + 0x02, 0x33, 0x60, 0x51, 0xC6, 0xF7, 0xA4, 0x95, + 0xF8, 0xC9, 0x9A, 0xAB, 0x3C, 0x0D, 0x5E, 0x6F, + 0x41, 0x70, 0x23, 0x12, 0x85, 0xB4, 0xE7, 0xD6, + 0x7A, 0x4B, 0x18, 0x29, 0xBE, 0x8F, 0xDC, 0xED, + 0xC3, 0xF2, 0xA1, 0x90, 0x07, 0x36, 0x65, 0x54, + 0x39, 0x08, 0x5B, 0x6A, 0xFD, 0xCC, 0x9F, 0xAE, + 0x80, 0xB1, 0xE2, 0xD3, 0x44, 0x75, 0x26, 0x17, + 0xFC, 0xCD, 0x9E, 0xAF, 0x38, 0x09, 0x5A, 0x6B, + 0x45, 0x74, 0x27, 0x16, 0x81, 0xB0, 0xE3, 0xD2, + 0xBF, 0x8E, 0xDD, 0xEC, 0x7B, 0x4A, 0x19, 0x28, + 0x06, 0x37, 0x64, 0x55, 0xC2, 0xF3, 0xA0, 0x91, + 0x47, 0x76, 0x25, 0x14, 0x83, 0xB2, 0xE1, 0xD0, + 0xFE, 0xCF, 0x9C, 0xAD, 0x3A, 0x0B, 0x58, 0x69, + 0x04, 0x35, 0x66, 0x57, 0xC0, 0xF1, 0xA2, 0x93, + 0xBD, 0x8C, 0xDF, 0xEE, 0x79, 0x48, 0x1B, 0x2A, + 0xC1, 0xF0, 0xA3, 0x92, 0x05, 0x34, 0x67, 0x56, + 0x78, 0x49, 0x1A, 0x2B, 0xBC, 0x8D, 0xDE, 0xEF, + 0x82, 0xB3, 0xE0, 0xD1, 0x46, 0x77, 0x24, 0x15, + 0x3B, 0x0A, 0x59, 0x68, 0xFF, 0xCE, 0x9D, 0xAC +}; + +static uint8_t CommandCrc8(const void* data, size_t len) +{ + const uint8_t* ptr = (const uint8_t*)data; + uint8_t crc = 0xFF; + crc = s_crc8_table[crc ^ ptr[0]]; + for (size_t n = 2; n < len; ++n) + crc = s_crc8_table[crc ^ ptr[n]]; + return crc; +} + +int i2c_master_start() +{ + memset(&s_i2c_config, 0, sizeof(s_i2c_config)); + s_i2c_config.mode = I2C_MODE_MASTER; + s_i2c_config.sda_io_num = PIN_I2C_SDA; + s_i2c_config.scl_io_num = PIN_I2C_SCL; + s_i2c_config.sda_pullup_en = GPIO_PULLUP_ENABLE; + s_i2c_config.scl_pullup_en = GPIO_PULLUP_ENABLE; + s_i2c_config.master.clk_speed = 50000; + + esp_err_t err = i2c_param_config(I2C_MASTER_NUM_PORT, &s_i2c_config); + if (err != ESP_OK) + return err; + + return i2c_driver_install(I2C_MASTER_NUM_PORT, s_i2c_config.mode, I2C_MASTER_TX_BUF_LEN, I2C_MASTER_RX_BUF_LEN, 0); +} + 
+std::vector i2c_master_scan(uint8_t start, uint8_t end) +{ + std::vector vec; + for (int addr = start; addr < end; ++addr) + { + i2c_cmd_handle_t cmd = i2c_cmd_link_create(); + i2c_master_start(cmd); + i2c_master_write_byte(cmd, (addr << 1) | I2C_MASTER_WRITE, true); + //i2c_master_write(cmd, data_wr, size, true); + i2c_master_stop(cmd); + esp_err_t ret = i2c_master_cmd_begin(I2C_MASTER_NUM_PORT, cmd, 50 / portTICK_RATE_MS); + i2c_cmd_link_delete(cmd); + if (ret == ESP_OK) + vec.push_back(addr); + } + return vec; +} + +void i2c_feed_slaves(const std::vector& slaves, uint8_t id, uint8_t nonce_start, float difficulty, const uint8_t* buffer) +{ + JobI2cRequest request; + request.cmd = I2C_CMD_FEED; + request.id = id; + request.difficulty = difficulty; + memcpy(request.buffer, buffer, sizeof(request.buffer)); + + for (size_t n = 0; n < slaves.size(); ++n) + { + request.nonce_start = nonce_start; + nonce_start += 0x10; + request.crc = CommandCrc8(&request, sizeof(request)); + + i2c_cmd_handle_t cmd = i2c_cmd_link_create(); + i2c_master_start(cmd); + i2c_master_write_byte(cmd, (slaves[n] << 1) | I2C_MASTER_WRITE, true); + i2c_master_write(cmd, (const uint8_t*)&request, sizeof(request), true); + i2c_master_stop(cmd); + i2c_master_cmd_begin(I2C_MASTER_NUM_PORT, cmd, 5 / portTICK_RATE_MS); + i2c_cmd_link_delete(cmd); + } +} + + +void i2c_hit_slaves(const std::vector& slaves) +{ + uint8_t request[2]; + request[0] = I2C_CMD_REQUEST_RESULT; + request[1] = CommandCrc8(request, 2); + for (size_t n = 0; n < slaves.size(); ++n) + { + i2c_cmd_handle_t cmd = i2c_cmd_link_create(); + i2c_master_start(cmd); + i2c_master_write_byte(cmd, (slaves[n] << 1) | I2C_MASTER_WRITE, true); + i2c_master_write(cmd, request, sizeof(request), true); + i2c_master_stop(cmd); + i2c_master_cmd_begin(I2C_MASTER_NUM_PORT, cmd, 5 / portTICK_RATE_MS); + i2c_cmd_link_delete(cmd); + } +} + +std::vector i2c_harvest_slaves(const std::vector& slaves, uint8_t id, uint32_t &total_procesed_nonce) +{ + std::vector 
nonce_vector; + JobI2cResult result; + for (size_t n = 0; n < slaves.size(); ++n) + { + i2c_cmd_handle_t cmd = i2c_cmd_link_create(); + i2c_master_start(cmd); + i2c_master_write_byte(cmd, (slaves[n] << 1) | I2C_MASTER_READ, true); + i2c_master_read(cmd, (uint8_t*)&result, sizeof(result), I2C_MASTER_LAST_NACK); + i2c_master_stop(cmd); + i2c_master_cmd_begin(I2C_MASTER_NUM_PORT, cmd, 5 / portTICK_RATE_MS); + i2c_cmd_link_delete(cmd); + + uint8_t crc = CommandCrc8(&result, sizeof(result)); + + if (crc != result.crc) + continue; + if (result.nonce != 0xFFFFFFFF) + nonce_vector.push_back(result.nonce); + total_procesed_nonce += result.processed_nonce; + } + return nonce_vector; +} diff --git a/src/i2c_master.h b/src/i2c_master.h new file mode 100644 index 0000000..8ea73e4 --- /dev/null +++ b/src/i2c_master.h @@ -0,0 +1,9 @@ +#include +#include +#pragma once + +int i2c_master_start(); +std::vector i2c_master_scan(uint8_t start, uint8_t end); +void i2c_feed_slaves(const std::vector& slaves, uint8_t id, uint8_t nonce_start, float difficulty, const uint8_t* buffer); +void i2c_hit_slaves(const std::vector& slaves); +std::vector i2c_harvest_slaves(const std::vector& slaves, uint8_t id, uint32_t &total_procesed_nonce); \ No newline at end of file diff --git a/src/mining.cpp b/src/mining.cpp index 0dfba08..822b79b 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -16,6 +16,7 @@ #include #include #include "mbedtls/sha256.h" +#include "i2c_master.h" //10 Jobs per second #define NONCE_PER_JOB_SW 4096 @@ -43,7 +44,7 @@ volatile uint32_t shares; // increase if blockhash has 32 bits of zeroes volatile uint32_t valids; // increased if blockhash <= target // Track best diff -double best_diff = 0.0; +double s_best_diff = 0.0; // Variables to hold data from custom textboxes //Track mining stats in non volatile memory @@ -177,11 +178,22 @@ void runStratumWorker(void *name) { Serial.printf("### [Total Heap / Free heap / Min free heap]: %d / %d / %d \n", ESP.getHeapSize(), 
ESP.getFreeHeap(), ESP.getMinFreeHeap()); #endif - // connect to pool - + std::vector i2c_slave_vector; + + //scan for i2c slaves + if (i2c_master_start() == 0) + i2c_slave_vector = i2c_master_scan(0x0, 0x80); + + Serial.printf("Found %d slave workers\n", i2c_slave_vector.size()); + Serial.print(" Workers: "); + for (size_t n = 0; n < i2c_slave_vector.size(); ++n) + Serial.printf("0x%02X,", (uint32_t)i2c_slave_vector[n]); + Serial.println(""); + + // connect to pool double currentPoolDifficulty = DEFAULT_DIFFICULTY; uint32_t nonce_pool = 0; - uint32_t job_pool = 0; + uint32_t job_pool = 0xFFFFFFFF; while(true) { @@ -259,6 +271,10 @@ void runStratumWorker(void *name) { templates++; job_pool++; + uint32_t mh = hashes/1000000; + Mhashes += mh; + hashes -= mh*1000000; + //Prepare data for new jobs mMiner=calculateMiningData(mWorker, mJob); @@ -277,6 +293,8 @@ void runStratumWorker(void *name) { esp_sha_release_hardware(); #endif + nonce_pool = 0x10000000; + { std::lock_guard lock(s_job_mutex); for (int i = 0; i < 4; ++ i) @@ -289,6 +307,9 @@ void runStratumWorker(void *name) { #endif } } + //Nonce for nonce_pool starts from 0x10000000 + //For i2c slave we give nonces from 0x20000000, that is 0x10000000 nonces per slave + i2c_feed_slaves(i2c_slave_vector, job_pool & 0xFF, 0x20, currentPoolDifficulty, mMiner.bytearray_blockheader); } break; case MINING_SET_DIFFICULTY: parse_mining_set_difficulty(line, currentPoolDifficulty); @@ -298,12 +319,47 @@ void runStratumWorker(void *name) { } } - vTaskDelay(50 / portTICK_PERIOD_MS); //Small delay std::list> job_result_list; + if (i2c_slave_vector.empty() || job_pool == 0xFFFFFFFF) + { + vTaskDelay(50 / portTICK_PERIOD_MS); //Small delay + } else + { + uint32_t time_start = millis(); + i2c_hit_slaves(i2c_slave_vector); + vTaskDelay(5 / portTICK_PERIOD_MS); + uint32_t nonces_done = 0; + std::vector nonce_vector = i2c_harvest_slaves(i2c_slave_vector, job_pool & 0xFF, nonces_done); + hashes += nonces_done; + for (size_t n = 0; n < 
nonce_vector.size(); ++n) + { + std::shared_ptr result = std::make_shared(); + ((uint32_t*)(mMiner.bytearray_blockheader+64+12))[0] = nonce_vector[n]; + nerd_sha256d_baked(diget_mid, mMiner.bytearray_blockheader+64, bake, result->hash); + result->id = job_pool; + result->nonce = nonce_vector[n]; + result->nonce_count = 0; + result->difficulty = diff_from_target(result->hash); + job_result_list.push_back(result); + } + uint32_t time_end = millis(); + //if (nonces_done > 16384) + //Serial.printf("Harvest slaves in %dms hashes=%d\n", time_end - time_start, nonces_done); + if (time_end > time_start) + { + uint32_t elapsed = time_end - time_start; + if (elapsed < 50) + vTaskDelay((50 - elapsed) / portTICK_PERIOD_MS); + } else + vTaskDelay(40 / portTICK_PERIOD_MS); + } + + + if (job_pool != 0xFFFFFFFF) { std::lock_guard lock(s_job_mutex); - job_result_list = s_job_result_list; + job_result_list.insert(job_result_list.end(), s_job_result_list.begin(), s_job_result_list.end()); s_job_result_list.clear(); while (s_job_request_list_sw.size() < 4) @@ -327,7 +383,7 @@ void runStratumWorker(void *name) { job_result_list.pop_front(); hashes += res->nonce_count; - if (res->difficulty > currentPoolDifficulty && job_pool == res->id) + if (res->difficulty > currentPoolDifficulty && job_pool == res->id && res->nonce != 0xFFFFFFFF) { tx_mining_submit(client, mWorker, mJob, res->nonce); Serial.print(" - Current diff share: "); Serial.println(res->difficulty,12); @@ -338,8 +394,8 @@ void runStratumWorker(void *name) { Serial.println(""); mLastTXtoPool = millis(); - if (res->difficulty > best_diff) - best_diff = res->difficulty; + if (res->difficulty > s_best_diff) + s_best_diff = res->difficulty; // check if 32bit share if(res->hash[29] !=0 || res->hash[28] !=0) @@ -366,6 +422,7 @@ void minerWorkerSw(void * task_id) std::shared_ptr job; std::shared_ptr result; uint8_t hash[32]; + uint32_t wdt_counter = 0; while (1) { { @@ -385,7 +442,7 @@ void minerWorkerSw(void * task_id) if (job) { 
result = std::make_shared(); - result->difficulty = 0.0; + result->difficulty = job->difficulty; result->nonce = 0xFFFFFFFF; result->id = job->id; result->nonce_count = job->nonce_count; @@ -396,7 +453,7 @@ void minerWorkerSw(void * task_id) if(hash[31] == 0 && hash[30] == 0) { double diff_hash = diff_from_target(hash); - if (diff_hash > best_diff) + if (diff_hash > result->difficulty) { result->difficulty = diff_hash; result->nonce = job->nonce_start+n; @@ -406,6 +463,13 @@ void minerWorkerSw(void * task_id) } } else vTaskDelay(2 / portTICK_PERIOD_MS); + + wdt_counter++; + if (wdt_counter >= 8) + { + wdt_counter = 0; + esp_task_wdt_reset(); + } } } @@ -450,6 +514,8 @@ void minerWorkerHw(void * task_id) uint8_t interResult[64]; uint8_t hash[32]; + uint32_t wdt_counter = 0; + memset(interResult, 0, sizeof(interResult)); interResult[32] = 0x80; interResult[62] = 0x01; @@ -474,9 +540,9 @@ void minerWorkerHw(void * task_id) { result = std::make_shared(); result->id = job->id; - result->nonce = 0xFFFFFF; + result->nonce = 0xFFFFFFFF; result->nonce_count = job->nonce_count; - result->difficulty = 0.0; + result->difficulty = job->difficulty; uint8_t* sha_buffer = job->buffer_upper; esp_sha_acquire_hardware(); @@ -521,358 +587,17 @@ void minerWorkerHw(void * task_id) esp_sha_release_hardware(); } else vTaskDelay(2 / portTICK_PERIOD_MS); + + wdt_counter++; + if (wdt_counter >= 8) + { + wdt_counter = 0; + esp_task_wdt_reset(); + } } } #endif -#if 0 -void runMiner(void * task_id) { - - unsigned int miner_id = (uint32_t)task_id; - - Serial.printf("[MINER] %d Started runMiner Task!\n", miner_id); - - uint32_t task_current_id = 0; - - while(1) - { - //Wait new job - s_thread_busy[miner_id] = 0; - while (task_current_id == s_thread_task_id) - vTaskDelay(1 / portTICK_PERIOD_MS); //Small delay to join both mining threads - - task_current_id = s_thread_task_id; - s_thread_busy[miner_id] = 1; - - Serial.printf("[MINER] %d Task=%d\n", miner_id, task_current_id); - - 
mMonitor.NerdStatus = NM_hashing; - - //Prepare Premining data - nerdSHA256_context nerdMidstate; //NerdShaplus - uint8_t hash[32]; - uint8_t interResult[64]; - uint8_t hash_validate[32]; - uint8_t midstate[32]; - uint32_t bake[16]; - - - unsigned char *header64; - //Calcular midstate - if (miner_id == 0) - { - #ifdef HARDWARE_SHA265 - esp_sha_acquire_hardware(); - sha_hal_hash_block(SHA2_256, mMiner.bytearray_blockheader, 64/4, true); - sha_hal_read_digest(SHA2_256, midstate); - esp_sha_release_hardware(); - - memset(mMiner.bytearray_blockheader+80, 0, 128-80); - mMiner.bytearray_blockheader[80] = 0x80; - mMiner.bytearray_blockheader[126] = 0x02; - mMiner.bytearray_blockheader[127] = 0x80; - - memset(interResult, 0, sizeof(interResult)); - interResult[32] = 0x80; - interResult[62] = 0x01; - interResult[63] = 0x00; - - #else - nerd_mids(&nerdMidstate, mMiner.bytearray_blockheader); //NerdShaplus - #endif - header64 = mMiner.bytearray_blockheader + 64; - } else - { - memcpy(mMiner.bytearray_blockheader2, &mMiner.bytearray_blockheader, 80); - nerd_mids(&nerdMidstate, mMiner.bytearray_blockheader2); //NerdShaplus - header64 = mMiner.bytearray_blockheader2 + 64; - nerd_sha256_bake(nerdMidstate.digest, header64, bake); - } - - uint32_t nonce = 0; - uint32_t nonce_end = 0; - uint32_t startT = micros(); - - // each miner thread needs to track its own blockheader template - uint8_t temp; - - bool is16BitShare=true; - Serial.println(">>> STARTING TO HASH NONCES"); - while(true) - { - if (nonce >= nonce_end) - { - std::lock_guard lock(s_nonce_batch_mutex); - nonce = s_nonce_batch; -#ifdef HARDWARE_SHA265 - if (miner_id == 0) - nonce_end = nonce + 512; - else -#endif - nonce_end = nonce + 128; - if (nonce_end > TARGET_NONCE) - nonce_end = TARGET_NONCE; - s_nonce_batch = nonce_end; - } - -#ifdef HARDWARE_SHA265 - if (miner_id == 0) - { - //Hardware - uint32_t nonce_start = nonce; - esp_sha_acquire_hardware(); - while (nonce < nonce_end) - { - //memcpy(header64+12, &nonce, 4); 
- ((uint32_t*)(header64+12))[0] = nonce; - - sha_ll_write_digest(SHA2_256, midstate, 256 / 32); //no need to unroll - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(header64, 64/4); - nerd_sha_ll_fill_text_block_sha256(header64); - sha_ll_continue_block(SHA2_256); - - sha_ll_load(SHA2_256); - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, interResult, 256 / 32); - - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(interResult, 64/4); - nerd_sha_ll_fill_text_block_sha256(interResult); - sha_ll_start_block(SHA2_256); - - sha_ll_load(SHA2_256); - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, hash, 256 / 32); - #ifdef SHA256_VALIDATE - mbedtls_sha256_context ctx; - mbedtls_sha256_init(&ctx); - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, header64-64, 80); - mbedtls_sha256_finish_ret(&ctx, interResult); - - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, interResult, 32); - mbedtls_sha256_finish_ret(&ctx, hash_validate); - mbedtls_sha256_free(&ctx); - - bool failed = false; - for (size_t i = 0; i < 32; i++) - { - if (hash[i] != hash_validate[i]) - failed = true; - } - if (failed) - { - Serial.println("Hardware SHA256 Failed"); - Serial.println("HwSha256:"); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x,", hash[i]); - Serial.println(""); - - Serial.println("mbedtls Sha256:"); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x,", hash_validate[i]); - Serial.println(""); - vTaskDelay(500 / portTICK_PERIOD_MS); - return; //Crash Here - } - #endif - nonce++; - if(hash[31] == 0 && hash[30] == 0) - break; - } - esp_sha_release_hardware(); - hashes += nonce - nonce_start; - } else -#endif - { - uint32_t nonce_start = nonce; - while (nonce < nonce_end) - { - //memcpy(header64+12, &nonce, 4); - ((uint32_t*)(header64+12))[0] = nonce; - //nerd_sha256d(&nerdMidstate, header64, hash); //Boosted 
80Khs sha - nerd_sha256d_baked(nerdMidstate.digest, header64, bake, hash); - - #ifdef SHA256_VALIDATE - //Important - Remove Return optimization - mbedtls_sha256_context ctx; - mbedtls_sha256_init(&ctx); - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, header64-64, 80); - mbedtls_sha256_finish_ret(&ctx, interResult); - - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, interResult, 32); - mbedtls_sha256_finish_ret(&ctx, hash_validate); - mbedtls_sha256_free(&ctx); - - bool failed = false; - for (size_t i = 0; i < 32; i++) - { - if (hash[i] != hash_validate[i]) - failed = true; - } - if (failed) - { - Serial.println("SHA256 Failed"); - Serial.println("Input:"); - for (size_t i = 0; i < 80; i++) - { - Serial.printf("0x%02x,", (header64-64)[i]); - if (i % 16 == 15) - Serial.println(""); - } - Serial.println(""); - - Serial.println("Midstate:"); - for (size_t i = 0; i < 8; i++) - { - Serial.printf("0x%08x,", nerdMidstate.digest[i]); - Serial.println(""); - } - - Serial.println("NerdSha256:"); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x,", hash[i]); - Serial.println(""); - - Serial.println("mbedtls Sha256:"); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x,", hash_validate[i]); - Serial.println(""); - vTaskDelay(500 / portTICK_PERIOD_MS); - return; //Crash Here - } - #endif - nonce++; - if(hash[31] == 0 && hash[30] == 0) - break; - } - hashes += nonce - nonce_start; - } - - /*Serial.print("hash1: "); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x", hash[i]); - Serial.println(""); - Serial.print("hash2: "); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x", hash2[i]); - Serial.println(""); */ - - if (nonce >= TARGET_NONCE) - break; //exit - - - if(task_current_id <= s_thread_task_aborted_id) - { - Serial.printf("MINER %d WORK ABORTED Task=%d Abort=%d\n", miner_id, task_current_id, s_thread_task_aborted_id); - break; - } - - // check if 16bit share - if(hash[31] !=0 || hash[30] !=0) - continue; 
- -#if 0 - if (miner_id == 1) - { - //validate - mbedtls_sha256_context ctx; - mbedtls_sha256_init(&ctx); - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, header64-64, 80); - mbedtls_sha256_finish_ret(&ctx, interResult); - - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, interResult, 32); - mbedtls_sha256_finish_ret(&ctx, hash_validate); - mbedtls_sha256_free(&ctx); - - bool failed = false; - for (size_t i = 0; i < 32; i++) - { - if (hash[i] != hash_validate[i]) - failed = true; - } - if (failed) - Serial.printf("MINER %d Sha256 Fail\n", miner_id); - else - Serial.printf("MINER %d Sha256 Good\n", miner_id); - } -#endif - //Check target to submit - //Difficulty of 1 > 0x00000000FFFF0000000000000000000000000000000000000000000000000000 - //NM2 pool diff 1e-9 > Target = diff_1 / diff_pool > 0x00003B9ACA00....00 - //Swapping diff bytes little endian >>>>>>>>>>>>>>>> 0x0000DC59D300....00 - //if((hash[29] <= 0xDC) && (hash[28] <= 0x59)) //0x00003B9ACA00 > diff value for 1e-9 - double diff_hash = diff_from_target(hash); - - // update best diff - if (diff_hash > best_diff) - best_diff = diff_hash; - - if(diff_hash > mMiner.poolDifficulty)//(hash[29] <= 0x3B)//(diff_hash > 1e-9) - { - { - std::lock_guard lock(s_client_mutex); - tx_mining_submit(client, mWorker, mJob, nonce-1); - } - Serial.print(" - Current diff share: "); Serial.println(diff_hash,12); - Serial.print(" - Current pool diff : "); Serial.println(mMiner.poolDifficulty,12); - Serial.print(" - TX SHARE: "); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x", hash[i]); - #ifdef DEBUG_MINING - Serial.println(""); - Serial.print(" - Current nonce: "); Serial.println(nonce); - Serial.print(" - Current block header: "); - for (size_t i = 0; i < 80; i++) { - Serial.printf("%02x", mMiner.bytearray_blockheader[i]); - } - #endif - Serial.println(""); - mLastTXtoPool = millis(); - } - - // check if 32bit share - if(hash[29] !=0 || hash[28] !=0) - continue; - shares++; - - // 
check if valid header - if(checkValid(hash, mMiner.bytearray_target)){ - Serial.printf("[WORKER] %d CONGRATULATIONS! Valid block found with nonce: %d | 0x%x\n", miner_id, nonce, nonce); - valids++; - Serial.printf("[WORKER] %d Submitted work valid!\n", miner_id); - // wait for new job - break; - } - } // exit if found a valid result or nonce > MAX_NONCE - - //wc_Sha256Free(&sha256); - //wc_Sha256Free(midstate); - Serial.print(">>> Finished job waiting new data from pool"); - - if(hashes>=MAX_NONCE_STEP) - { - Mhashes=Mhashes+MAX_NONCE_STEP/1000000; - hashes=hashes-MAX_NONCE_STEP; - } - - uint32_t duration = micros() - startT; - if (esp_task_wdt_reset() == ESP_OK) - Serial.print(">>> Resetting watchdog timer"); - } //while (1) -} - -#endif #define DELAY 100 #define REDRAW_EVERY 10 @@ -888,7 +613,7 @@ void restoreStat() { ret = nvs_open("state", NVS_READWRITE, &stat_handle); size_t required_size = sizeof(double); - nvs_get_blob(stat_handle, "best_diff", &best_diff, &required_size); + nvs_get_blob(stat_handle, "best_diff", &s_best_diff, &required_size); nvs_get_u32(stat_handle, "Mhashes", &Mhashes); uint32_t nv_shares, nv_valids; nvs_get_u32(stat_handle, "shares", &nv_shares); @@ -902,7 +627,7 @@ void restoreStat() { void saveStat() { if(!Settings.saveStats) return; Serial.printf("[MONITOR] Saving stats\n"); - nvs_set_blob(stat_handle, "best_diff", &best_diff, sizeof(double)); + nvs_set_blob(stat_handle, "best_diff", &s_best_diff, sizeof(double)); nvs_set_u32(stat_handle, "Mhashes", Mhashes); nvs_set_u32(stat_handle, "shares", shares); nvs_set_u32(stat_handle, "valids", valids); @@ -913,7 +638,7 @@ void saveStat() { void resetStat() { Serial.printf("[MONITOR] Resetting NVS stats\n"); templates = hashes = Mhashes = totalKHashes = elapsedKHs = upTime = shares = valids = 0; - best_diff = 0.0; + s_best_diff = 0.0; saveStat(); } @@ -931,14 +656,17 @@ void runMonitor(void *name) uint32_t seconds_elapsed = 0; - totalKHashes = (Mhashes * 1000) + hashes / 1000;; + totalKHashes 
= (Mhashes * 1000) + hashes / 1000; + uint32_t last_update_millis = millis(); while (1) { - if ((frame % REDRAW_EVERY) == 0) + uint32_t now_millis = millis(); + if (now_millis < last_update_millis || now_millis >= last_update_millis + 990) { - unsigned long mElapsed = millis() - mLastCheck; - mLastCheck = millis(); + unsigned long mElapsed = now_millis - mLastCheck; + mLastCheck = now_millis; + last_update_millis = now_millis; unsigned long currentKHashes = (Mhashes * 1000) + hashes / 1000; elapsedKHs = currentKHashes - totalKHashes; totalKHashes = currentKHashes; diff --git a/src/monitor.cpp b/src/monitor.cpp index fd4d34c..a1081b0 100644 --- a/src/monitor.cpp +++ b/src/monitor.cpp @@ -20,7 +20,7 @@ extern uint64_t upTime; extern uint32_t shares; // increase if blockhash has 32 bits of zeroes extern uint32_t valids; // increased if blockhash <= targethalfshares -extern double best_diff; // track best diff +extern double s_best_diff; // track best diff extern monitor_data mMonitor; @@ -235,9 +235,44 @@ String getTime(void){ return LocalHour; } +enum EHashRateScale +{ + HashRateScale_99KH, + HashRateScale_999KH, + HashRateScale_9MH +}; + +static EHashRateScale s_hashrate_scale = HashRateScale_99KH; +static uint32_t s_skip_first = 3; +static double s_top_hashrate = 0.0; + String getCurrentHashRate(unsigned long mElapsed) { - return String((1.0 * (elapsedKHs * 1000)) / mElapsed, 2); + double hashrate = (double)elapsedKHs * 1000.0 / (double)mElapsed; + if (s_skip_first > 0) + { + s_skip_first--; + } else + { + if (hashrate > s_top_hashrate) + { + s_top_hashrate = hashrate; + if (hashrate > 999.9) + s_hashrate_scale = HashRateScale_9MH; + else if (hashrate > 99.9) + s_hashrate_scale = HashRateScale_999KH; + } + } + + switch (s_hashrate_scale) + { + case HashRateScale_99KH: + return String(hashrate, 2); + case HashRateScale_999KH: + return String(hashrate, 1); + default: + return String((int)hashrate ); + } } mining_data getMiningData(unsigned long mElapsed) @@ -245,7 
+280,7 @@ mining_data getMiningData(unsigned long mElapsed) mining_data data; char best_diff_string[16] = {0}; - suffix_string(best_diff, best_diff_string, 16, 0); + suffix_string(s_best_diff, best_diff_string, 16, 0); char timeMining[15] = {0}; uint64_t secElapsed = upTime + (esp_timer_get_time() / 1000000); diff --git a/src/utils.cpp b/src/utils.cpp index 5943d54..86156bd 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -79,19 +79,19 @@ static const double truediffone = 2695953529101130949315647634472399133601089873 /* Converts a little endian 256 bit value to a double */ double le256todouble(const void *target) { - uint64_t *data64; + const uint64_t *data64; double dcut64; - data64 = (uint64_t *)(target + 24); + data64 = (const uint64_t *)((const uint8_t*)target + 24); dcut64 = *data64 * 6277101735386680763835789423207666416102355444464034512896.0; - data64 = (uint64_t *)(target + 16); + data64 = (const uint64_t *)((const uint8_t*)target + 16); dcut64 += *data64 * 340282366920938463463374607431768211456.0; - data64 = (uint64_t *)(target + 8); + data64 = (const uint64_t *)((const uint8_t*)target + 8); dcut64 += *data64 * 18446744073709551616.0; - data64 = (uint64_t *)(target); + data64 = (const uint64_t *)(target); dcut64 += *data64; return dcut64; From 58858d1c318102f23352dd37562383c9f295dae8 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Fri, 31 Jan 2025 20:27:04 +0200 Subject: [PATCH 11/39] Revert best_diff name --- src/mining.cpp | 12 ++++++------ src/monitor.cpp | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index 822b79b..8358d66 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -44,7 +44,7 @@ volatile uint32_t shares; // increase if blockhash has 32 bits of zeroes volatile uint32_t valids; // increased if blockhash <= target // Track best diff -double s_best_diff = 0.0; +double best_diff = 0.0; // Variables to hold data from custom textboxes //Track mining stats in non volatile memory @@ -394,8 
+394,8 @@ void runStratumWorker(void *name) { Serial.println(""); mLastTXtoPool = millis(); - if (res->difficulty > s_best_diff) - s_best_diff = res->difficulty; + if (res->difficulty > best_diff) + best_diff = res->difficulty; // check if 32bit share if(res->hash[29] !=0 || res->hash[28] !=0) @@ -613,7 +613,7 @@ void restoreStat() { ret = nvs_open("state", NVS_READWRITE, &stat_handle); size_t required_size = sizeof(double); - nvs_get_blob(stat_handle, "best_diff", &s_best_diff, &required_size); + nvs_get_blob(stat_handle, "best_diff", &best_diff, &required_size); nvs_get_u32(stat_handle, "Mhashes", &Mhashes); uint32_t nv_shares, nv_valids; nvs_get_u32(stat_handle, "shares", &nv_shares); @@ -627,7 +627,7 @@ void restoreStat() { void saveStat() { if(!Settings.saveStats) return; Serial.printf("[MONITOR] Saving stats\n"); - nvs_set_blob(stat_handle, "best_diff", &s_best_diff, sizeof(double)); + nvs_set_blob(stat_handle, "best_diff", &best_diff, sizeof(double)); nvs_set_u32(stat_handle, "Mhashes", Mhashes); nvs_set_u32(stat_handle, "shares", shares); nvs_set_u32(stat_handle, "valids", valids); @@ -638,7 +638,7 @@ void saveStat() { void resetStat() { Serial.printf("[MONITOR] Resetting NVS stats\n"); templates = hashes = Mhashes = totalKHashes = elapsedKHs = upTime = shares = valids = 0; - s_best_diff = 0.0; + best_diff = 0.0; saveStat(); } diff --git a/src/monitor.cpp b/src/monitor.cpp index a1081b0..900aebc 100644 --- a/src/monitor.cpp +++ b/src/monitor.cpp @@ -20,7 +20,7 @@ extern uint64_t upTime; extern uint32_t shares; // increase if blockhash has 32 bits of zeroes extern uint32_t valids; // increased if blockhash <= targethalfshares -extern double s_best_diff; // track best diff +extern double best_diff; // track best diff extern monitor_data mMonitor; @@ -280,7 +280,7 @@ mining_data getMiningData(unsigned long mElapsed) mining_data data; char best_diff_string[16] = {0}; - suffix_string(s_best_diff, best_diff_string, 16, 0); + suffix_string(best_diff, 
best_diff_string, 16, 0); char timeMining[15] = {0}; uint64_t secElapsed = upTime + (esp_timer_get_time() / 1000000); From 278d9cec27a7c38db5e1b0e5057654c03afeea4e Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 1 Feb 2025 01:59:23 +0200 Subject: [PATCH 12/39] sha dma test; Increase hashrate ~212KH/s --- src/NerdMinerV2.ino.cpp | 95 +++++++++++++++++++-- src/mining.cpp | 177 ++++++++++++++++++++++++++++++++-------- src/stratum.cpp | 18 +++- src/stratum.h | 3 +- 4 files changed, 253 insertions(+), 40 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 0b6c8bd..065197c 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -61,6 +61,10 @@ const char* ntpServer = "pool.ntp.org"; #include #include #include +#include +#include +#include +#include static const uint8_t s_test_buffer[128] = { @@ -94,7 +98,7 @@ static uint8_t hash_aligned[64] __attribute__((aligned(256))); static inline void nerd_sha_hal_wait_idle() { - while (sha_ll_busy()) + while (REG_READ(SHA_BUSY_REG)) {} } @@ -134,9 +138,37 @@ static inline void nerd_sha_ll_write_digest_sha256(void *digest_state) REG_WRITE(®_addr_buf[5], digest_state_words[5]); REG_WRITE(®_addr_buf[6], digest_state_words[6]); REG_WRITE(®_addr_buf[7], digest_state_words[7]); - REG_WRITE(®_addr_buf[8], digest_state_words[8]); } +//void IRAM_ATTR esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words) +static inline void nerd_sha_ll_read_digest(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); +#if 0 + for (uint32_t i = 0; i < 256 / 32; ++i) + { + ((uint32_t*)ptr)[i] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + i * 4); + } +#else + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); + ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4); + 
((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); +#endif + DPORT_INTERRUPT_RESTORE(); +} + +static IRAM_ATTR uint8_t dma_buffer[128] __attribute__((aligned(32))); +static IRAM_ATTR uint8_t dma_inter[64] __attribute__((aligned(32))); +static IRAM_ATTR uint8_t dma_hash[32] __attribute__((aligned(32))); +static DRAM_ATTR lldesc_t s_dma_descr_input; +static DRAM_ATTR lldesc_t s_dma_descr_buf; +static DRAM_ATTR lldesc_t s_dma_descr_inter; + IRAM_ATTR void HwShaTest() { uint8_t interResult[64]; @@ -303,7 +335,8 @@ IRAM_ATTR void HwShaTest() sha_ll_load(SHA2_256); //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + //sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + nerd_sha_ll_read_digest(interResult); //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); //sha_hal_wait_idle(); @@ -316,7 +349,59 @@ IRAM_ATTR void HwShaTest() sha_ll_load(SHA2_256); //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, hash, 256 / 32); + //sha_ll_read_digest(SHA2_256, hash, 256 / 32); + nerd_sha_ll_read_digest(hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //DMA hash + uint8_t* dma_cap_buf = (uint8_t*)heap_caps_malloc(128, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); + memcpy(dma_cap_buf, s_test_buffer, 128); + + uint8_t* dma_cap_inter = (uint8_t*)heap_caps_malloc(64, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); + memcpy(dma_cap_inter, interResult, 64); + + uint8_t* dma_cap_hash = (uint8_t*)heap_caps_malloc(32, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); + + memset(&s_dma_descr_input, 0, sizeof(lldesc_t)); + memset(&s_dma_descr_buf, 0, sizeof(lldesc_t)); + memset(&s_dma_descr_inter, 0, sizeof(lldesc_t)); + + + s_dma_descr_input.length = 64; + s_dma_descr_input.size = 64; + s_dma_descr_input.owner = 1; + 
s_dma_descr_input.eof = 1; + s_dma_descr_input.buf = dma_cap_buf+64; + + s_dma_descr_buf.length = 64; + s_dma_descr_buf.size = 64; + s_dma_descr_buf.owner = 1; + s_dma_descr_buf.buf = dma_cap_buf; + s_dma_descr_buf.eof = 0; + s_dma_descr_buf.empty = (uint32_t)(&s_dma_descr_input); + + s_dma_descr_inter.length = 64; + s_dma_descr_inter.size = 64; + s_dma_descr_inter.owner = 1; + s_dma_descr_inter.buf = dma_cap_inter; + s_dma_descr_inter.eof = 1; + + //49.83KH/s + esp_sha_acquire_hardware(); + for (int i = 0; i < test_count; ++i) + { + esp_crypto_shared_gdma_start(&s_dma_descr_buf, NULL, GDMA_TRIG_PERIPH_SHA); + sha_hal_hash_dma(SHA2_256, 2, true); + sha_hal_wait_idle(); + esp_sha_read_digest_state(SHA2_256, dma_cap_inter); + + esp_crypto_shared_gdma_start(&s_dma_descr_inter, NULL, GDMA_TRIG_PERIPH_SHA); + sha_hal_hash_dma(SHA2_256, 1, true); + sha_hal_wait_idle(); + esp_sha_read_digest_state(SHA2_256, hash); } esp_sha_release_hardware(); #endif @@ -442,7 +527,7 @@ void setup() //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; #ifdef HARDWARE_SHA265 - xTaskCreate(minerWorkerHw, "MinerHw-0", 6000, (void*)0, 1, &minerTask1); + xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 2, &minerTask1); #else xTaskCreate(minerWorkerSw, "MinerSw-0", 6000, (void*)0, 1, &minerTask1); #endif diff --git a/src/mining.cpp b/src/mining.cpp index 8358d66..fc0696f 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -15,6 +15,7 @@ #include "drivers/storage/storage.h" #include #include +#include #include "mbedtls/sha256.h" #include "i2c_master.h" @@ -58,7 +59,7 @@ static miner_data mMiner; //Global miner data (Create a miner class TODO) mining_subscribe mWorker; mining_job mJob; monitor_data mMonitor; -bool isMinerSuscribed = false; +static bool volatile isMinerSuscribed = false; unsigned long mLastTXtoPool = millis(); int saveIntervals[7] = {5 * 60, 15 * 60, 30 * 60, 1 * 3600, 3 * 3600, 6 * 3600, 12 * 3600}; @@ 
-100,12 +101,17 @@ unsigned long mStart0Hashrate = 0; bool checkPoolInactivity(unsigned int keepAliveTime, unsigned long inactivityTime){ unsigned long currentKHashes = (Mhashes*1000) + hashes/1000; - unsigned long elapsedKHs = currentKHashes - totalKHashes; + unsigned long elapsedKHs = currentKHashes - totalKHashes; + + uint32_t time_now = millis(); // If no shares sent to pool // send something to pool to hold socket oppened - if(millis() - mLastTXtoPool > keepAliveTime){ - mLastTXtoPool = millis(); + if (time_now < mLastTXtoPool) //32bit wrap + mLastTXtoPool = time_now; + if ( time_now > mLastTXtoPool + keepAliveTime) + { + mLastTXtoPool = time_now; Serial.println(" Sending : KeepAlive suggest_difficulty"); //if (client.print("{}\n") == 0) { tx_suggest_difficulty(client, DEFAULT_DIFFICULTY); @@ -117,8 +123,8 @@ bool checkPoolInactivity(unsigned int keepAliveTime, unsigned long inactivityTim if(elapsedKHs == 0){ //Check if hashrate is 0 during inactivityTIme - if(mStart0Hashrate == 0) mStart0Hashrate = millis(); - if((millis()-mStart0Hashrate) > inactivityTime) { mStart0Hashrate=0; return true;} + if(mStart0Hashrate == 0) mStart0Hashrate = time_now; + if((time_now-mStart0Hashrate) > inactivityTime) { mStart0Hashrate=0; return true;} return false; } @@ -167,6 +173,26 @@ static void JobPush(std::list> &job_list, uint32_t job_list.push_back(job); } +struct Submition +{ + double diff; + bool is32bit; + bool isValid; +}; + +static void MiningJobStop(uint32_t &job_pool, std::map> & submition_map) +{ + { + std::lock_guard lock(s_job_mutex); + s_job_request_list_sw.clear(); + #ifdef HARDWARE_SHA265 + s_job_request_list_hw.clear(); + #endif + } + job_pool = 0xFFFFFFFF; + submition_map.clear(); +} + void runStratumWorker(void *name) { // TEST: https://bitcoin.stackexchange.com/questions/22929/full-example-data-for-scrypt-stratum-client @@ -179,6 +205,7 @@ void runStratumWorker(void *name) { #endif std::vector i2c_slave_vector; + std::map> s_submition_map; //scan for i2c 
slaves if (i2c_master_start() == 0) @@ -194,12 +221,15 @@ void runStratumWorker(void *name) { double currentPoolDifficulty = DEFAULT_DIFFICULTY; uint32_t nonce_pool = 0; uint32_t job_pool = 0xFFFFFFFF; + uint32_t last_job_time = millis(); while(true) { if(WiFi.status() != WL_CONNECTED){ // WiFi is disconnected, so reconnect now mMonitor.NerdStatus = NM_Connecting; + job_pool = 0xFFFFFFFF; + MiningJobStop(job_pool, s_submition_map); WiFi.reconnect(); vTaskDelay(5000 / portTICK_PERIOD_MS); continue; @@ -220,6 +250,7 @@ void runStratumWorker(void *name) { // STEP 1: Pool server connection (SUBSCRIBE) if(!tx_mining_subscribe(client, mWorker)) { client.stop(); + MiningJobStop(job_pool, s_submition_map); continue; } @@ -233,7 +264,9 @@ void runStratumWorker(void *name) { tx_suggest_difficulty(client, currentPoolDifficulty); isMinerSuscribed=true; - mLastTXtoPool = millis(); + uint32_t time_now = millis(); + mLastTXtoPool = time_now; + last_job_time = time_now; } //Check if pool is down for almost 5minutes and then restart connection with pool (1min=600000ms) @@ -242,9 +275,23 @@ void runStratumWorker(void *name) { Serial.println(" Detected more than 2 min without data form stratum server. 
Closing socket and reopening..."); client.stop(); isMinerSuscribed=false; + MiningJobStop(job_pool, s_submition_map); continue; } + { + uint32_t time_now = millis(); + if (time_now < last_job_time) //32bit wrap + last_job_time = time_now; + if (time_now >= last_job_time + 10*60*1000) //10minutes without job + { + client.stop(); + isMinerSuscribed=false; + MiningJobStop(job_pool, s_submition_map); + continue; + } + } + uint32_t hw_midstate[8]; uint32_t diget_mid[8]; uint32_t bake[16]; @@ -257,7 +304,6 @@ void runStratumWorker(void *name) { stratum_method result = parse_mining_method(line); switch (result) { - case STRATUM_PARSE_ERROR: Serial.println(" Parsed JSON: error on JSON"); break; case MINING_NOTIFY: if(parse_mining_notify(line, mJob)) { { @@ -271,6 +317,8 @@ void runStratumWorker(void *name) { templates++; job_pool++; + last_job_time = millis(); + uint32_t mh = hashes/1000000; Mhashes += mh; hashes -= mh*1000000; @@ -310,11 +358,44 @@ void runStratumWorker(void *name) { //Nonce for nonce_pool starts from 0x10000000 //For i2c slave we give nonces from 0x20000000, that is 0x10000000 nonces per slave i2c_feed_slaves(i2c_slave_vector, job_pool & 0xFF, 0x20, currentPoolDifficulty, mMiner.bytearray_blockheader); + } else + { + Serial.println("Parsing error, need restart"); + client.stop(); + isMinerSuscribed=false; + MiningJobStop(job_pool, s_submition_map); } break; case MINING_SET_DIFFICULTY: parse_mining_set_difficulty(line, currentPoolDifficulty); break; - case STRATUM_SUCCESS: Serial.println(" Parsed JSON: Success"); break; + case STRATUM_SUCCESS: { + unsigned long id = parse_extract_id(line); + auto itt = s_submition_map.find(id); + if (itt != s_submition_map.end()) + { + if (itt->second->diff > best_diff) + best_diff = itt->second->diff; + if (itt->second->is32bit) + shares++; + if (itt->second->isValid) + { + Serial.println("CONGRATULATIONS! 
Valid block found"); + valids++; + } + s_submition_map.erase(itt); + } + } + break; + case STRATUM_PARSE_ERROR: { + unsigned long id = parse_extract_id(line); + auto itt = s_submition_map.find(id); + if (itt != s_submition_map.end()) + { + Serial.printf("Refuse submition %d\n", id); + s_submition_map.erase(itt); + } + } + break; default: Serial.println(" Parsed JSON: unknown"); break; } @@ -385,28 +466,28 @@ void runStratumWorker(void *name) { hashes += res->nonce_count; if (res->difficulty > currentPoolDifficulty && job_pool == res->id && res->nonce != 0xFFFFFFFF) { - tx_mining_submit(client, mWorker, mJob, res->nonce); + unsigned long sumbit_id = 0; + tx_mining_submit(client, mWorker, mJob, res->nonce, sumbit_id); Serial.print(" - Current diff share: "); Serial.println(res->difficulty,12); Serial.print(" - Current pool diff : "); Serial.println(currentPoolDifficulty,12); Serial.print(" - TX SHARE: "); for (size_t i = 0; i < 32; i++) Serial.printf("%02x", res->hash[i]); Serial.println(""); - mLastTXtoPool = millis(); + mLastTXtoPool = millis(); - if (res->difficulty > best_diff) - best_diff = res->difficulty; - - // check if 32bit share - if(res->hash[29] !=0 || res->hash[28] !=0) - shares++; - - // check if valid header - if(checkValid(res->hash, mMiner.bytearray_target)) + std::shared_ptr submition = std::make_shared(); + submition->diff = res->difficulty; + submition->is32bit = (res->hash[29] == 0 && res->hash[28] == 0); + if (submition->is32bit) { - Serial.printf("CONGRATULATIONS! 
Valid block found with nonce: %d | 0x%x\n", res->nonce); - valids++; - } + submition->isValid = checkValid(res->hash, mMiner.bytearray_target); + } else + submition->isValid = false; + + s_submition_map.insert(std::make_pair(sumbit_id, submition)); + if (s_submition_map.size() > 32) + s_submition_map.erase(s_submition_map.begin()); } } } @@ -498,9 +579,30 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) REG_WRITE(®_addr_buf[15], data_words[15]); } +static inline void nerd_sha_ll_read_digest(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); +#if 0 + for (uint32_t i = 0; i < 256 / 32; ++i) + { + ((uint32_t*)ptr)[i] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + i * 4); + } +#else + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); + ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4); + ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); +#endif + DPORT_INTERRUPT_RESTORE(); +} + static inline void nerd_sha_hal_wait_idle() { - while (sha_ll_busy()) + while (REG_READ(SHA_BUSY_REG)) {} } @@ -550,28 +652,37 @@ void minerWorkerHw(void * task_id) { ((uint32_t*)(sha_buffer+12))[0] = job->nonce_start+n; - sha_ll_write_digest(SHA2_256, job->midstate, 256 / 32); //no need to unroll + //sha_hal_write_digest(SHA2_256, midstate); + sha_ll_write_digest(SHA2_256, job->midstate, 256 / 32); + //nerd_sha_ll_write_digest_sha256(midstate); + + //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(header64, 64/4); - nerd_sha_ll_fill_text_block_sha256(sha_buffer); + 
//sha_ll_fill_text_block(s_test_buffer+64, 64/4); + nerd_sha_ll_fill_text_block_sha256(sha_buffer+64); sha_ll_continue_block(SHA2_256); - + + //sha_hal_read_digest(SHA2_256, interResult); sha_ll_load(SHA2_256); //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, interResult, 256 / 32); - + //sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + nerd_sha_ll_read_digest(interResult); + + //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); //sha_ll_fill_text_block(interResult, 64/4); nerd_sha_ll_fill_text_block_sha256(interResult); sha_ll_start_block(SHA2_256); + //sha_hal_read_digest(SHA2_256, hash); sha_ll_load(SHA2_256); //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, hash, 256 / 32); + //sha_ll_read_digest(SHA2_256, hash, 256 / 32); + nerd_sha_ll_read_digest(hash); if(hash[31] == 0 && hash[30] == 0) { @@ -677,8 +788,8 @@ void runMonitor(void *name) if (elapsedKHs == 0) { Serial.printf(">>> [i] Miner: newJob>%s / inRun>%s) - Client: connected>%s / subscribed>%s / wificonnected>%s\n", - //(1) ? "true" : "false", 1 ? "true" : "false", - "true", "true", + "true",//(1) ? "true" : "false", + isMinerSuscribed ? "true" : "false", client.connected() ? "true" : "false", isMinerSuscribed ? "true" : "false", WiFi.status() == WL_CONNECTED ? 
"true" : "false"); } diff --git a/src/stratum.cpp b/src/stratum.cpp index 8237a44..7310b9b 100644 --- a/src/stratum.cpp +++ b/src/stratum.cpp @@ -205,12 +205,13 @@ bool parse_mining_notify(String line, mining_job& mJob) } -bool tx_mining_submit(WiFiClient& client, mining_subscribe mWorker, mining_job mJob, unsigned long nonce) +bool tx_mining_submit(WiFiClient& client, mining_subscribe mWorker, mining_job mJob, unsigned long nonce, unsigned long &submit_id) { char payload[BUFFER] = {0}; // Submit id = getNextId(id); + submit_id = id; sprintf(payload, "{\"id\": %u, \"method\": \"mining.submit\", \"params\": [\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"]}\n", id, mWorker.wName,//"bc1qvv469gmw4zz6qa4u4dsezvrlmqcqszwyfzhgwj", //mWorker.name, @@ -253,3 +254,18 @@ bool tx_suggest_difficulty(WiFiClient& client, double difficulty) return client.print(payload); } + + +unsigned long parse_extract_id(const String &line) +{ + DeserializationError error = deserializeJson(doc, line); + if (error) + return 0; + + if (!doc.containsKey("id")) + return 0; + + unsigned long id = doc["id"]; + + return id; +} \ No newline at end of file diff --git a/src/stratum.h b/src/stratum.h index d8110e7..7012bcf 100644 --- a/src/stratum.h +++ b/src/stratum.h @@ -60,11 +60,12 @@ stratum_method parse_mining_method(String line); bool parse_mining_notify(String line, mining_job& mJob); //Method Mining.submit -bool tx_mining_submit(WiFiClient& client, mining_subscribe mWorker, mining_job mJob, unsigned long nonce); +bool tx_mining_submit(WiFiClient& client, mining_subscribe mWorker, mining_job mJob, unsigned long nonce, unsigned long &submit_id); //Difficulty Methods bool tx_suggest_difficulty(WiFiClient& client, double difficulty); bool parse_mining_set_difficulty(String line, double& difficulty); +unsigned long parse_extract_id(const String &line); #endif // STRATUM_API_H \ No newline at end of file From 73a1de8ee6a06d4e48b5a0e07037e7f18e575bfc Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 1 Feb 2025 
14:26:47 +0200 Subject: [PATCH 13/39] Added crc32 Changed thread priority Added task interruption --- src/NerdMinerV2.ino.cpp | 8 ++-- src/mining.cpp | 66 ++++++++++++++++++++++++++++--- src/monitor.cpp | 29 ++++++++++---- src/utils.cpp | 86 +++++++++++++++++++++++++++++++++++++++++ src/utils.h | 4 ++ 5 files changed, 176 insertions(+), 17 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 065197c..3d51c31 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -507,15 +507,15 @@ void setup() Serial.println("Initiating tasks..."); char *name = (char*) malloc(32); sprintf(name, "(%s)", "Monitor"); - BaseType_t res1 = xTaskCreatePinnedToCore(runMonitor, "Monitor", 10000, (void*)name, 4, NULL,1); + BaseType_t res1 = xTaskCreatePinnedToCore(runMonitor, "Monitor", 10000, (void*)name, 5, NULL,1); /******** CREATE STRATUM TASK *****/ sprintf(name, "(%s)", "Stratum"); #if defined(ESP32_2432S028R) || defined(ESP32_2432S028_2USB) // Free a little bit of the heap to the screen - BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 13500, (void*)name, 3, NULL,1); + BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 13500, (void*)name, 4, NULL,1); #else - BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 15000, (void*)name, 3, NULL,1); + BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 15000, (void*)name, 4, NULL,1); #endif /******** CREATE MINER TASKS *****/ @@ -527,7 +527,7 @@ void setup() //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; #ifdef HARDWARE_SHA265 - xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 2, &minerTask1); + xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 3, &minerTask1); #else xTaskCreate(minerWorkerSw, "MinerSw-0", 6000, (void*)0, 1, &minerTask1); #endif diff --git a/src/mining.cpp b/src/mining.cpp index fc0696f..56d5459 100644 --- 
a/src/mining.cpp +++ b/src/mining.cpp @@ -158,6 +158,7 @@ std::list> s_job_request_list_sw; std::list> s_job_request_list_hw; #endif std::list> s_job_result_list; +static volatile uint8_t s_working_current_job_id = 0xFF; static void JobPush(std::list> &job_list, uint32_t id, uint32_t nonce_start, uint32_t nonce_count, double difficulty, const uint8_t* buffer_upper, const uint32_t* midstate, const uint32_t* bake) @@ -189,6 +190,7 @@ static void MiningJobStop(uint32_t &job_pool, std::mapnonce = 0xFFFFFFFF; result->id = job->id; result->nonce_count = job->nonce_count; + uint8_t job_in_work = job->id & 0xFF; for (uint32_t n = 0; n < job->nonce_count; ++n) { ((uint32_t*)(job->buffer_upper+12))[0] = job->nonce_start+n; nerd_sha256d_baked(job->midstate, job->buffer_upper, job->bake, hash); + + if (s_working_current_job_id != job_in_work) + { + result->nonce_count = n+1; + break; + } + if(hash[31] == 0 && hash[30] == 0) { double diff_hash = diff_from_target(hash); @@ -645,6 +659,7 @@ void minerWorkerHw(void * task_id) result->nonce = 0xFFFFFFFF; result->nonce_count = job->nonce_count; result->difficulty = job->difficulty; + uint8_t job_in_work = job->id & 0xFF; uint8_t* sha_buffer = job->buffer_upper; esp_sha_acquire_hardware(); @@ -684,6 +699,11 @@ void minerWorkerHw(void * task_id) //sha_ll_read_digest(SHA2_256, hash, 256 / 32); nerd_sha_ll_read_digest(hash); + if (s_working_current_job_id != job_in_work) + { + result->nonce_count = n+1; + break; + } if(hash[31] == 0 && hash[30] == 0) { double diff_hash = diff_from_target(hash); @@ -733,17 +753,51 @@ void restoreStat() { valids = nv_valids; nvs_get_u32(stat_handle, "templates", &templates); nvs_get_u64(stat_handle, "upTime", &upTime); + + uint32_t crc = crc32_reset(); + crc = crc32_add(crc, &best_diff, sizeof(best_diff)); + crc = crc32_add(crc, &Mhashes, sizeof(Mhashes)); + crc = crc32_add(crc, &nv_shares, sizeof(nv_shares)); + crc = crc32_add(crc, &nv_valids, sizeof(nv_valids)); + crc = crc32_add(crc, &templates, 
sizeof(templates)); + crc = crc32_add(crc, &upTime, sizeof(upTime)); + crc = crc32_finish(crc); + + uint32_t nv_crc; + nvs_get_u32(stat_handle, "crc32", &nv_crc); + if (nv_crc != crc) + { + best_diff = 0.0; + Mhashes = 0; + shares = 0; + valids = 0; + templates = 0; + upTime = 0; + } } void saveStat() { if(!Settings.saveStats) return; Serial.printf("[MONITOR] Saving stats\n"); - nvs_set_blob(stat_handle, "best_diff", &best_diff, sizeof(double)); + nvs_set_blob(stat_handle, "best_diff", &best_diff, sizeof(best_diff)); nvs_set_u32(stat_handle, "Mhashes", Mhashes); nvs_set_u32(stat_handle, "shares", shares); nvs_set_u32(stat_handle, "valids", valids); nvs_set_u32(stat_handle, "templates", templates); - nvs_set_u64(stat_handle, "upTime", upTime + (esp_timer_get_time()/1000000)); + uint64_t upTime_now = upTime + (esp_timer_get_time()/1000000); + nvs_set_u64(stat_handle, "upTime", upTime_now); + + uint32_t crc = crc32_reset(); + crc = crc32_add(crc, &best_diff, sizeof(best_diff)); + crc = crc32_add(crc, &Mhashes, sizeof(Mhashes)); + uint32_t nv_shares = shares; + uint32_t nv_valids = valids; + crc = crc32_add(crc, &nv_shares, sizeof(nv_shares)); + crc = crc32_add(crc, &nv_valids, sizeof(nv_valids)); + crc = crc32_add(crc, &templates, sizeof(templates)); + crc = crc32_add(crc, &upTime_now, sizeof(upTime_now)); + crc = crc32_finish(crc); + nvs_set_u32(stat_handle, "crc32", crc); } void resetStat() { diff --git a/src/monitor.cpp b/src/monitor.cpp index 900aebc..de62a76 100644 --- a/src/monitor.cpp +++ b/src/monitor.cpp @@ -4,6 +4,7 @@ #include "HTTPClient.h" #include #include +#include #include "mining.h" #include "utils.h" #include "monitor.h" @@ -246,20 +247,34 @@ static EHashRateScale s_hashrate_scale = HashRateScale_99KH; static uint32_t s_skip_first = 3; static double s_top_hashrate = 0.0; +static std::list s_hashrate_avg_list; +static double s_hashrate_summ = 0.0; + String getCurrentHashRate(unsigned long mElapsed) { double hashrate = (double)elapsedKHs * 1000.0 / 
(double)mElapsed; + + s_hashrate_summ += hashrate; + s_hashrate_avg_list.push_back(hashrate); + if (s_hashrate_avg_list.size() > 10) + { + s_hashrate_summ -= s_hashrate_avg_list.front(); + s_hashrate_avg_list.pop_front(); + } + + double avg_hashrate = s_hashrate_summ / (double)s_hashrate_avg_list.size(); + if (s_skip_first > 0) { s_skip_first--; } else { - if (hashrate > s_top_hashrate) + if (avg_hashrate > s_top_hashrate) { - s_top_hashrate = hashrate; - if (hashrate > 999.9) + s_top_hashrate = avg_hashrate; + if (avg_hashrate > 999.9) s_hashrate_scale = HashRateScale_9MH; - else if (hashrate > 99.9) + else if (avg_hashrate > 99.9) s_hashrate_scale = HashRateScale_999KH; } } @@ -267,11 +282,11 @@ String getCurrentHashRate(unsigned long mElapsed) switch (s_hashrate_scale) { case HashRateScale_99KH: - return String(hashrate, 2); + return String(avg_hashrate, 2); case HashRateScale_999KH: - return String(hashrate, 1); + return String(avg_hashrate, 1); default: - return String((int)hashrate ); + return String((int)avg_hashrate ); } } diff --git a/src/utils.cpp b/src/utils.cpp index 86156bd..d6dfb2b 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -475,4 +475,90 @@ void suffix_string(double val, char *buf, size_t bufsiz, int sigdigits) snprintf(buf, bufsiz, "%*.*f%s", sigdigits + 1, ndigits, dval, suffix); } +} + + +static const uint32_t s_crc32_table[256] = +{ + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, + 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, + 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, + 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, + 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, + 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 
0x51DE003A, 0xC8D75180, 0xBFD06116, + 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, + 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, + 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, + 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, + 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, + 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, + 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, + 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, + 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, + 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, + 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, + 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, + 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, + 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, + 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, + 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, + 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, + 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 
0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, + 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, + 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, + 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, + 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, + 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, + 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D +}; + +uint32_t crc32_reset() +{ + return 0xFFFFFFFF; +} + +uint32_t crc32_add(uint32_t crc32, const void* data, size_t size) +{ + for (size_t n = 0; n < size; ++n) + crc32 = (crc32 >> 8) ^ s_crc32_table[(crc32 ^ ((const uint8_t*)data)[n]) & 0xFF]; + return crc32; +} + +uint32_t crc32_finish(uint32_t crc32) +{ + return crc32 ^ 0xFFFFFFFF; } \ No newline at end of file diff --git a/src/utils.h b/src/utils.h index c15f774..b8a539d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -24,6 +24,10 @@ miner_data calculateMiningData(mining_subscribe& mWorker, mining_job mJob); bool checkValid(unsigned char* hash, unsigned char* target); void suffix_string(double val, char *buf, size_t bufsiz, int sigdigits); +uint32_t crc32_reset(); +uint32_t crc32_add(uint32_t crc32, const void* data, size_t size); +uint32_t crc32_finish(uint32_t crc32); + #endif // UTILS_API_H \ No newline at end of file From 47a23e1ba31f973c67d7428246b8df3e3e130ecf Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 1 Feb 2025 22:27:58 +0200 Subject: [PATCH 14/39] Revert thread priority - fix buttons --- src/NerdMinerV2.ino.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 3d51c31..b48562e 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -408,7 +408,7 @@ IRAM_ATTR void HwShaTest() uint32_t time_end = micros(); double hash_rate = 
((double)test_count * 1000000) / (double)(time_end - time_start); - Serial.print("DmaHashrate="); + Serial.print("Hashrate="); Serial.print(hash_rate/1000); Serial.println("KH/s"); @@ -507,15 +507,15 @@ void setup() Serial.println("Initiating tasks..."); char *name = (char*) malloc(32); sprintf(name, "(%s)", "Monitor"); - BaseType_t res1 = xTaskCreatePinnedToCore(runMonitor, "Monitor", 10000, (void*)name, 5, NULL,1); + BaseType_t res1 = xTaskCreatePinnedToCore(runMonitor, "Monitor", 10000, (void*)name, 3, NULL,1); /******** CREATE STRATUM TASK *****/ sprintf(name, "(%s)", "Stratum"); #if defined(ESP32_2432S028R) || defined(ESP32_2432S028_2USB) // Free a little bit of the heap to the screen - BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 13500, (void*)name, 4, NULL,1); + BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 13500, (void*)name, 2, NULL,1); #else - BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 15000, (void*)name, 4, NULL,1); + BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 15000, (void*)name, 2, NULL,1); #endif /******** CREATE MINER TASKS *****/ @@ -527,7 +527,7 @@ void setup() //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; #ifdef HARDWARE_SHA265 - xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 3, &minerTask1); + xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 1, &minerTask1); #else xTaskCreate(minerWorkerSw, "MinerSw-0", 6000, (void*)0, 1, &minerTask1); #endif From 63d4fc20b571a381892e66290c8bd36cec42182e Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 1 Feb 2025 22:40:23 +0200 Subject: [PATCH 15/39] Thread priority update --- src/NerdMinerV2.ino.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index b48562e..1bf7f02 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ 
-507,15 +507,15 @@ void setup() Serial.println("Initiating tasks..."); char *name = (char*) malloc(32); sprintf(name, "(%s)", "Monitor"); - BaseType_t res1 = xTaskCreatePinnedToCore(runMonitor, "Monitor", 10000, (void*)name, 3, NULL,1); + BaseType_t res1 = xTaskCreatePinnedToCore(runMonitor, "Monitor", 10000, (void*)name, 5, NULL,1); /******** CREATE STRATUM TASK *****/ sprintf(name, "(%s)", "Stratum"); #if defined(ESP32_2432S028R) || defined(ESP32_2432S028_2USB) // Free a little bit of the heap to the screen - BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 13500, (void*)name, 2, NULL,1); + BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 13500, (void*)name, 4, NULL,1); #else - BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 15000, (void*)name, 2, NULL,1); + BaseType_t res2 = xTaskCreatePinnedToCore(runStratumWorker, "Stratum", 15000, (void*)name, 4, NULL,1); #endif /******** CREATE MINER TASKS *****/ @@ -527,7 +527,7 @@ void setup() //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; #ifdef HARDWARE_SHA265 - xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 1, &minerTask1); + xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 3, &minerTask1); #else xTaskCreate(minerWorkerSw, "MinerSw-0", 6000, (void*)0, 1, &minerTask1); #endif @@ -538,6 +538,8 @@ void setup() esp_task_wdt_add(minerTask2); #endif + vTaskPrioritySet(NULL, 4); + /******** MONITOR SETUP *****/ setup_monitor(); } From abf298f56a3a4bc2d8b553b44987db74f3999bf0 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 1 Feb 2025 22:47:32 +0200 Subject: [PATCH 16/39] hw mining fix --- src/mining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mining.cpp b/src/mining.cpp index 56d5459..70d89ac 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -675,7 +675,7 @@ void minerWorkerHw(void * task_id) //sha_hal_wait_idle(); 
nerd_sha_hal_wait_idle(); //sha_ll_fill_text_block(s_test_buffer+64, 64/4); - nerd_sha_ll_fill_text_block_sha256(sha_buffer+64); + nerd_sha_ll_fill_text_block_sha256(sha_buffer); sha_ll_continue_block(SHA2_256); //sha_hal_read_digest(SHA2_256, interResult); From f6477c6c19c02d168195d2e70b4efa7ac7fe48bb Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sat, 1 Feb 2025 23:40:01 +0200 Subject: [PATCH 17/39] 225KH/s --- src/mining.cpp | 53 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index 70d89ac..33a4c32 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -595,23 +595,52 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) static inline void nerd_sha_ll_read_digest(void* ptr) { - DPORT_INTERRUPT_DISABLE(); -#if 0 - for (uint32_t i = 0; i < 256 / 32; ++i) - { - ((uint32_t*)ptr)[i] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + i * 4); - } -#else + DPORT_INTERRUPT_DISABLE(); ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4); ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4); ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); - ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); -#endif + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_ll_read_digest_if(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); + if ( ((uint16_t*)ptr)[15] != 0) + //if ( (((uint32_t*)ptr)[7] >> 16) != 0) + { DPORT_INTERRUPT_RESTORE(); + return; + } + + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 
4); + ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4); + ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_ll_write_digest(void *digest_state) +{ + uint32_t *digest_state_words = (uint32_t *)digest_state; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_H_BASE); + + REG_WRITE(&reg_addr_buf[0], digest_state_words[0]); + REG_WRITE(&reg_addr_buf[1], digest_state_words[1]); + REG_WRITE(&reg_addr_buf[2], digest_state_words[2]); + REG_WRITE(&reg_addr_buf[3], digest_state_words[3]); + REG_WRITE(&reg_addr_buf[4], digest_state_words[4]); + REG_WRITE(&reg_addr_buf[5], digest_state_words[5]); + REG_WRITE(&reg_addr_buf[6], digest_state_words[6]); + REG_WRITE(&reg_addr_buf[7], digest_state_words[7]); } static inline void nerd_sha_hal_wait_idle() @@ -629,6 +658,7 @@ void minerWorkerHw(void * task_id) std::shared_ptr result; uint8_t interResult[64]; uint8_t hash[32]; + uint8_t digest_mid[32]; uint32_t wdt_counter = 0; @@ -660,6 +690,7 @@ void minerWorkerHw(void * task_id) result->nonce_count = job->nonce_count; result->difficulty = job->difficulty; uint8_t job_in_work = job->id & 0xFF; + memcpy(digest_mid, job->midstate, sizeof(digest_mid)); uint8_t* sha_buffer = job->buffer_upper; esp_sha_acquire_hardware(); @@ -668,7 +699,7 @@ void minerWorkerHw(void * task_id) ((uint32_t*)(sha_buffer+12))[0] = job->nonce_start+n; //sha_hal_write_digest(SHA2_256, midstate); - sha_ll_write_digest(SHA2_256, job->midstate, 256 / 32); + nerd_sha_ll_write_digest(digest_mid); //nerd_sha_ll_write_digest_sha256(midstate); //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); @@ -697,7 +728,7 @@ void minerWorkerHw(void * task_id) //sha_hal_wait_idle(); 
nerd_sha_hal_wait_idle(); //sha_ll_read_digest(SHA2_256, hash, 256 / 32); - nerd_sha_ll_read_digest(hash); + nerd_sha_ll_read_digest_if(hash); if (s_working_current_job_id != job_in_work) { From 2b6a18f2e6d31272ef13163926b83b6573ffde6b Mon Sep 17 00:00:00 2001 From: evgenykz Date: Sun, 2 Feb 2025 00:16:23 +0200 Subject: [PATCH 18/39] 242KH/s --- src/mining.cpp | 62 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index 33a4c32..5ac4428 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -324,6 +324,7 @@ void runStratumWorker(void *name) { s_working_current_job_id = job_pool & 0xFF; //Terminate current job in thread last_job_time = millis(); + mLastTXtoPool = last_job_time; uint32_t mh = hashes/1000000; Mhashes += mh; @@ -593,6 +594,31 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) REG_WRITE(&reg_addr_buf[15], data_words[15]); } +static inline void nerd_sha_ll_fill_text_block_sha256_inter() +{ + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + DPORT_INTERRUPT_DISABLE(); + REG_WRITE(&reg_addr_buf[0], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4)); + REG_WRITE(&reg_addr_buf[1], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4)); + REG_WRITE(&reg_addr_buf[2], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4)); + REG_WRITE(&reg_addr_buf[3], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4)); + REG_WRITE(&reg_addr_buf[4], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4)); + REG_WRITE(&reg_addr_buf[5], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4)); + REG_WRITE(&reg_addr_buf[6], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4)); + REG_WRITE(&reg_addr_buf[7], DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4)); + DPORT_INTERRUPT_RESTORE(); + + REG_WRITE(&reg_addr_buf[8], 0x00000080); + REG_WRITE(&reg_addr_buf[9], 0x00000000); + REG_WRITE(&reg_addr_buf[10], 0x00000000); + REG_WRITE(&reg_addr_buf[11], 0x00000000); + REG_WRITE(&reg_addr_buf[12], 0x00000000); + REG_WRITE(&reg_addr_buf[13], 0x00000000); + 
REG_WRITE(&reg_addr_buf[14], 0x00000000); + REG_WRITE(&reg_addr_buf[15], 0x00010000); +} + static inline void nerd_sha_ll_read_digest(void* ptr) { DPORT_INTERRUPT_DISABLE(); @@ -607,7 +633,7 @@ static inline void nerd_sha_ll_read_digest(void* ptr) DPORT_INTERRUPT_RESTORE(); } -static inline void nerd_sha_ll_read_digest_if(void* ptr) +static inline bool nerd_sha_ll_read_digest_if(void* ptr) { DPORT_INTERRUPT_DISABLE(); ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); @@ -615,7 +641,7 @@ static inline void nerd_sha_ll_read_digest_if(void* ptr) //if ( (((uint32_t*)ptr)[7] >> 16) != 0) { DPORT_INTERRUPT_RESTORE(); - return; + return false; } ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); @@ -626,6 +652,7 @@ static inline void nerd_sha_ll_read_digest_if(void* ptr) ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); DPORT_INTERRUPT_RESTORE(); + return true; } static inline void nerd_sha_ll_write_digest(void *digest_state) @@ -659,13 +686,10 @@ void minerWorkerHw(void * task_id) uint8_t interResult[64]; uint8_t hash[32]; uint8_t digest_mid[32]; + uint8_t sha_buffer[64]; uint32_t wdt_counter = 0; - memset(interResult, 0, sizeof(interResult)); - interResult[32] = 0x80; - interResult[62] = 0x01; - interResult[63] = 0x00; while (1) { { @@ -691,8 +715,8 @@ void minerWorkerHw(void * task_id) result->difficulty = job->difficulty; uint8_t job_in_work = job->id & 0xFF; memcpy(digest_mid, job->midstate, sizeof(digest_mid)); + memcpy(sha_buffer, job->buffer_upper, sizeof(sha_buffer)); - uint8_t* sha_buffer = job->buffer_upper; esp_sha_acquire_hardware(); for (uint32_t n = 0; n < job->nonce_count; ++n) { @@ -714,13 +738,14 @@ void minerWorkerHw(void * task_id) //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); //sha_ll_read_digest(SHA2_256, interResult, 256 / 32); - nerd_sha_ll_read_digest(interResult); + //nerd_sha_ll_read_digest(interResult); 
//sha_hal_hash_block(SHA2_256, interResult, 64/4, true); //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); + //nerd_sha_hal_wait_idle(); //sha_ll_fill_text_block(interResult, 64/4); - nerd_sha_ll_fill_text_block_sha256(interResult); + //nerd_sha_ll_fill_text_block_sha256(interResult); + nerd_sha_ll_fill_text_block_sha256_inter(); sha_ll_start_block(SHA2_256); //sha_hal_read_digest(SHA2_256, hash); @@ -728,15 +753,9 @@ void minerWorkerHw(void * task_id) //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); //sha_ll_read_digest(SHA2_256, hash, 256 / 32); - nerd_sha_ll_read_digest_if(hash); - - if (s_working_current_job_id != job_in_work) - { - result->nonce_count = n+1; - break; - } - if(hash[31] == 0 && hash[30] == 0) + if (nerd_sha_ll_read_digest_if(hash)) { + //~5 per second double diff_hash = diff_from_target(hash); if (diff_hash > result->difficulty) { @@ -745,6 +764,13 @@ void minerWorkerHw(void * task_id) memcpy(result->hash, hash, sizeof(hash)); } } + if ( + (uint8_t)(n & 0xFF) == 0 && + s_working_current_job_id != job_in_work) + { + result->nonce_count = n+1; + break; + } } esp_sha_release_hardware(); } else From 15a80381f95203c09012ec86a097e1ebb64f7216 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Mon, 3 Feb 2025 02:08:09 +0200 Subject: [PATCH 19/39] esp32D hardware sha --- platformio.ini | 4 +- src/NerdMinerV2.ino.cpp | 188 ++++++++++++++++++++++-- src/ShaTests/nerdSHA256plus.cpp | 57 +++++++- src/mining.cpp | 251 +++++++++++++++++++++++++++++--- src/mining.h | 4 +- 5 files changed, 463 insertions(+), 41 deletions(-) diff --git a/platformio.ini b/platformio.ini index b8c1fa9..48c3b05 100644 --- a/platformio.ini +++ b/platformio.ini @@ -325,7 +325,7 @@ monitor_filters = time log2file monitor_speed = 115200 -upload_speed = 115200 +upload_speed = 921600 board_build.partitions = huge_app.csv build_flags = -D DEVKITV1=1 @@ -586,7 +586,7 @@ monitor_filters = log2file ;board_build.arduino.memory_type = qio_opi monitor_speed = 115200 -upload_speed = 115200 
+upload_speed = 921600 # 2 x 4.5MB app, 6.875MB SPIFFS board_build.partitions = huge_app.csv build_flags = diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 1bf7f02..33a7b0f 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -56,9 +56,11 @@ const char* ntpServer = "pool.ntp.org"; #ifdef HW_SHA256_TEST +#include #include "ShaTests/nerdSHA256plus.h" #include "mbedtls/sha256.h" #include +#include #include #include #include @@ -96,6 +98,7 @@ static uint8_t interResult_aligned[64] __attribute__((aligned(256))); static uint8_t midstate_aligned[32] __attribute__((aligned(256))); static uint8_t hash_aligned[64] __attribute__((aligned(256))); +#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) static inline void nerd_sha_hal_wait_idle() { while (REG_READ(SHA_BUSY_REG)) @@ -169,6 +172,116 @@ static DRAM_ATTR lldesc_t s_dma_descr_input; static DRAM_ATTR lldesc_t s_dma_descr_buf; static DRAM_ATTR lldesc_t s_dma_descr_inter; +#endif + +#if defined(CONFIG_IDF_TARGET_ESP32) +static inline void nerd_sha_ll_read_digest_swap(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); + ((uint32_t*)ptr)[0] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4)); + ((uint32_t*)ptr)[1] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4)); + ((uint32_t*)ptr)[2] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4)); + ((uint32_t*)ptr)[3] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4)); + ((uint32_t*)ptr)[4] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4)); + ((uint32_t*)ptr)[5] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4)); + ((uint32_t*)ptr)[6] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4)); + ((uint32_t*)ptr)[7] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4)); + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_ll_read_digest(void* ptr) +{ + 
DPORT_INTERRUPT_DISABLE(); + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4); + ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4); + ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4); + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_hal_wait_idle() +{ + while (DPORT_REG_READ(SHA_256_BUSY_REG)) + {} +} + +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + reg_addr_buf[0] = data_words[0]; + reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = data_words[3]; + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; + reg_addr_buf[8] = data_words[8]; + reg_addr_buf[9] = data_words[9]; + reg_addr_buf[10] = data_words[10]; + reg_addr_buf[11] = data_words[11]; + reg_addr_buf[12] = data_words[12]; + reg_addr_buf[13] = data_words[13]; + reg_addr_buf[14] = data_words[14]; + reg_addr_buf[15] = data_words[15]; +} + +static inline void nerd_sha_ll_fill_text_block_sha256_swap(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + reg_addr_buf[0] = __builtin_bswap32(data_words[0]); + reg_addr_buf[1] = __builtin_bswap32(data_words[1]); + reg_addr_buf[2] = __builtin_bswap32(data_words[2]); + reg_addr_buf[3] = __builtin_bswap32(data_words[3]); + reg_addr_buf[4] = __builtin_bswap32(data_words[4]); + reg_addr_buf[5] = 
__builtin_bswap32(data_words[5]); + reg_addr_buf[6] = __builtin_bswap32(data_words[6]); + reg_addr_buf[7] = __builtin_bswap32(data_words[7]); + reg_addr_buf[8] = __builtin_bswap32(data_words[8]); + reg_addr_buf[9] = __builtin_bswap32(data_words[9]); + reg_addr_buf[10] = __builtin_bswap32(data_words[10]); + reg_addr_buf[11] = __builtin_bswap32(data_words[11]); + reg_addr_buf[12] = __builtin_bswap32(data_words[12]); + reg_addr_buf[13] = __builtin_bswap32(data_words[13]); + reg_addr_buf[14] = __builtin_bswap32(data_words[14]); + reg_addr_buf[15] = __builtin_bswap32(data_words[15]); +} + +static inline void nerd_sha_ll_fill_text_block_sha256_double(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + +#if 0 + //No change + reg_addr_buf[0] = data_words[0]; + reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = data_words[3]; + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; +#endif + reg_addr_buf[8] = 0x80000000; + reg_addr_buf[9] = 0x00000000; + reg_addr_buf[10] = 0x00000000; + reg_addr_buf[11] = 0x00000000; + reg_addr_buf[12] = 0x00000000; + reg_addr_buf[13] = 0x00000000; + reg_addr_buf[14] = 0x00000000; + reg_addr_buf[15] = 0x00000100; +} +#endif + IRAM_ATTR void HwShaTest() { uint8_t interResult[64]; @@ -190,7 +303,10 @@ IRAM_ATTR void HwShaTest() int test_count = 1000000; #if 0 - //Generic software 16KH/s + //Generic software + //esp32s3 16KH/s + //esp32D 9.5KH/s + test_count = 20000; mbedtls_sha256_context ctx; mbedtls_sha256_init(&ctx); for (int i = 0; i < test_count; ++i) @@ -206,10 +322,13 @@ IRAM_ATTR void HwShaTest() mbedtls_sha256_free(&ctx); #endif -#if 0 - //nerdSha256 (ESP32 39KH/s) +#if 1 + //nerdSha256 + //ESP32 39KH/s + //ESP32S3 39.01KH/s + test_count = 100000; nerdSHA256_context ctx; - nerd_mids(&ctx, s_test_buffer); + nerd_mids(ctx.digest, 
s_test_buffer); for (int i = 0; i < test_count; ++i) { nerd_sha256d(&ctx, s_test_buffer+64, hash); @@ -244,8 +363,21 @@ IRAM_ATTR void HwShaTest() #endif #if 0 - //Hardware block - //NOT avaliable + //ESP32D 5.50KH/s + test_count = 40000; + //esp_sha_lock_engine(SHA2_256); + for (int i = 0; i < test_count; ++i) + { + esp_sha(SHA2_256, s_test_buffer, 80, interResult); + esp_sha(SHA2_256, interResult, 32, hash); + } + //esp_sha_unlock_engine(SHA2_256); +#endif + +#if 0 + //ESP32D + //Invalid result!! + test_count = 100000; esp_sha_lock_engine(SHA2_256); for (int i = 0; i < test_count; ++i) { @@ -259,17 +391,42 @@ IRAM_ATTR void HwShaTest() #endif #if 0 - //Hardware low level 132KH/s - esp_sha_acquire_hardware(); + //ESP32D Hardware SHA ~200KH/s + test_count = 50000; + periph_module_enable(PERIPH_SHA_MODULE); + uint8_t buffer_swap[128]; + for (int i = 0; i < 32; ++i) + ((uint32_t*)buffer_swap)[i] = __builtin_bswap32(((const uint32_t*)s_test_buffer)[i]); + + uint8_t inter_swap[64]; + for (int i = 0; i < 16; ++i) + ((uint32_t*)inter_swap)[i] = __builtin_bswap32(((const uint32_t*)interResult)[i]); + for (int i = 0; i < test_count; ++i) { - sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); - sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); - sha_hal_read_digest(SHA2_256, interResult); - sha_hal_hash_block(SHA2_256, interResult, 64/4, true); - sha_hal_read_digest(SHA2_256, hash); + //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(buffer_swap); + sha_ll_start_block(SHA2_256); + + //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(buffer_swap+64); + sha_ll_continue_block(SHA2_256); + + nerd_sha_hal_wait_idle(); + sha_ll_load(SHA2_256); + //nerd_sha_ll_read_digest_swap(interResult); + + //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + nerd_sha_hal_wait_idle(); + 
nerd_sha_ll_fill_text_block_sha256_double(inter_swap); + sha_ll_start_block(SHA2_256); + + nerd_sha_hal_wait_idle(); + sha_ll_load(SHA2_256); + nerd_sha_ll_read_digest_swap(hash); } - esp_sha_release_hardware(); #endif #if 0 @@ -305,7 +462,7 @@ IRAM_ATTR void HwShaTest() memcpy(hash, hash_aligned, sizeof(hash_aligned)); #endif -#if 1 +#if 0 //Hardware LL 162.43KH/s esp_sha_acquire_hardware(); //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); @@ -423,6 +580,7 @@ IRAM_ATTR void HwShaTest() Serial.println(""); //should be + //54cd9f1ebc3db9a626688e5bb91d808abbd4079b2cba7f43fa08bfced300ef19 //6fa464b007f2d577edfa5dfe9dfc3f9209f36d1a6711d314ea68ccdd03000000 } diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index 5e14e08..d82d7dc 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -212,8 +212,19 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, uint32_t* buffer32; //*********** Init 1rst SHA *********** - uint32_t W[64] = { GET_UINT32_BE(dataIn, 0), GET_UINT32_BE(dataIn, 4), - GET_UINT32_BE(dataIn, 8), GET_UINT32_BE(dataIn, 12), 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, + uint32_t W[64] = { +#if 0 + __builtin_bswap32(((const uint32_t*)dataIn)[0]), + __builtin_bswap32(((const uint32_t*)dataIn)[1]), + __builtin_bswap32(((const uint32_t*)dataIn)[2]), + __builtin_bswap32(((const uint32_t*)dataIn)[3]), +#else + GET_UINT32_BE(dataIn, 0), + GET_UINT32_BE(dataIn, 4), + GET_UINT32_BE(dataIn, 8), + GET_UINT32_BE(dataIn, 12), +#endif + 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640}; uint32_t A[8] = { midstate->digest[0], midstate->digest[1], midstate->digest[2], midstate->digest[3], @@ -392,9 +403,9 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, P(A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(61), K[61]); P(A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(62), K[62]); P(A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(63), K[63]); - - 
PUT_UINT32_BE(0x5BE0CD19 + A[7], doubleHash, 28); - //if(doubleHash[31] !=0 || doubleHash[30] !=0) return false; + +#if 1 + //Best performance PUT_UINT32_BE(0x6A09E667 + A[0], doubleHash, 0); PUT_UINT32_BE(0xBB67AE85 + A[1], doubleHash, 4); PUT_UINT32_BE(0x3C6EF372 + A[2], doubleHash, 8); @@ -402,6 +413,42 @@ IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, PUT_UINT32_BE(0x510E527F + A[4], doubleHash, 16); PUT_UINT32_BE(0x9B05688C + A[5], doubleHash, 20); PUT_UINT32_BE(0x1F83D9AB + A[6], doubleHash, 24); + PUT_UINT32_BE(0x5BE0CD19 + A[7], doubleHash, 28); +#endif + +#if 0 + temp1 = 0x6A09E667 + A[0]; ((uint32_t*)doubleHash)[0] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0xBB67AE85 + A[1]; ((uint32_t*)doubleHash)[1] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x3C6EF372 + A[2]; ((uint32_t*)doubleHash)[2] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0xA54FF53A + A[3]; ((uint32_t*)doubleHash)[3] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x510E527F + A[4]; ((uint32_t*)doubleHash)[4] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x9B05688C + A[5]; ((uint32_t*)doubleHash)[5] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x1F83D9AB + A[6]; ((uint32_t*)doubleHash)[6] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); + temp1 = 0x5BE0CD19 + A[7]; ((uint32_t*)doubleHash)[7] = (temp1 << 24) | ((temp1 << 8) & 0x00FF0000) | ((temp1 >> 8) & 0x0000FF00) | (temp1 >> 24); +#endif + +#if 0 + temp1 = 0x6A09E667 + A[0]; ((uint32_t*)doubleHash)[0] = __builtin_bswap32(temp1); + temp1 = 0xBB67AE85 + A[1]; ((uint32_t*)doubleHash)[1] = __builtin_bswap32(temp1); + temp1 = 
0x3C6EF372 + A[2]; ((uint32_t*)doubleHash)[2] = __builtin_bswap32(temp1); + temp1 = 0xA54FF53A + A[3]; ((uint32_t*)doubleHash)[3] = __builtin_bswap32(temp1); + temp1 = 0x510E527F + A[4]; ((uint32_t*)doubleHash)[4] = __builtin_bswap32(temp1); + temp1 = 0x9B05688C + A[5]; ((uint32_t*)doubleHash)[5] = __builtin_bswap32(temp1); + temp1 = 0x1F83D9AB + A[6]; ((uint32_t*)doubleHash)[6] = __builtin_bswap32(temp1); + temp1 = 0x5BE0CD19 + A[7]; ((uint32_t*)doubleHash)[7] = __builtin_bswap32(temp1); +#endif + + +#if 0 + ((uint32_t*)doubleHash)[0] = __builtin_bswap32( (0x6A09E667 + A[0]) ); + ((uint32_t*)doubleHash)[1] = __builtin_bswap32( (0xBB67AE85 + A[1]) ); + ((uint32_t*)doubleHash)[2] = __builtin_bswap32( (0x3C6EF372 + A[2]) ); + ((uint32_t*)doubleHash)[3] = __builtin_bswap32( (0xA54FF53A + A[3]) ); + ((uint32_t*)doubleHash)[4] = __builtin_bswap32( (0x510E527F + A[4]) ); + ((uint32_t*)doubleHash)[5] = __builtin_bswap32( (0x9B05688C + A[5]) ); + ((uint32_t*)doubleHash)[6] = __builtin_bswap32( (0x1F83D9AB + A[6]) ); + ((uint32_t*)doubleHash)[7] = __builtin_bswap32( (0x5BE0CD19 + A[7]) ); +#endif return true; } diff --git a/src/mining.cpp b/src/mining.cpp index 5ac4428..355acfa 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -30,6 +30,11 @@ #include #include #include + +#if defined(CONFIG_IDF_TARGET_ESP32) +#include +#endif + #endif nvs_handle_t stat_handle; @@ -138,7 +143,7 @@ struct JobRequest uint32_t nonce_start; uint32_t nonce_count; double difficulty; - uint8_t buffer_upper[64]; + uint8_t sha_buffer[128]; uint32_t midstate[8]; uint32_t bake[16]; }; @@ -161,14 +166,14 @@ std::list> s_job_result_list; static volatile uint8_t s_working_current_job_id = 0xFF; static void JobPush(std::list> &job_list, uint32_t id, uint32_t nonce_start, uint32_t nonce_count, double difficulty, - const uint8_t* buffer_upper, const uint32_t* midstate, const uint32_t* bake) + const uint8_t* sha_buffer, const uint32_t* midstate, const uint32_t* bake) { std::shared_ptr job = 
std::make_shared(); job->id = id; job->nonce_start = nonce_start; job->nonce_count = nonce_count; job->difficulty = difficulty; - memcpy(job->buffer_upper, buffer_upper, sizeof(job->buffer_upper)); + memcpy(job->sha_buffer, sha_buffer, sizeof(job->sha_buffer)); memcpy(job->midstate, midstate, sizeof(job->midstate)); memcpy(job->bake, bake, sizeof(job->bake)); job_list.push_back(job); @@ -342,10 +347,12 @@ void runStratumWorker(void *name) { nerd_sha256_bake(diget_mid, mMiner.bytearray_blockheader+64, bake); #ifdef HARDWARE_SHA265 - esp_sha_acquire_hardware(); - sha_hal_hash_block(SHA2_256, mMiner.bytearray_blockheader, 64/4, true); - sha_hal_read_digest(SHA2_256, hw_midstate); - esp_sha_release_hardware(); + #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) + esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, mMiner.bytearray_blockheader, 64/4, true); + sha_hal_read_digest(SHA2_256, hw_midstate); + esp_sha_release_hardware(); + #endif #endif nonce_pool = 0x10000000; @@ -354,10 +361,10 @@ void runStratumWorker(void *name) { std::lock_guard lock(s_job_mutex); for (int i = 0; i < 4; ++ i) { - JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, diget_mid, bake); + JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader, diget_mid, bake); nonce_pool += NONCE_PER_JOB_SW; #ifdef HARDWARE_SHA265 - JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, hw_midstate, bake); + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); nonce_pool += NONCE_PER_JOB_HW; #endif } @@ -452,14 +459,14 @@ void runStratumWorker(void *name) { while (s_job_request_list_sw.size() < 4) { - JobPush( s_job_request_list_sw, 
job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, diget_mid, bake); + JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader, diget_mid, bake); nonce_pool += NONCE_PER_JOB_SW; } #ifdef HARDWARE_SHA265 while (s_job_request_list_hw.size() < 4) { - JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader+64, hw_midstate, bake); + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); nonce_pool += NONCE_PER_JOB_HW; } #endif @@ -537,8 +544,8 @@ void minerWorkerSw(void * task_id) uint8_t job_in_work = job->id & 0xFF; for (uint32_t n = 0; n < job->nonce_count; ++n) { - ((uint32_t*)(job->buffer_upper+12))[0] = job->nonce_start+n; - nerd_sha256d_baked(job->midstate, job->buffer_upper, job->bake, hash); + ((uint32_t*)(job->sha_buffer+64+12))[0] = job->nonce_start+n; + nerd_sha256d_baked(job->midstate, job->sha_buffer+64, job->bake, hash); if (s_working_current_job_id != job_in_work) { @@ -571,6 +578,8 @@ void minerWorkerSw(void * task_id) #ifdef HARDWARE_SHA265 +#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) + static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) { uint32_t *data_words = (uint32_t *)input_text; @@ -636,14 +645,14 @@ static inline void nerd_sha_ll_read_digest(void* ptr) static inline bool nerd_sha_ll_read_digest_if(void* ptr) { DPORT_INTERRUPT_DISABLE(); - ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); - if ( ((uint16_t*)ptr)[15] != 0) - //if ( (((uint32_t*)ptr)[7] >> 16) != 0) + uint32_t last = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); + if ( last >> 16 != 0) { DPORT_INTERRUPT_RESTORE(); return false; } + ((uint32_t*)ptr)[7] = last; ((uint32_t*)ptr)[0] = 
DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); @@ -715,7 +724,7 @@ void minerWorkerHw(void * task_id) result->difficulty = job->difficulty; uint8_t job_in_work = job->id & 0xFF; memcpy(digest_mid, job->midstate, sizeof(digest_mid)); - memcpy(sha_buffer, job->buffer_upper, sizeof(sha_buffer)); + memcpy(sha_buffer, job->sha_buffer+64, sizeof(sha_buffer)); esp_sha_acquire_hardware(); for (uint32_t n = 0; n < job->nonce_count; ++n) @@ -784,7 +793,215 @@ void minerWorkerHw(void * task_id) } } } + +#endif //#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) + +#if defined(CONFIG_IDF_TARGET_ESP32) + +static inline bool nerd_sha_ll_read_digest_swap_if(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); + uint32_t fin = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4); + if ( (uint32_t)(fin & 0xFFFF) != 0) + { + DPORT_INTERRUPT_RESTORE(); + return false; + } + ((uint32_t*)ptr)[7] = __builtin_bswap32(fin); + ((uint32_t*)ptr)[0] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4)); + ((uint32_t*)ptr)[1] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4)); + ((uint32_t*)ptr)[2] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4)); + ((uint32_t*)ptr)[3] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4)); + ((uint32_t*)ptr)[4] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4)); + ((uint32_t*)ptr)[5] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4)); + ((uint32_t*)ptr)[6] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4)); + DPORT_INTERRUPT_RESTORE(); + return true; +} + +static inline void nerd_sha_ll_read_digest(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4); + ((uint32_t*)ptr)[1] = 
DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4); + ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4); + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_hal_wait_idle() +{ + while (DPORT_REG_READ(SHA_256_BUSY_REG)) + {} +} + +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + reg_addr_buf[0] = data_words[0]; + reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = data_words[3]; + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; + reg_addr_buf[8] = data_words[8]; + reg_addr_buf[9] = data_words[9]; + reg_addr_buf[10] = data_words[10]; + reg_addr_buf[11] = data_words[11]; + reg_addr_buf[12] = data_words[12]; + reg_addr_buf[13] = data_words[13]; + reg_addr_buf[14] = data_words[14]; + reg_addr_buf[15] = data_words[15]; +} + +static inline void nerd_sha_ll_fill_text_block_sha256_swap(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + reg_addr_buf[0] = __builtin_bswap32(data_words[0]); + reg_addr_buf[1] = __builtin_bswap32(data_words[1]); + reg_addr_buf[2] = __builtin_bswap32(data_words[2]); + reg_addr_buf[3] = __builtin_bswap32(data_words[3]); + reg_addr_buf[4] = __builtin_bswap32(data_words[4]); + reg_addr_buf[5] = __builtin_bswap32(data_words[5]); + reg_addr_buf[6] = __builtin_bswap32(data_words[6]); + reg_addr_buf[7] = 
__builtin_bswap32(data_words[7]); + reg_addr_buf[8] = __builtin_bswap32(data_words[8]); + reg_addr_buf[9] = __builtin_bswap32(data_words[9]); + reg_addr_buf[10] = __builtin_bswap32(data_words[10]); + reg_addr_buf[11] = __builtin_bswap32(data_words[11]); + reg_addr_buf[12] = __builtin_bswap32(data_words[12]); + reg_addr_buf[13] = __builtin_bswap32(data_words[13]); + reg_addr_buf[14] = __builtin_bswap32(data_words[14]); + reg_addr_buf[15] = __builtin_bswap32(data_words[15]); +} + +static inline void nerd_sha_ll_fill_text_block_sha256_double() +{ + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + +#if 0 + //No change + reg_addr_buf[0] = data_words[0]; + reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = data_words[3]; + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; #endif + reg_addr_buf[8] = 0x80000000; + reg_addr_buf[9] = 0x00000000; + reg_addr_buf[10] = 0x00000000; + reg_addr_buf[11] = 0x00000000; + reg_addr_buf[12] = 0x00000000; + reg_addr_buf[13] = 0x00000000; + reg_addr_buf[14] = 0x00000000; + reg_addr_buf[15] = 0x00000100; +} + +void minerWorkerHw(void * task_id) +{ + unsigned int miner_id = (uint32_t)task_id; + Serial.printf("[MINER] %d Started minerWorkerHwEsp32D Task!\n", miner_id); + + std::shared_ptr job; + std::shared_ptr result; + uint8_t hash[32]; + uint8_t sha_buffer[128]; + + while (1) + { + { + std::lock_guard lock(s_job_mutex); + if (result) + { + s_job_result_list.push_back(result); + result.reset(); + } + if (!s_job_request_list_hw.empty()) + { + job = s_job_request_list_hw.front(); + s_job_request_list_hw.pop_front(); + } else + job.reset(); + } + if (job) + { + result = std::make_shared(); + result->id = job->id; + result->nonce = 0xFFFFFFFF; + result->nonce_count = job->nonce_count; + result->difficulty = job->difficulty; + uint8_t job_in_work = job->id & 0xFF; + for (int i = 0; i < 32; ++i) + 
((uint32_t*)sha_buffer)[i] = __builtin_bswap32(((const uint32_t*)(job->sha_buffer))[i]); + + esp_sha_lock_engine(SHA2_256); + for (uint32_t n = 0; n < job->nonce_count; ++n) + { + ((uint32_t*)(sha_buffer+64+12))[0] = __builtin_bswap32(job->nonce_start+n); + + //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(sha_buffer); + sha_ll_start_block(SHA2_256); + + //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(sha_buffer+64); + sha_ll_continue_block(SHA2_256); + + nerd_sha_hal_wait_idle(); + sha_ll_load(SHA2_256); + + //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256_double(); + sha_ll_start_block(SHA2_256); + + nerd_sha_hal_wait_idle(); + sha_ll_load(SHA2_256); + if (nerd_sha_ll_read_digest_swap_if(hash)) + { + //~5 per second + double diff_hash = diff_from_target(hash); + if (diff_hash > result->difficulty) + { + result->difficulty = diff_hash; + result->nonce = job->nonce_start+n; + memcpy(result->hash, hash, sizeof(hash)); + } + } + if ( + (uint8_t)(n & 0xFF) == 0 && + s_working_current_job_id != job_in_work) + { + result->nonce_count = n+1; + break; + } + } + esp_sha_unlock_engine(SHA2_256); + } else + vTaskDelay(2 / portTICK_PERIOD_MS); + + esp_task_wdt_reset(); + } +} + +#endif //CONFIG_IDF_TARGET_ESP32 + +#endif //HARDWARE_SHA265 #define DELAY 100 diff --git a/src/mining.h b/src/mining.h index 7e424c1..86c010f 100644 --- a/src/mining.h +++ b/src/mining.h @@ -10,9 +10,9 @@ #define KEEPALIVE_TIME_ms 30000 #define POOLINACTIVITY_TIME_ms 60000 -#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) +//#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) #define HARDWARE_SHA265 -#endif +//#endif #define TARGET_BUFFER_SIZE 64 
From 187906c76a48308d3de7b11ea73d32c2ebc00966 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Mon, 3 Feb 2025 10:56:53 +0200 Subject: [PATCH 20/39] esp32d speed-up --- src/mining.cpp | 53 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index 355acfa..c2bd971 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -862,27 +862,42 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) reg_addr_buf[15] = data_words[15]; } -static inline void nerd_sha_ll_fill_text_block_sha256_swap(const void *input_text) +static inline void nerd_sha_ll_fill_text_block_sha256_upper(const void *input_text, uint32_t nonce) { uint32_t *data_words = (uint32_t *)input_text; uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); - reg_addr_buf[0] = __builtin_bswap32(data_words[0]); - reg_addr_buf[1] = __builtin_bswap32(data_words[1]); - reg_addr_buf[2] = __builtin_bswap32(data_words[2]); - reg_addr_buf[3] = __builtin_bswap32(data_words[3]); - reg_addr_buf[4] = __builtin_bswap32(data_words[4]); - reg_addr_buf[5] = __builtin_bswap32(data_words[5]); - reg_addr_buf[6] = __builtin_bswap32(data_words[6]); - reg_addr_buf[7] = __builtin_bswap32(data_words[7]); - reg_addr_buf[8] = __builtin_bswap32(data_words[8]); - reg_addr_buf[9] = __builtin_bswap32(data_words[9]); - reg_addr_buf[10] = __builtin_bswap32(data_words[10]); - reg_addr_buf[11] = __builtin_bswap32(data_words[11]); - reg_addr_buf[12] = __builtin_bswap32(data_words[12]); - reg_addr_buf[13] = __builtin_bswap32(data_words[13]); - reg_addr_buf[14] = __builtin_bswap32(data_words[14]); - reg_addr_buf[15] = __builtin_bswap32(data_words[15]); + reg_addr_buf[0] = data_words[0]; + reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = __builtin_bswap32(nonce); +#if 1 + reg_addr_buf[4] = 0x80000000; + reg_addr_buf[5] = 0x00000000; + reg_addr_buf[6] = 0x00000000; + reg_addr_buf[7] = 0x00000000; + 
reg_addr_buf[8] = 0x00000000; + reg_addr_buf[9] = 0x00000000; + reg_addr_buf[10] = 0x00000000; + reg_addr_buf[11] = 0x00000000; + reg_addr_buf[12] = 0x00000000; + reg_addr_buf[13] = 0x00000000; + reg_addr_buf[14] = 0x00000000; + reg_addr_buf[15] = 0x00000280; +#else + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; + reg_addr_buf[8] = data_words[8]; + reg_addr_buf[9] = data_words[9]; + reg_addr_buf[10] = data_words[10]; + reg_addr_buf[11] = data_words[11]; + reg_addr_buf[12] = data_words[12]; + reg_addr_buf[13] = data_words[13]; + reg_addr_buf[14] = data_words[14]; + reg_addr_buf[15] = data_words[15]; +#endif } static inline void nerd_sha_ll_fill_text_block_sha256_double() @@ -950,7 +965,7 @@ void minerWorkerHw(void * task_id) esp_sha_lock_engine(SHA2_256); for (uint32_t n = 0; n < job->nonce_count; ++n) { - ((uint32_t*)(sha_buffer+64+12))[0] = __builtin_bswap32(job->nonce_start+n); + //((uint32_t*)(sha_buffer+64+12))[0] = __builtin_bswap32(job->nonce_start+n); //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); nerd_sha_hal_wait_idle(); @@ -959,7 +974,7 @@ void minerWorkerHw(void * task_id) //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); nerd_sha_hal_wait_idle(); - nerd_sha_ll_fill_text_block_sha256(sha_buffer+64); + nerd_sha_ll_fill_text_block_sha256_upper(sha_buffer+64, job->nonce_start+n); sha_ll_continue_block(SHA2_256); nerd_sha_hal_wait_idle(); From d0805162ef91f3ea8c7d653c5b88942d421c8a25 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Tue, 4 Feb 2025 12:56:07 +0200 Subject: [PATCH 21/39] upTime fix; esp32s3 tiny optimization (+2KH/s) --- src/mining.cpp | 46 ++++++++++++++++++++++++++++++++++++---------- src/monitor.cpp | 23 ++++++++++++++++++----- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index c2bd971..cb8e212 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -580,7 +580,7 @@ void 
minerWorkerSw(void * task_id) #if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) -static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text, uint32_t nonce) { uint32_t *data_words = (uint32_t *)input_text; uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); @@ -588,7 +588,8 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) REG_WRITE(®_addr_buf[0], data_words[0]); REG_WRITE(®_addr_buf[1], data_words[1]); REG_WRITE(®_addr_buf[2], data_words[2]); - REG_WRITE(®_addr_buf[3], data_words[3]); +#if 0 + REG_WRITE(®_addr_buf[3], data_words[3]); REG_WRITE(®_addr_buf[4], data_words[4]); REG_WRITE(®_addr_buf[5], data_words[5]); REG_WRITE(®_addr_buf[6], data_words[6]); @@ -601,6 +602,21 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) REG_WRITE(®_addr_buf[13], data_words[13]); REG_WRITE(®_addr_buf[14], data_words[14]); REG_WRITE(®_addr_buf[15], data_words[15]); +#else + REG_WRITE(®_addr_buf[3], nonce); + REG_WRITE(®_addr_buf[4], 0x00000080); + REG_WRITE(®_addr_buf[5], 0x00000000); + REG_WRITE(®_addr_buf[6], 0x00000000); + REG_WRITE(®_addr_buf[7], 0x00000000); + REG_WRITE(®_addr_buf[8], 0x00000000); + REG_WRITE(®_addr_buf[9], 0x00000000); + REG_WRITE(®_addr_buf[10], 0x00000000); + REG_WRITE(®_addr_buf[11], 0x00000000); + REG_WRITE(®_addr_buf[12], 0x00000000); + REG_WRITE(®_addr_buf[13], 0x00000000); + REG_WRITE(®_addr_buf[14], 0x00000000); + REG_WRITE(®_addr_buf[15], 0x80020000); +#endif } static inline void nerd_sha_ll_fill_text_block_sha256_inter() @@ -729,7 +745,7 @@ void minerWorkerHw(void * task_id) esp_sha_acquire_hardware(); for (uint32_t n = 0; n < job->nonce_count; ++n) { - ((uint32_t*)(sha_buffer+12))[0] = job->nonce_start+n; + //((uint32_t*)(sha_buffer+12))[0] = job->nonce_start+n; //sha_hal_write_digest(SHA2_256, midstate); 
nerd_sha_ll_write_digest(digest_mid); @@ -739,7 +755,7 @@ void minerWorkerHw(void * task_id) //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); //sha_ll_fill_text_block(s_test_buffer+64, 64/4); - nerd_sha_ll_fill_text_block_sha256(sha_buffer); + nerd_sha_ll_fill_text_block_sha256(sha_buffer, job->nonce_start+n); sha_ll_continue_block(SHA2_256); //sha_hal_read_digest(SHA2_256, interResult); @@ -1073,8 +1089,7 @@ void saveStat() { nvs_set_u32(stat_handle, "shares", shares); nvs_set_u32(stat_handle, "valids", valids); nvs_set_u32(stat_handle, "templates", templates); - uint64_t upTime_now = upTime + (esp_timer_get_time()/1000000); - nvs_set_u64(stat_handle, "upTime", upTime_now); + nvs_set_u64(stat_handle, "upTime", upTime); uint32_t crc = crc32_reset(); crc = crc32_add(crc, &best_diff, sizeof(best_diff)); @@ -1084,7 +1099,7 @@ void saveStat() { crc = crc32_add(crc, &nv_shares, sizeof(nv_shares)); crc = crc32_add(crc, &nv_valids, sizeof(nv_valids)); crc = crc32_add(crc, &templates, sizeof(templates)); - crc = crc32_add(crc, &upTime_now, sizeof(upTime_now)); + crc = crc32_add(crc, &upTime, sizeof(upTime)); crc = crc32_finish(crc); nvs_set_u32(stat_handle, "crc32", crc); } @@ -1112,19 +1127,30 @@ void runMonitor(void *name) totalKHashes = (Mhashes * 1000) + hashes / 1000; uint32_t last_update_millis = millis(); + uint32_t uptime_frac = 0; while (1) { uint32_t now_millis = millis(); - if (now_millis < last_update_millis || now_millis >= last_update_millis + 990) - { - unsigned long mElapsed = now_millis - mLastCheck; + if (now_millis < last_update_millis) + now_millis = last_update_millis; + + uint32_t mElapsed = now_millis - mLastCheck; + if (mElapsed >= 1000) + { mLastCheck = now_millis; last_update_millis = now_millis; unsigned long currentKHashes = (Mhashes * 1000) + hashes / 1000; elapsedKHs = currentKHashes - totalKHashes; totalKHashes = currentKHashes; + uptime_frac += mElapsed; + while (uptime_frac >= 1000) + { + uptime_frac -= 1000; + upTime ++; + } + 
drawCurrentScreen(mElapsed); // Monitor state when hashrate is 0.0 diff --git a/src/monitor.cpp b/src/monitor.cpp index de62a76..6d3bc96 100644 --- a/src/monitor.cpp +++ b/src/monitor.cpp @@ -249,6 +249,7 @@ static double s_top_hashrate = 0.0; static std::list s_hashrate_avg_list; static double s_hashrate_summ = 0.0; +static uint8_t s_hashrate_recalc = 0; String getCurrentHashRate(unsigned long mElapsed) { @@ -262,7 +263,17 @@ String getCurrentHashRate(unsigned long mElapsed) s_hashrate_avg_list.pop_front(); } + ++s_hashrate_recalc; + if (s_hashrate_recalc == 0) + { + s_hashrate_summ = 0.0; + for (auto itt = s_hashrate_avg_list.begin(); itt != s_hashrate_avg_list.end(); ++itt) + s_hashrate_summ += *itt; + } + double avg_hashrate = s_hashrate_summ / (double)s_hashrate_avg_list.size(); + if (avg_hashrate < 0.0) + avg_hashrate = 0.0; if (s_skip_first > 0) { @@ -298,11 +309,13 @@ mining_data getMiningData(unsigned long mElapsed) suffix_string(best_diff, best_diff_string, 16, 0); char timeMining[15] = {0}; - uint64_t secElapsed = upTime + (esp_timer_get_time() / 1000000); - int days = secElapsed / 86400; - int hours = (secElapsed - (days * 86400)) / 3600; // Number of seconds in an hour - int mins = (secElapsed - (days * 86400) - (hours * 3600)) / 60; // Remove the number of hours and calculate the minutes. - int secs = secElapsed - (days * 86400) - (hours * 3600) - (mins * 60); + uint64_t tm = upTime; + int secs = tm % 60; + tm /= 60; + int mins = tm % 60; + tm /= 60; + int hours = tm % 24; + int days = tm / 24; sprintf(timeMining, "%01d %02d:%02d:%02d", days, hours, mins, secs); data.completedShares = shares; From 3b39f2ad3f6ef2494526aa2e94046987893c7c6c Mon Sep 17 00:00:00 2001 From: evgenykz Date: Tue, 4 Feb 2025 17:26:46 +0200 Subject: [PATCH 22/39] decrease transfer size; fixed network loop; fixed esp32s3 sha? 
Need to perform long test --- src/mining.cpp | 49 +++++++++++++++++++++++++------------------------ src/stratum.cpp | 4 ++-- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index cb8e212..d80c52a 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -480,6 +480,8 @@ void runStratumWorker(void *name) { hashes += res->nonce_count; if (res->difficulty > currentPoolDifficulty && job_pool == res->id && res->nonce != 0xFFFFFFFF) { + if (!client.connected()) + break; unsigned long sumbit_id = 0; tx_mining_submit(client, mWorker, mJob, res->nonce, sumbit_id); Serial.print(" - Current diff share: "); Serial.println(res->difficulty,12); @@ -589,7 +591,8 @@ static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text, ui REG_WRITE(®_addr_buf[1], data_words[1]); REG_WRITE(®_addr_buf[2], data_words[2]); #if 0 - REG_WRITE(®_addr_buf[3], data_words[3]); + REG_WRITE(®_addr_buf[3], nonce); + //REG_WRITE(®_addr_buf[3], data_words[3]); REG_WRITE(®_addr_buf[4], data_words[4]); REG_WRITE(®_addr_buf[5], data_words[5]); REG_WRITE(®_addr_buf[6], data_words[6]); @@ -662,7 +665,7 @@ static inline bool nerd_sha_ll_read_digest_if(void* ptr) { DPORT_INTERRUPT_DISABLE(); uint32_t last = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); - if ( last >> 16 != 0) + if ( (uint16_t)(last >> 16) != 0) { DPORT_INTERRUPT_RESTORE(); return false; @@ -745,41 +748,39 @@ void minerWorkerHw(void * task_id) esp_sha_acquire_hardware(); for (uint32_t n = 0; n < job->nonce_count; ++n) { - //((uint32_t*)(sha_buffer+12))[0] = job->nonce_start+n; - - //sha_hal_write_digest(SHA2_256, midstate); + //nerd_sha_hal_wait_idle(); nerd_sha_ll_write_digest(digest_mid); - //nerd_sha_ll_write_digest_sha256(midstate); - - //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); - //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(s_test_buffer+64, 64/4); nerd_sha_ll_fill_text_block_sha256(sha_buffer, job->nonce_start+n); 
sha_ll_continue_block(SHA2_256); - //sha_hal_read_digest(SHA2_256, interResult); sha_ll_load(SHA2_256); - //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - //sha_ll_read_digest(SHA2_256, interResult, 256 / 32); - //nerd_sha_ll_read_digest(interResult); - - //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); - //sha_hal_wait_idle(); - //nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(interResult, 64/4); - //nerd_sha_ll_fill_text_block_sha256(interResult); nerd_sha_ll_fill_text_block_sha256_inter(); sha_ll_start_block(SHA2_256); - - //sha_hal_read_digest(SHA2_256, hash); sha_ll_load(SHA2_256); - //sha_hal_wait_idle(); nerd_sha_hal_wait_idle(); - //sha_ll_read_digest(SHA2_256, hash, 256 / 32); if (nerd_sha_ll_read_digest_if(hash)) { + //Serial.printf("Hw 16bit Share, nonce=0x%X\n", job->nonce_start+n); +#if 0 + //Validation + ((uint32_t*)(job->sha_buffer+64+12))[0] = job->nonce_start+n; + uint8_t doubleHash[32]; + uint32_t diget_mid[8]; + uint32_t bake[16]; + nerd_mids(diget_mid, job->sha_buffer); + nerd_sha256_bake(diget_mid, job->sha_buffer+64, bake); + nerd_sha256d_baked(diget_mid, job->sha_buffer+64, bake, doubleHash); + for (int i = 0; i < 32; ++i) + { + if (hash[i] != doubleHash[i]) + { + Serial.println("***HW sha256 esp32s3 bug detected***"); + break; + } + } +#endif //~5 per second double diff_hash = diff_from_target(hash); if (diff_hash > result->difficulty) diff --git a/src/stratum.cpp b/src/stratum.cpp index 7310b9b..49c2422 100644 --- a/src/stratum.cpp +++ b/src/stratum.cpp @@ -212,7 +212,7 @@ bool tx_mining_submit(WiFiClient& client, mining_subscribe mWorker, mining_job m // Submit id = getNextId(id); submit_id = id; - sprintf(payload, "{\"id\": %u, \"method\": \"mining.submit\", \"params\": [\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"]}\n", + sprintf(payload, "{\"id\":%u,\"method\":\"mining.submit\",\"params\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"]}\n", id, mWorker.wName,//"bc1qvv469gmw4zz6qa4u4dsezvrlmqcqszwyfzhgwj", //mWorker.name, mJob.job_id.c_str(), 
@@ -248,7 +248,7 @@ bool tx_suggest_difficulty(WiFiClient& client, double difficulty) char payload[BUFFER] = {0}; id = getNextId(id); - sprintf(payload, "{\"id\": %d, \"method\": \"mining.suggest_difficulty\", \"params\": [%.10g]}\n", id, difficulty); + sprintf(payload, "{\"id\":%d,\"method\":\"mining.suggest_difficulty\",\"params\":[%.10g]}\n", id, difficulty); Serial.print(" Sending : "); Serial.print(payload); return client.print(payload); From 228ff8894a9577a92cf6fcf57e113db9101c36a3 Mon Sep 17 00:00:00 2001 From: evgenykz Date: Wed, 5 Feb 2025 17:19:07 +0200 Subject: [PATCH 23/39] Fixed best_diff display esp32s3 HW sha256 speed-up (252KH/s) --- src/NerdMinerV2.ino.cpp | 2 +- src/mining.cpp | 103 ++++++++++++++++++++++++++++++---------- src/utils.cpp | 45 ++++++++++++++---- 3 files changed, 116 insertions(+), 34 deletions(-) diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 33a7b0f..992cc04 100644 --- a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -685,7 +685,7 @@ void setup() //BaseType_t res = xTaskCreate(runWorker, name, 35000, (void*)name, 1, NULL); TaskHandle_t minerTask1, minerTask2 = NULL; #ifdef HARDWARE_SHA265 - xTaskCreate(minerWorkerHw, "MinerHw-0", 2048, (void*)0, 3, &minerTask1); + xTaskCreate(minerWorkerHw, "MinerHw-0", 4096, (void*)0, 3, &minerTask1); #else xTaskCreate(minerWorkerSw, "MinerSw-0", 6000, (void*)0, 1, &minerTask1); #endif diff --git a/src/mining.cpp b/src/mining.cpp index d80c52a..bf9b518 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -25,6 +25,8 @@ //#define SHA256_VALIDATE +//#define RANDOM_NONCE +#define RANDOM_NONCE_MASK 0xFFFFC000 #ifdef HARDWARE_SHA265 #include @@ -92,7 +94,6 @@ bool checkPoolConnection(void) { Serial.println("Imposible to connect to : " + Settings.PoolAddress); WiFi.hostByName(Settings.PoolAddress.c_str(), serverIP); Serial.printf("Resolved DNS got: %s\n", serverIP.toString()); - vTaskDelay(1000 / portTICK_PERIOD_MS); return false; } @@ -190,6 +191,7 @@ static void 
MiningJobStop(uint32_t &job_pool, std::map lock(s_job_mutex); + s_job_result_list.clear(); s_job_request_list_sw.clear(); #ifdef HARDWARE_SHA265 s_job_request_list_hw.clear(); @@ -200,6 +202,19 @@ static void MiningJobStop(uint32_t &job_pool, std::map> 30)) * 0xBF58476D1CE4E5B9ull; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; + return z ^ (z >> 31); +} + +#endif + void runStratumWorker(void *name) { // TEST: https://bitcoin.stackexchange.com/questions/22929/full-example-data-for-scrypt-stratum-client @@ -238,7 +253,6 @@ void runStratumWorker(void *name) { if(WiFi.status() != WL_CONNECTED){ // WiFi is disconnected, so reconnect now mMonitor.NerdStatus = NM_Connecting; - job_pool = 0xFFFFFFFF; MiningJobStop(job_pool, s_submition_map); WiFi.reconnect(); vTaskDelay(5000 / portTICK_PERIOD_MS); @@ -247,9 +261,10 @@ void runStratumWorker(void *name) { if(!checkPoolConnection()){ //If server is not reachable add random delay for connection retries - srand(millis()); - //Generate value between 1 and 120 secs - vTaskDelay(((1 + rand() % 120) * 1000) / portTICK_PERIOD_MS); + //Generate value between 1 and 60 secs + MiningJobStop(job_pool, s_submition_map); + vTaskDelay(((1 + rand() % 60) * 1000) / portTICK_PERIOD_MS); + continue; } if(!isMinerSuscribed) @@ -355,18 +370,36 @@ void runStratumWorker(void *name) { #endif #endif - nonce_pool = 0x10000000; + #ifdef RANDOM_NONCE + nonce_pool = RandomGet() & RANDOM_NONCE_MASK; + #else + if (i2c_slave_vector.empty()) + nonce_pool = 0xDA54E700; //nonce 0x00000000 is not possible, start from some random nonce + else + nonce_pool = 0x10000000; + #endif + { std::lock_guard lock(s_job_mutex); for (int i = 0; i < 4; ++ i) { + #if 1 JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader, diget_mid, bake); + #ifdef RANDOM_NONCE + nonce_pool = RandomGet() & RANDOM_NONCE_MASK; + #else nonce_pool += NONCE_PER_JOB_SW; + #endif + #endif #ifdef HARDWARE_SHA265 JobPush( 
s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); + #ifdef RANDOM_NONCE + nonce_pool = RandomGet() & RANDOM_NONCE_MASK; + #else nonce_pool += NONCE_PER_JOB_HW; #endif + #endif } } //Nonce for nonce_pool starts from 0x10000000 @@ -456,18 +489,28 @@ void runStratumWorker(void *name) { std::lock_guard lock(s_job_mutex); job_result_list.insert(job_result_list.end(), s_job_result_list.begin(), s_job_result_list.end()); s_job_result_list.clear(); - + +#if 1 while (s_job_request_list_sw.size() < 4) { JobPush( s_job_request_list_sw, job_pool, nonce_pool, NONCE_PER_JOB_SW, currentPoolDifficulty, mMiner.bytearray_blockheader, diget_mid, bake); + #ifdef RANDOM_NONCE + nonce_pool = RandomGet() & RANDOM_NONCE_MASK; + #else nonce_pool += NONCE_PER_JOB_SW; + #endif } - +#endif + #ifdef HARDWARE_SHA265 while (s_job_request_list_hw.size() < 4) { JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); + #ifdef RANDOM_NONCE + nonce_pool = RandomGet() & RANDOM_NONCE_MASK; + #else nonce_pool += NONCE_PER_JOB_HW; + #endif } #endif } @@ -661,15 +704,18 @@ static inline void nerd_sha_ll_read_digest(void* ptr) DPORT_INTERRUPT_RESTORE(); } + static inline bool nerd_sha_ll_read_digest_if(void* ptr) { DPORT_INTERRUPT_DISABLE(); uint32_t last = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); + #if 1 if ( (uint16_t)(last >> 16) != 0) { DPORT_INTERRUPT_RESTORE(); return false; } + #endif ((uint32_t*)ptr)[7] = last; ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); @@ -704,6 +750,7 @@ static inline void nerd_sha_hal_wait_idle() {} } +//#define VALIDATION void minerWorkerHw(void * task_id) { unsigned int miner_id = (uint32_t)task_id; @@ -715,9 +762,14 @@ void minerWorkerHw(void * task_id) uint8_t hash[32]; uint8_t digest_mid[32]; uint8_t sha_buffer[64]; - uint32_t wdt_counter = 0; +#ifdef VALIDATION + uint8_t 
doubleHash[32]; + uint32_t diget_mid[8]; + uint32_t bake[16]; +#endif + while (1) { { @@ -744,33 +796,36 @@ void minerWorkerHw(void * task_id) uint8_t job_in_work = job->id & 0xFF; memcpy(digest_mid, job->midstate, sizeof(digest_mid)); memcpy(sha_buffer, job->sha_buffer+64, sizeof(sha_buffer)); +#ifdef VALIDATION + nerd_mids(diget_mid, job->sha_buffer); + nerd_sha256_bake(diget_mid, job->sha_buffer+64, bake); +#endif esp_sha_acquire_hardware(); - for (uint32_t n = 0; n < job->nonce_count; ++n) + REG_WRITE(SHA_MODE_REG, SHA2_256); + uint32_t nend = job->nonce_start + job->nonce_count; + for (uint32_t n = job->nonce_start; n < nend; ++n) { //nerd_sha_hal_wait_idle(); nerd_sha_ll_write_digest(digest_mid); - nerd_sha_hal_wait_idle(); - nerd_sha_ll_fill_text_block_sha256(sha_buffer, job->nonce_start+n); - sha_ll_continue_block(SHA2_256); + //nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(sha_buffer, n); + //sha_ll_continue_block(SHA2_256); + REG_WRITE(SHA_CONTINUE_REG, 1); sha_ll_load(SHA2_256); nerd_sha_hal_wait_idle(); nerd_sha_ll_fill_text_block_sha256_inter(); - sha_ll_start_block(SHA2_256); + //sha_ll_start_block(SHA2_256); + REG_WRITE(SHA_START_REG, 1); sha_ll_load(SHA2_256); nerd_sha_hal_wait_idle(); if (nerd_sha_ll_read_digest_if(hash)) { - //Serial.printf("Hw 16bit Share, nonce=0x%X\n", job->nonce_start+n); -#if 0 + //Serial.printf("Hw 16bit Share, nonce=0x%X\n", n); +#ifdef VALIDATION //Validation - ((uint32_t*)(job->sha_buffer+64+12))[0] = job->nonce_start+n; - uint8_t doubleHash[32]; - uint32_t diget_mid[8]; - uint32_t bake[16]; - nerd_mids(diget_mid, job->sha_buffer); - nerd_sha256_bake(diget_mid, job->sha_buffer+64, bake); + ((uint32_t*)(job->sha_buffer+64+12))[0] = n; nerd_sha256d_baked(diget_mid, job->sha_buffer+64, bake, doubleHash); for (int i = 0; i < 32; ++i) { @@ -786,7 +841,7 @@ void minerWorkerHw(void * task_id) if (diff_hash > result->difficulty) { result->difficulty = diff_hash; - result->nonce = job->nonce_start+n; + 
result->nonce = n; memcpy(result->hash, hash, sizeof(hash)); } } @@ -794,7 +849,7 @@ void minerWorkerHw(void * task_id) (uint8_t)(n & 0xFF) == 0 && s_working_current_job_id != job_in_work) { - result->nonce_count = n+1; + result->nonce_count = n-job->nonce_start+1; break; } } diff --git a/src/utils.cpp b/src/utils.cpp index d6dfb2b..72ce40a 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -430,43 +430,70 @@ void suffix_string(double val, char *buf, size_t bufsiz, int sigdigits) // minimum diff value to display const double min_diff = 0.001; const byte maxNdigits = 2; - char suffix[2] = ""; + char suffix[2] = {0,0}; bool decimal = true; double dval; if (val >= exa) { val /= peta; dval = val / kilo; - strcpy(suffix, "E"); + suffix[0] = 'E'; + if (dval > 999.99) + dval = 999.99; } else if (val >= peta) { val /= tera; dval = val / kilo; - strcpy(suffix, "P"); + suffix[0] = 'P'; } else if (val >= tera) { val /= giga; dval = val / kilo; - strcpy(suffix, "T"); + suffix[0] = 'T'; } else if (val >= giga) { val /= mega; dval = val / kilo; - strcpy(suffix, "G"); + suffix[0] = 'G'; } else if (val >= mega) { val /= kilo; dval = val / kilo; - strcpy(suffix, "M"); + suffix[0] = 'M'; } else if (val >= kilo) { dval = val / kilo; - strcpy(suffix, "K"); + suffix[0] = 'K'; } else { dval = val; if (dval < min_diff) dval = 0.0; } + + int frac = 3; + if (suffix[0] != 0) + { + if (dval > 99.999) + frac = 1; + else if (dval > 9.999) + frac = 2; + } else + { + if (dval > 99.999) + frac = 2; + else if (dval > 9.999) + frac = 3; + else + frac = 4; + } if (!sigdigits) { if (decimal) - snprintf(buf, bufsiz, "%.3f%s", dval, suffix); - else + { + if (frac == 4) + snprintf(buf, bufsiz, "%.4f%s", dval, suffix); + else if (frac == 3) + snprintf(buf, bufsiz, "%.3f%s", dval, suffix); + else if (frac == 2) + snprintf(buf, bufsiz, "%.2f%s", dval, suffix); + else + snprintf(buf, bufsiz, "%.1f%s", dval, suffix); + } else snprintf(buf, bufsiz, "%d%s", (unsigned int)dval, suffix); } else { /* Always show 
sigdigits + 1, padded on right with zeroes From 5640c9a6ee9c6aaff7b193984a10a3b292e3b85b Mon Sep 17 00:00:00 2001 From: evgenykz Date: Wed, 5 Feb 2025 17:47:48 +0200 Subject: [PATCH 24/39] esp32D optimization 345KH/s --- src/ShaTests/nerdSHA256plus.cpp | 8 ++--- src/ShaTests/nerdSHA256plus.h | 2 +- src/mining.cpp | 55 +++++++++++++++++++++------------ 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/ShaTests/nerdSHA256plus.cpp b/src/ShaTests/nerdSHA256plus.cpp index d82d7dc..4260fdb 100644 --- a/src/ShaTests/nerdSHA256plus.cpp +++ b/src/ShaTests/nerdSHA256plus.cpp @@ -486,7 +486,7 @@ IRAM_ATTR void nerd_sha256_bake(const uint32_t* digest, const uint8_t* dataIn, u } -IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash) +IRAM_ATTR bool nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash) { uint32_t temp1, temp2; //*********** Init 1rst SHA *********** @@ -697,10 +697,7 @@ IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, temp1 = A[3] + S3(A[0]) + F1(A[0], A[1], A[2]) + K[60] + R(60); uint32_t a7 = A[7] + temp1; if ((uint32_t)(a7 & 0xFFFF) != 0x32E7) - { - doubleHash[30] = 0xFF; - return; - } + return false; //Post 57 uint32_t m2 = S2(A[7]) + F0(A[7], d58_a0, d57_a1); @@ -743,4 +740,5 @@ IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, PUT_UINT32_BE(0x1F83D9AB + A[6], doubleHash, 24); PUT_UINT32_BE(0x5BE0CD19 + A[7], doubleHash, 28); #endif + return true; } diff --git a/src/ShaTests/nerdSHA256plus.h b/src/ShaTests/nerdSHA256plus.h index 5cdce78..3d7872e 100644 --- a/src/ShaTests/nerdSHA256plus.h +++ b/src/ShaTests/nerdSHA256plus.h @@ -30,7 +30,7 @@ IRAM_ATTR void nerd_mids(uint32_t* digest, const uint8_t* dataIn); IRAM_ATTR bool nerd_sha256d(nerdSHA256_context* midstate, const uint8_t* dataIn, uint8_t* doubleHash); IRAM_ATTR void nerd_sha256_bake(const uint32_t* 
digest, const uint8_t* dataIn, uint32_t* bake); //15 words -IRAM_ATTR void nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash); +IRAM_ATTR bool nerd_sha256d_baked(const uint32_t* digest, const uint8_t* dataIn, const uint32_t* bake, uint8_t* doubleHash); void ByteReverseWords(uint32_t* out, const uint32_t* in, uint32_t byteCount); diff --git a/src/mining.cpp b/src/mining.cpp index bf9b518..178a42a 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -320,6 +320,9 @@ void runStratumWorker(void *name) { uint32_t hw_midstate[8]; uint32_t diget_mid[8]; uint32_t bake[16]; + #if defined(CONFIG_IDF_TARGET_ESP32) + uint8_t sha_buffer_swap[128]; + #endif //Read pending messages from pool while(client.connected() && client.available()) @@ -370,6 +373,11 @@ void runStratumWorker(void *name) { #endif #endif + #if defined(CONFIG_IDF_TARGET_ESP32) + for (int i = 0; i < 32; ++i) + ((uint32_t*)sha_buffer_swap)[i] = __builtin_bswap32(((const uint32_t*)(mMiner.bytearray_blockheader))[i]); + #endif + #ifdef RANDOM_NONCE nonce_pool = RandomGet() & RANDOM_NONCE_MASK; #else @@ -393,7 +401,11 @@ void runStratumWorker(void *name) { #endif #endif #ifdef HARDWARE_SHA265 - JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); + #if defined(CONFIG_IDF_TARGET_ESP32) + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, sha_buffer_swap, hw_midstate, bake); + #else + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); + #endif #ifdef RANDOM_NONCE nonce_pool = RandomGet() & RANDOM_NONCE_MASK; #else @@ -464,12 +476,14 @@ void runStratumWorker(void *name) { { std::shared_ptr result = std::make_shared(); ((uint32_t*)(mMiner.bytearray_blockheader+64+12))[0] = nonce_vector[n]; - nerd_sha256d_baked(diget_mid, 
mMiner.bytearray_blockheader+64, bake, result->hash); - result->id = job_pool; - result->nonce = nonce_vector[n]; - result->nonce_count = 0; - result->difficulty = diff_from_target(result->hash); - job_result_list.push_back(result); + if (nerd_sha256d_baked(diget_mid, mMiner.bytearray_blockheader+64, bake, result->hash)) + { + result->id = job_pool; + result->nonce = nonce_vector[n]; + result->nonce_count = 0; + result->difficulty = diff_from_target(result->hash); + job_result_list.push_back(result); + } } uint32_t time_end = millis(); //if (nonces_done > 16384) @@ -505,7 +519,11 @@ void runStratumWorker(void *name) { #ifdef HARDWARE_SHA265 while (s_job_request_list_hw.size() < 4) { - JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); + #if defined(CONFIG_IDF_TARGET_ESP32) + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, sha_buffer_swap, hw_midstate, bake); + #else + JobPush( s_job_request_list_hw, job_pool, nonce_pool, NONCE_PER_JOB_HW, currentPoolDifficulty, mMiner.bytearray_blockheader, hw_midstate, bake); + #endif #ifdef RANDOM_NONCE nonce_pool = RandomGet() & RANDOM_NONCE_MASK; #else @@ -590,15 +608,7 @@ void minerWorkerSw(void * task_id) for (uint32_t n = 0; n < job->nonce_count; ++n) { ((uint32_t*)(job->sha_buffer+64+12))[0] = job->nonce_start+n; - nerd_sha256d_baked(job->midstate, job->sha_buffer+64, job->bake, hash); - - if (s_working_current_job_id != job_in_work) - { - result->nonce_count = n+1; - break; - } - - if(hash[31] == 0 && hash[30] == 0) + if (nerd_sha256d_baked(job->midstate, job->sha_buffer+64, job->bake, hash)) { double diff_hash = diff_from_target(hash); if (diff_hash > result->difficulty) @@ -608,6 +618,12 @@ void minerWorkerSw(void * task_id) memcpy(result->hash, hash, 32); } } + + if ( (uint16_t)(n & 0xFF) == 0 &&s_working_current_job_id != job_in_work) + { + result->nonce_count = n+1; + break; + } 
} } else vTaskDelay(2 / portTICK_PERIOD_MS); @@ -1031,8 +1047,7 @@ void minerWorkerHw(void * task_id) result->nonce_count = job->nonce_count; result->difficulty = job->difficulty; uint8_t job_in_work = job->id & 0xFF; - for (int i = 0; i < 32; ++i) - ((uint32_t*)sha_buffer)[i] = __builtin_bswap32(((const uint32_t*)(job->sha_buffer))[i]); + memcpy(sha_buffer, job->sha_buffer, 80); esp_sha_lock_engine(SHA2_256); for (uint32_t n = 0; n < job->nonce_count; ++n) @@ -1040,7 +1055,7 @@ void minerWorkerHw(void * task_id) //((uint32_t*)(sha_buffer+64+12))[0] = __builtin_bswap32(job->nonce_start+n); //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); - nerd_sha_hal_wait_idle(); + //nerd_sha_hal_wait_idle(); nerd_sha_ll_fill_text_block_sha256(sha_buffer); sha_ll_start_block(SHA2_256); From 4b5eae1c86029d30375cd6ae9decb3d3122c333f Mon Sep 17 00:00:00 2001 From: evgenykz Date: Mon, 17 Feb 2025 10:52:30 +0200 Subject: [PATCH 25/39] Disable i2c master Added HW sha256 bug workaround Added list size limit --- src/mining.cpp | 50 +++++++++++++++++++++++++++++++++++--------------- src/utils.cpp | 10 ++++++++++ src/utils.h | 1 + 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/mining.cpp b/src/mining.cpp index 178a42a..8fd7316 100644 --- a/src/mining.cpp +++ b/src/mining.cpp @@ -23,6 +23,7 @@ #define NONCE_PER_JOB_SW 4096 #define NONCE_PER_JOB_HW 16*1024 +//#define I2C_SLAVE //#define SHA256_VALIDATE //#define RANDOM_NONCE @@ -226,13 +227,14 @@ void runStratumWorker(void *name) { Serial.printf("### [Total Heap / Free heap / Min free heap]: %d / %d / %d \n", ESP.getHeapSize(), ESP.getFreeHeap(), ESP.getMinFreeHeap()); #endif - std::vector i2c_slave_vector; std::map> s_submition_map; +#ifdef I2C_SLAVE + std::vector i2c_slave_vector; + //scan for i2c slaves if (i2c_master_start() == 0) i2c_slave_vector = i2c_master_scan(0x0, 0x80); - Serial.printf("Found %d slave workers\n", i2c_slave_vector.size()); if (!i2c_slave_vector.empty()) { @@ -241,6 +243,7 @@ void 
runStratumWorker(void *name) { Serial.printf("0x%02X,", (uint32_t)i2c_slave_vector[n]); Serial.println(""); } +#endif // connect to pool double currentPoolDifficulty = DEFAULT_DIFFICULTY; @@ -381,10 +384,12 @@ void runStratumWorker(void *name) { #ifdef RANDOM_NONCE nonce_pool = RandomGet() & RANDOM_NONCE_MASK; #else - if (i2c_slave_vector.empty()) - nonce_pool = 0xDA54E700; //nonce 0x00000000 is not possible, start from some random nonce - else - nonce_pool = 0x10000000; + #ifdef I2C_SLAVE + if (!i2c_slave_vector.empty()) + nonce_pool = 0x10000000; + else + #endif + nonce_pool = 0xDA54E700; //nonce 0x00000000 is not possible, start from some random nonce #endif @@ -414,9 +419,11 @@ void runStratumWorker(void *name) { #endif } } + #ifdef I2C_SLAVE //Nonce for nonce_pool starts from 0x10000000 //For i2c slave we give nonces from 0x20000000, that is 0x10000000 nonces per slave i2c_feed_slaves(i2c_slave_vector, job_pool & 0xFF, 0x20, currentPoolDifficulty, mMiner.bytearray_blockheader); + #endif } else { Serial.println("Parsing error, need restart"); @@ -461,6 +468,7 @@ void runStratumWorker(void *name) { } std::list> job_result_list; + #ifdef I2C_SLAVE if (i2c_slave_vector.empty() || job_pool == 0xFFFFFFFF) { vTaskDelay(50 / portTICK_PERIOD_MS); //Small delay @@ -496,6 +504,9 @@ void runStratumWorker(void *name) { } else vTaskDelay(40 / portTICK_PERIOD_MS); } + #else + vTaskDelay(50 / portTICK_PERIOD_MS); //Small delay + #endif if (job_pool != 0xFFFFFFFF) @@ -587,7 +598,8 @@ void minerWorkerSw(void * task_id) std::lock_guard lock(s_job_mutex); if (result) { - s_job_result_list.push_back(result); + if (s_job_result_list.size() < 16) + s_job_result_list.push_back(result); result.reset(); } if (!s_job_request_list_sw.empty()) @@ -792,7 +804,8 @@ void minerWorkerHw(void * task_id) std::lock_guard lock(s_job_mutex); if (result) { - s_job_result_list.push_back(result); + if (s_job_result_list.size() < 16) + s_job_result_list.push_back(result); result.reset(); } if 
(!s_job_request_list_hw.empty()) @@ -856,9 +869,12 @@ void minerWorkerHw(void * task_id) double diff_hash = diff_from_target(hash); if (diff_hash > result->difficulty) { - result->difficulty = diff_hash; - result->nonce = n; - memcpy(result->hash, hash, sizeof(hash)); + if (isSha256Valid(hash)) + { + result->difficulty = diff_hash; + result->nonce = n; + memcpy(result->hash, hash, sizeof(hash)); + } } } if ( @@ -1029,7 +1045,8 @@ void minerWorkerHw(void * task_id) std::lock_guard lock(s_job_mutex); if (result) { - s_job_result_list.push_back(result); + if (s_job_result_list.size() < 16) + s_job_result_list.push_back(result); result.reset(); } if (!s_job_request_list_hw.empty()) @@ -1080,9 +1097,12 @@ void minerWorkerHw(void * task_id) double diff_hash = diff_from_target(hash); if (diff_hash > result->difficulty) { - result->difficulty = diff_hash; - result->nonce = job->nonce_start+n; - memcpy(result->hash, hash, sizeof(hash)); + if (isSha256Valid(hash)) + { + result->difficulty = diff_hash; + result->nonce = job->nonce_start+n; + memcpy(result->hash, hash, sizeof(hash)); + } } } if ( diff --git a/src/utils.cpp b/src/utils.cpp index 72ce40a..ac51020 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -108,6 +108,16 @@ double diff_from_target(void *target) return d64 / dcut64; } +bool isSha256Valid(const void* sha256) +{ + for(uint8_t i=0; i < 8; ++i) + { + if ( ((const uint32_t*)sha256)[i] != 0 ) + return true; + } + return false; +} + /****************** PREMINING CALCULATIONS ********************/ diff --git a/src/utils.h b/src/utils.h index b8a539d..467d35c 100644 --- a/src/utils.h +++ b/src/utils.h @@ -20,6 +20,7 @@ uint8_t hex(char ch); int to_byte_array(const char *in, size_t in_size, uint8_t *out); double le256todouble(const void *target); double diff_from_target(void *target); +bool isSha256Valid(const void* sha256); miner_data calculateMiningData(mining_subscribe& mWorker, mining_job mJob); bool checkValid(unsigned char* hash, unsigned char* target); void 
suffix_string(double val, char *buf, size_t bufsiz, int sigdigits); From c10698b60c2e3220fdef7bdef743923cb3718cab Mon Sep 17 00:00:00 2001 From: bitmaker Date: Mon, 1 Sep 2025 10:58:07 +0200 Subject: [PATCH 26/39] refactoring, btc price bug, save config data bug --- platformio.ini | 2 +- src/NerdMinerV2.ino.cpp | 537 +------------------------------- src/ShaTests/nerdSHA_HWTest.cpp | 531 +++++++++++++++++++++++++++++++ src/ShaTests/nerdSHA_HWTest.h | 5 + src/monitor.cpp | 6 +- src/monitor.h | 3 +- src/wManager.cpp | 4 +- 7 files changed, 547 insertions(+), 541 deletions(-) create mode 100644 src/ShaTests/nerdSHA_HWTest.cpp create mode 100644 src/ShaTests/nerdSHA_HWTest.h diff --git a/platformio.ini b/platformio.ini index 48c3b05..6769fbf 100644 --- a/platformio.ini +++ b/platformio.ini @@ -11,7 +11,7 @@ [platformio] globallib_dir = lib -default_envs = NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, NerdminerV2, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, ESP32-2432S028R, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-devKitmv1, ESP32-C3-super-mini +default_envs = NerdminerV2, NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, ESP32-2432S028R, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-devKitmv1, ESP32-C3-super-mini [env:M5Stick-C-Plus2] platform = espressif32@6.6.0 diff --git a/src/NerdMinerV2.ino.cpp b/src/NerdMinerV2.ino.cpp index 992cc04..46ca5e9 100644 --- 
a/src/NerdMinerV2.ino.cpp +++ b/src/NerdMinerV2.ino.cpp @@ -13,6 +13,7 @@ #include "monitor.h" #include "drivers/displays/display.h" #include "drivers/storage/SDCard.h" +#include "ShaTests/nerdSHA_HWTest.h" #include "timeconst.h" #ifdef TOUCH_ENABLE @@ -54,537 +55,6 @@ const char* ntpServer = "pool.ntp.org"; //void runMonitor(void *name); -#ifdef HW_SHA256_TEST - -#include -#include "ShaTests/nerdSHA256plus.h" -#include "mbedtls/sha256.h" -#include -#include -#include -#include -#include -#include -#include -#include - -static const uint8_t s_test_buffer[128] = -{ - 0x00, 0x00, 0x00, 0x22, 0x99, 0x44, 0xbb, 0xff, 0xbb, 0x00, 0x00, 0x77, 0x44, 0xcc, 0x11, 0x77, - 0x88, 0x55, 0xbb, 0x44, 0x55, 0x00, 0x77, 0x88, 0x99, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xbb, 0xbb, 0x66, 0x11, 0x88, 0x33, 0x44, 0x99, 0xcc, 0x33, 0xff, 0x22, - 0x11, 0xaa, 0x77, 0xee, 0xbb, 0x66, 0xee, 0xcc, 0xee, 0x66, 0xee, 0xdd, 0x77, 0x55, 0x22, 0x22, - 0xcc, 0xcc, 0x66, 0xee, 0x22, 0xdd, 0x99, 0x66, 0x66, 0x88, 0x00, 0x11, 0x2e, 0x33, 0x41, 0x19, - - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80 -}; - -static const uint8_t s_test_buffer_aligned[128] __attribute__((aligned(256))) = -{ - 0x00, 0x00, 0x00, 0x22, 0x99, 0x44, 0xbb, 0xff, 0xbb, 0x00, 0x00, 0x77, 0x44, 0xcc, 0x11, 0x77, - 0x88, 0x55, 0xbb, 0x44, 0x55, 0x00, 0x77, 0x88, 0x99, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xbb, 0xbb, 0x66, 0x11, 0x88, 0x33, 0x44, 0x99, 0xcc, 0x33, 0xff, 0x22, - 0x11, 0xaa, 0x77, 0xee, 0xbb, 0x66, 0xee, 0xcc, 0xee, 0x66, 0xee, 0xdd, 0x77, 0x55, 0x22, 0x22, - 0xcc, 0xcc, 0x66, 0xee, 0x22, 0xdd, 0x99, 0x66, 0x66, 0x88, 0x00, 0x11, 0x2e, 0x33, 0x41, 0x19, - - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80 -}; - -static uint8_t interResult_aligned[64] __attribute__((aligned(256))); -static uint8_t midstate_aligned[32] __attribute__((aligned(256))); -static uint8_t hash_aligned[64] __attribute__((aligned(256))); - -#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) -static inline void nerd_sha_hal_wait_idle() -{ - while (REG_READ(SHA_BUSY_REG)) - {} -} - -static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) -{ - uint32_t *data_words = (uint32_t *)input_text; - uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); - - REG_WRITE(&reg_addr_buf[0], data_words[0]); - REG_WRITE(&reg_addr_buf[1], data_words[1]); - REG_WRITE(&reg_addr_buf[2], data_words[2]); - REG_WRITE(&reg_addr_buf[3], data_words[3]); - REG_WRITE(&reg_addr_buf[4], data_words[4]); - REG_WRITE(&reg_addr_buf[5], data_words[5]); - REG_WRITE(&reg_addr_buf[6], data_words[6]); - REG_WRITE(&reg_addr_buf[7], data_words[7]); - REG_WRITE(&reg_addr_buf[8], data_words[8]); - REG_WRITE(&reg_addr_buf[9], data_words[9]); - REG_WRITE(&reg_addr_buf[10], data_words[10]); - REG_WRITE(&reg_addr_buf[11], data_words[11]); - REG_WRITE(&reg_addr_buf[12], data_words[12]); - REG_WRITE(&reg_addr_buf[13], data_words[13]); - REG_WRITE(&reg_addr_buf[14], data_words[14]); - REG_WRITE(&reg_addr_buf[15], data_words[15]); -} - -static inline void nerd_sha_ll_write_digest_sha256(void *digest_state) -{ - uint32_t *digest_state_words = (uint32_t *)digest_state; - uint32_t *reg_addr_buf = (uint32_t *)(SHA_H_BASE); - - REG_WRITE(&reg_addr_buf[0], digest_state_words[0]); - REG_WRITE(&reg_addr_buf[1], digest_state_words[1]); - REG_WRITE(&reg_addr_buf[2], digest_state_words[2]); - REG_WRITE(&reg_addr_buf[3], digest_state_words[3]); - REG_WRITE(&reg_addr_buf[4], digest_state_words[4]); - 
REG_WRITE(&reg_addr_buf[5], digest_state_words[5]); - REG_WRITE(&reg_addr_buf[6], digest_state_words[6]); - REG_WRITE(&reg_addr_buf[7], digest_state_words[7]); -} - -//void IRAM_ATTR esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words) -static inline void nerd_sha_ll_read_digest(void* ptr) -{ - DPORT_INTERRUPT_DISABLE(); -#if 0 - for (uint32_t i = 0; i < 256 / 32; ++i) - { - ((uint32_t*)ptr)[i] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + i * 4); - } -#else - ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); - ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); - ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); - ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4); - ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4); - ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); - ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); - ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); -#endif - DPORT_INTERRUPT_RESTORE(); -} - -static IRAM_ATTR uint8_t dma_buffer[128] __attribute__((aligned(32))); -static IRAM_ATTR uint8_t dma_inter[64] __attribute__((aligned(32))); -static IRAM_ATTR uint8_t dma_hash[32] __attribute__((aligned(32))); -static DRAM_ATTR lldesc_t s_dma_descr_input; -static DRAM_ATTR lldesc_t s_dma_descr_buf; -static DRAM_ATTR lldesc_t s_dma_descr_inter; - -#endif - -#if defined(CONFIG_IDF_TARGET_ESP32) -static inline void nerd_sha_ll_read_digest_swap(void* ptr) -{ - DPORT_INTERRUPT_DISABLE(); - ((uint32_t*)ptr)[0] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4)); - ((uint32_t*)ptr)[1] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4)); - ((uint32_t*)ptr)[2] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4)); - ((uint32_t*)ptr)[3] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4)); - ((uint32_t*)ptr)[4] = 
__builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4)); - ((uint32_t*)ptr)[5] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4)); - ((uint32_t*)ptr)[6] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4)); - ((uint32_t*)ptr)[7] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4)); - DPORT_INTERRUPT_RESTORE(); -} - -static inline void nerd_sha_ll_read_digest(void* ptr) -{ - DPORT_INTERRUPT_DISABLE(); - ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4); - ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4); - ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4); - ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4); - ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4); - ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4); - ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4); - ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4); - DPORT_INTERRUPT_RESTORE(); -} - -static inline void nerd_sha_hal_wait_idle() -{ - while (DPORT_REG_READ(SHA_256_BUSY_REG)) - {} -} - -static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) -{ - uint32_t *data_words = (uint32_t *)input_text; - uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); - - reg_addr_buf[0] = data_words[0]; - reg_addr_buf[1] = data_words[1]; - reg_addr_buf[2] = data_words[2]; - reg_addr_buf[3] = data_words[3]; - reg_addr_buf[4] = data_words[4]; - reg_addr_buf[5] = data_words[5]; - reg_addr_buf[6] = data_words[6]; - reg_addr_buf[7] = data_words[7]; - reg_addr_buf[8] = data_words[8]; - reg_addr_buf[9] = data_words[9]; - reg_addr_buf[10] = data_words[10]; - reg_addr_buf[11] = data_words[11]; - reg_addr_buf[12] = data_words[12]; - reg_addr_buf[13] = data_words[13]; - reg_addr_buf[14] = data_words[14]; - reg_addr_buf[15] = data_words[15]; -} - -static inline void 
nerd_sha_ll_fill_text_block_sha256_swap(const void *input_text) -{ - uint32_t *data_words = (uint32_t *)input_text; - uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); - - reg_addr_buf[0] = __builtin_bswap32(data_words[0]); - reg_addr_buf[1] = __builtin_bswap32(data_words[1]); - reg_addr_buf[2] = __builtin_bswap32(data_words[2]); - reg_addr_buf[3] = __builtin_bswap32(data_words[3]); - reg_addr_buf[4] = __builtin_bswap32(data_words[4]); - reg_addr_buf[5] = __builtin_bswap32(data_words[5]); - reg_addr_buf[6] = __builtin_bswap32(data_words[6]); - reg_addr_buf[7] = __builtin_bswap32(data_words[7]); - reg_addr_buf[8] = __builtin_bswap32(data_words[8]); - reg_addr_buf[9] = __builtin_bswap32(data_words[9]); - reg_addr_buf[10] = __builtin_bswap32(data_words[10]); - reg_addr_buf[11] = __builtin_bswap32(data_words[11]); - reg_addr_buf[12] = __builtin_bswap32(data_words[12]); - reg_addr_buf[13] = __builtin_bswap32(data_words[13]); - reg_addr_buf[14] = __builtin_bswap32(data_words[14]); - reg_addr_buf[15] = __builtin_bswap32(data_words[15]); -} - -static inline void nerd_sha_ll_fill_text_block_sha256_double(const void *input_text) -{ - uint32_t *data_words = (uint32_t *)input_text; - uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); - -#if 0 - //No change - reg_addr_buf[0] = data_words[0]; - reg_addr_buf[1] = data_words[1]; - reg_addr_buf[2] = data_words[2]; - reg_addr_buf[3] = data_words[3]; - reg_addr_buf[4] = data_words[4]; - reg_addr_buf[5] = data_words[5]; - reg_addr_buf[6] = data_words[6]; - reg_addr_buf[7] = data_words[7]; -#endif - reg_addr_buf[8] = 0x80000000; - reg_addr_buf[9] = 0x00000000; - reg_addr_buf[10] = 0x00000000; - reg_addr_buf[11] = 0x00000000; - reg_addr_buf[12] = 0x00000000; - reg_addr_buf[13] = 0x00000000; - reg_addr_buf[14] = 0x00000000; - reg_addr_buf[15] = 0x00000100; -} -#endif - -IRAM_ATTR void HwShaTest() -{ - uint8_t interResult[64]; - uint8_t midstate[32]; - uint8_t hash[64]; - memset(interResult, 0, sizeof(interResult)); - 
interResult[32] = 0x80; - interResult[62] = 0x01; - interResult[63] = 0x00; - - memset(interResult_aligned, 0, sizeof(interResult_aligned)); - interResult_aligned[32] = 0x80; - interResult_aligned[62] = 0x01; - interResult_aligned[63] = 0x00; - - uint32_t bake[16]; - - uint32_t time_start = micros(); - int test_count = 1000000; - -#if 0 - //Generic software - //esp32s3 16KH/s - //esp32D 9.5KH/s - test_count = 20000; - mbedtls_sha256_context ctx; - mbedtls_sha256_init(&ctx); - for (int i = 0; i < test_count; ++i) - { - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, s_test_buffer, 80); - mbedtls_sha256_finish_ret(&ctx, interResult); - - mbedtls_sha256_starts_ret(&ctx,0); - mbedtls_sha256_update_ret(&ctx, interResult, 32); - mbedtls_sha256_finish_ret(&ctx, hash); - } - mbedtls_sha256_free(&ctx); -#endif - -#if 1 - //nerdSha256 - //ESP32 39KH/s - //ESP32S3 39.01KH/s - test_count = 100000; - nerdSHA256_context ctx; - nerd_mids(ctx.digest, s_test_buffer); - for (int i = 0; i < test_count; ++i) - { - nerd_sha256d(&ctx, s_test_buffer+64, hash); - } -#endif - -#if 0 - //nerdSha256 bake - //ESP32 : 41KH/s - //ESP32S3 : 42.32KH/s - test_count = 100000; - nerdSHA256_context ctx; - nerd_mids(&ctx, s_test_buffer); - nerd_sha256_bake(ctx.digest, s_test_buffer+64, bake); //15 words - for (int i = 0; i < test_count; ++i) - { - nerd_sha256d_baked(ctx.digest, s_test_buffer+64, bake, hash); - } -#endif - -#if 0 - //Hardware high level 62KH/s - esp_sha_acquire_hardware(); - for (int i = 0; i < test_count; ++i) - { - esp_sha_dma(SHA2_256, s_test_buffer+64, 64, s_test_buffer, 64, true); - esp_sha_read_digest_state(SHA2_256, interResult); - esp_sha_dma(SHA2_256, 0, 0, interResult, 64, true); - esp_sha_read_digest_state(SHA2_256, hash); - } - esp_sha_release_hardware(); -#endif - -#if 0 - //ESP32D 5.50KH/s - test_count = 40000; - //esp_sha_lock_engine(SHA2_256); - for (int i = 0; i < test_count; ++i) - { - esp_sha(SHA2_256, s_test_buffer, 80, interResult); - 
esp_sha(SHA2_256, interResult, 32, hash); - } - //esp_sha_unlock_engine(SHA2_256); -#endif - -#if 0 - //ESP32D - //Invalid result!! - test_count = 100000; - esp_sha_lock_engine(SHA2_256); - for (int i = 0; i < test_count; ++i) - { - esp_sha_block(SHA2_256, s_test_buffer, true); - esp_sha_block(SHA2_256, s_test_buffer+64, false); - esp_sha_read_digest_state(SHA2_256, interResult); - esp_sha_block(SHA2_256, interResult, true); - esp_sha_read_digest_state(SHA2_256, hash); - } - esp_sha_unlock_engine(SHA2_256); -#endif - -#if 0 - //ESP32D Hardware SHA ~200KH/s - test_count = 50000; - periph_module_enable(PERIPH_SHA_MODULE); - uint8_t buffer_swap[128]; - for (int i = 0; i < 32; ++i) - ((uint32_t*)buffer_swap)[i] = __builtin_bswap32(((const uint32_t*)s_test_buffer)[i]); - - uint8_t inter_swap[64]; - for (int i = 0; i < 16; ++i) - ((uint32_t*)inter_swap)[i] = __builtin_bswap32(((const uint32_t*)interResult)[i]); - - for (int i = 0; i < test_count; ++i) - { - //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); - nerd_sha_hal_wait_idle(); - nerd_sha_ll_fill_text_block_sha256(buffer_swap); - sha_ll_start_block(SHA2_256); - - //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); - nerd_sha_hal_wait_idle(); - nerd_sha_ll_fill_text_block_sha256(buffer_swap+64); - sha_ll_continue_block(SHA2_256); - - nerd_sha_hal_wait_idle(); - sha_ll_load(SHA2_256); - //nerd_sha_ll_read_digest_swap(interResult); - - //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); - nerd_sha_hal_wait_idle(); - nerd_sha_ll_fill_text_block_sha256_double(inter_swap); - sha_ll_start_block(SHA2_256); - - nerd_sha_hal_wait_idle(); - sha_ll_load(SHA2_256); - nerd_sha_ll_read_digest_swap(hash); - } -#endif - -#if 0 - //Hardware low level + midstate 156KH/s - esp_sha_acquire_hardware(); - sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); - sha_hal_read_digest(SHA2_256, midstate); - for (int i = 0; i < test_count; ++i) - { - sha_hal_write_digest(SHA2_256, midstate); - 
sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); - sha_hal_read_digest(SHA2_256, interResult); - sha_hal_hash_block(SHA2_256, interResult, 64/4, true); - sha_hal_read_digest(SHA2_256, hash); - } - esp_sha_release_hardware(); -#endif - -#if 0 - //Hardware low level + midstate + aligned 156KH/s (No sense) - esp_sha_acquire_hardware(); - sha_hal_hash_block(SHA2_256, s_test_buffer_aligned, 64/4, true); - sha_hal_read_digest(SHA2_256, midstate_aligned); - for (int i = 0; i < test_count; ++i) - { - sha_hal_write_digest(SHA2_256, midstate_aligned); - sha_hal_hash_block(SHA2_256, s_test_buffer_aligned+64, 64/4, false); - sha_hal_read_digest(SHA2_256, interResult_aligned); - sha_hal_hash_block(SHA2_256, interResult_aligned, 64/4, true); - sha_hal_read_digest(SHA2_256, hash_aligned); - } - esp_sha_release_hardware(); - memcpy(hash, hash_aligned, sizeof(hash_aligned)); -#endif - -#if 0 - //Hardware LL 162.43KH/s - esp_sha_acquire_hardware(); - //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); - sha_hal_wait_idle(); - sha_ll_fill_text_block(s_test_buffer, 64/4); - sha_ll_start_block(SHA2_256); - - //sha_hal_read_digest(SHA2_256, midstate); - sha_ll_load(SHA2_256); - sha_hal_wait_idle(); - sha_ll_read_digest(SHA2_256, midstate, 256 / 32); - - for (int i = 0; i < test_count; ++i) - { - //sha_hal_write_digest(SHA2_256, midstate); - sha_ll_write_digest(SHA2_256, midstate, 256 / 32); - //nerd_sha_ll_write_digest_sha256(midstate); - - //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(s_test_buffer+64, 64/4); - nerd_sha_ll_fill_text_block_sha256(s_test_buffer+64); - sha_ll_continue_block(SHA2_256); - - //sha_hal_read_digest(SHA2_256, interResult); - sha_ll_load(SHA2_256); - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - //sha_ll_read_digest(SHA2_256, interResult, 256 / 32); - nerd_sha_ll_read_digest(interResult); - - //sha_hal_hash_block(SHA2_256, interResult, 64/4, 
true); - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - //sha_ll_fill_text_block(interResult, 64/4); - nerd_sha_ll_fill_text_block_sha256(interResult); - sha_ll_start_block(SHA2_256); - - //sha_hal_read_digest(SHA2_256, hash); - sha_ll_load(SHA2_256); - //sha_hal_wait_idle(); - nerd_sha_hal_wait_idle(); - //sha_ll_read_digest(SHA2_256, hash, 256 / 32); - nerd_sha_ll_read_digest(hash); - } - esp_sha_release_hardware(); -#endif - -#if 0 - //DMA hash - uint8_t* dma_cap_buf = (uint8_t*)heap_caps_malloc(128, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); - memcpy(dma_cap_buf, s_test_buffer, 128); - - uint8_t* dma_cap_inter = (uint8_t*)heap_caps_malloc(64, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); - memcpy(dma_cap_inter, interResult, 64); - - uint8_t* dma_cap_hash = (uint8_t*)heap_caps_malloc(32, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); - - memset(&s_dma_descr_input, 0, sizeof(lldesc_t)); - memset(&s_dma_descr_buf, 0, sizeof(lldesc_t)); - memset(&s_dma_descr_inter, 0, sizeof(lldesc_t)); - - - s_dma_descr_input.length = 64; - s_dma_descr_input.size = 64; - s_dma_descr_input.owner = 1; - s_dma_descr_input.eof = 1; - s_dma_descr_input.buf = dma_cap_buf+64; - - s_dma_descr_buf.length = 64; - s_dma_descr_buf.size = 64; - s_dma_descr_buf.owner = 1; - s_dma_descr_buf.buf = dma_cap_buf; - s_dma_descr_buf.eof = 0; - s_dma_descr_buf.empty = (uint32_t)(&s_dma_descr_input); - - s_dma_descr_inter.length = 64; - s_dma_descr_inter.size = 64; - s_dma_descr_inter.owner = 1; - s_dma_descr_inter.buf = dma_cap_inter; - s_dma_descr_inter.eof = 1; - - //49.83KH/s - esp_sha_acquire_hardware(); - for (int i = 0; i < test_count; ++i) - { - esp_crypto_shared_gdma_start(&s_dma_descr_buf, NULL, GDMA_TRIG_PERIPH_SHA); - sha_hal_hash_dma(SHA2_256, 2, true); - sha_hal_wait_idle(); - esp_sha_read_digest_state(SHA2_256, dma_cap_inter); - - esp_crypto_shared_gdma_start(&s_dma_descr_inter, NULL, GDMA_TRIG_PERIPH_SHA); - sha_hal_hash_dma(SHA2_256, 1, true); - 
sha_hal_wait_idle(); - esp_sha_read_digest_state(SHA2_256, hash); - } - esp_sha_release_hardware(); -#endif - - uint32_t time_end = micros(); - double hash_rate = ((double)test_count * 1000000) / (double)(time_end - time_start); - Serial.print("Hashrate="); - Serial.print(hash_rate/1000); - Serial.println("KH/s"); - - Serial.print("interResult: "); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x", interResult[i]); - Serial.println(""); - - Serial.print("hash: "); - for (size_t i = 0; i < 32; i++) - Serial.printf("%02x", hash[i]); - Serial.println(""); - - //should be - //54cd9f1ebc3db9a626688e5bb91d808abbd4079b2cba7f43fa08bfced300ef19 - //6fa464b007f2d577edfa5dfe9dfc3f9209f36d1a6711d314ea68ccdd03000000 -} - -#endif /********* INIT *****/ void setup() @@ -610,10 +80,7 @@ void setup() //disableCore1WDT(); #ifdef HW_SHA256_TEST - while (1) - { - HwShaTest(); - } + while (1) HwShaTest(); #endif // Setup the buttons diff --git a/src/ShaTests/nerdSHA_HWTest.cpp b/src/ShaTests/nerdSHA_HWTest.cpp new file mode 100644 index 0000000..7808946 --- /dev/null +++ b/src/ShaTests/nerdSHA_HWTest.cpp @@ -0,0 +1,531 @@ +#ifdef HW_SHA256_TEST + +#include +#include "ShaTests/nerdSHA256plus.h" +#include "mbedtls/sha256.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static const uint8_t s_test_buffer[128] = +{ + 0x00, 0x00, 0x00, 0x22, 0x99, 0x44, 0xbb, 0xff, 0xbb, 0x00, 0x00, 0x77, 0x44, 0xcc, 0x11, 0x77, + 0x88, 0x55, 0xbb, 0x44, 0x55, 0x00, 0x77, 0x88, 0x99, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbb, 0xbb, 0x66, 0x11, 0x88, 0x33, 0x44, 0x99, 0xcc, 0x33, 0xff, 0x22, + 0x11, 0xaa, 0x77, 0xee, 0xbb, 0x66, 0xee, 0xcc, 0xee, 0x66, 0xee, 0xdd, 0x77, 0x55, 0x22, 0x22, + 0xcc, 0xcc, 0x66, 0xee, 0x22, 0xdd, 0x99, 0x66, 0x66, 0x88, 0x00, 0x11, 0x2e, 0x33, 0x41, 0x19, + + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80 +}; + +static const uint8_t s_test_buffer_aligned[128] __attribute__((aligned(256))) = +{ + 0x00, 0x00, 0x00, 0x22, 0x99, 0x44, 0xbb, 0xff, 0xbb, 0x00, 0x00, 0x77, 0x44, 0xcc, 0x11, 0x77, + 0x88, 0x55, 0xbb, 0x44, 0x55, 0x00, 0x77, 0x88, 0x99, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbb, 0xbb, 0x66, 0x11, 0x88, 0x33, 0x44, 0x99, 0xcc, 0x33, 0xff, 0x22, + 0x11, 0xaa, 0x77, 0xee, 0xbb, 0x66, 0xee, 0xcc, 0xee, 0x66, 0xee, 0xdd, 0x77, 0x55, 0x22, 0x22, + 0xcc, 0xcc, 0x66, 0xee, 0x22, 0xdd, 0x99, 0x66, 0x66, 0x88, 0x00, 0x11, 0x2e, 0x33, 0x41, 0x19, + + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80 +}; + +static uint8_t interResult_aligned[64] __attribute__((aligned(256))); +static uint8_t midstate_aligned[32] __attribute__((aligned(256))); +static uint8_t hash_aligned[64] __attribute__((aligned(256))); + +#if defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3) +static inline void nerd_sha_hal_wait_idle() +{ + while (REG_READ(SHA_BUSY_REG)) + {} +} + +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + REG_WRITE(&reg_addr_buf[0], data_words[0]); + REG_WRITE(&reg_addr_buf[1], data_words[1]); + REG_WRITE(&reg_addr_buf[2], data_words[2]); + REG_WRITE(&reg_addr_buf[3], data_words[3]); + REG_WRITE(&reg_addr_buf[4], data_words[4]); + REG_WRITE(&reg_addr_buf[5], data_words[5]); + REG_WRITE(&reg_addr_buf[6], data_words[6]); + REG_WRITE(&reg_addr_buf[7], data_words[7]); + REG_WRITE(&reg_addr_buf[8], 
data_words[8]); + REG_WRITE(&reg_addr_buf[9], data_words[9]); + REG_WRITE(&reg_addr_buf[10], data_words[10]); + REG_WRITE(&reg_addr_buf[11], data_words[11]); + REG_WRITE(&reg_addr_buf[12], data_words[12]); + REG_WRITE(&reg_addr_buf[13], data_words[13]); + REG_WRITE(&reg_addr_buf[14], data_words[14]); + REG_WRITE(&reg_addr_buf[15], data_words[15]); +} + +static inline void nerd_sha_ll_write_digest_sha256(void *digest_state) +{ + uint32_t *digest_state_words = (uint32_t *)digest_state; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_H_BASE); + + REG_WRITE(&reg_addr_buf[0], digest_state_words[0]); + REG_WRITE(&reg_addr_buf[1], digest_state_words[1]); + REG_WRITE(&reg_addr_buf[2], digest_state_words[2]); + REG_WRITE(&reg_addr_buf[3], digest_state_words[3]); + REG_WRITE(&reg_addr_buf[4], digest_state_words[4]); + REG_WRITE(&reg_addr_buf[5], digest_state_words[5]); + REG_WRITE(&reg_addr_buf[6], digest_state_words[6]); + REG_WRITE(&reg_addr_buf[7], digest_state_words[7]); +} + +//void IRAM_ATTR esp_dport_access_read_buffer(uint32_t *buff_out, uint32_t address, uint32_t num_words) +static inline void nerd_sha_ll_read_digest(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); +#if 0 + for (uint32_t i = 0; i < 256 / 32; ++i) + { + ((uint32_t*)ptr)[i] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + i * 4); + } +#else + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 0 * 4); + ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 4 * 4); + ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 6 * 4); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_H_BASE + 7 * 4); +#endif + DPORT_INTERRUPT_RESTORE(); +} + +static IRAM_ATTR uint8_t dma_buffer[128] __attribute__((aligned(32))); +static IRAM_ATTR uint8_t dma_inter[64] __attribute__((aligned(32))); 
+static IRAM_ATTR uint8_t dma_hash[32] __attribute__((aligned(32))); +static DRAM_ATTR lldesc_t s_dma_descr_input; +static DRAM_ATTR lldesc_t s_dma_descr_buf; +static DRAM_ATTR lldesc_t s_dma_descr_inter; + +#endif + +#if defined(CONFIG_IDF_TARGET_ESP32) +static inline void nerd_sha_ll_read_digest_swap(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); + ((uint32_t*)ptr)[0] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4)); + ((uint32_t*)ptr)[1] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4)); + ((uint32_t*)ptr)[2] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4)); + ((uint32_t*)ptr)[3] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4)); + ((uint32_t*)ptr)[4] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4)); + ((uint32_t*)ptr)[5] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4)); + ((uint32_t*)ptr)[6] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4)); + ((uint32_t*)ptr)[7] = __builtin_bswap32(DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4)); + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_ll_read_digest(void* ptr) +{ + DPORT_INTERRUPT_DISABLE(); + ((uint32_t*)ptr)[0] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 0 * 4); + ((uint32_t*)ptr)[1] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 1 * 4); + ((uint32_t*)ptr)[2] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 2 * 4); + ((uint32_t*)ptr)[3] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 3 * 4); + ((uint32_t*)ptr)[4] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 4 * 4); + ((uint32_t*)ptr)[5] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 5 * 4); + ((uint32_t*)ptr)[6] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 6 * 4); + ((uint32_t*)ptr)[7] = DPORT_SEQUENCE_REG_READ(SHA_TEXT_BASE + 7 * 4); + DPORT_INTERRUPT_RESTORE(); +} + +static inline void nerd_sha_hal_wait_idle() +{ + while (DPORT_REG_READ(SHA_256_BUSY_REG)) + {} +} + +static inline void nerd_sha_ll_fill_text_block_sha256(const void *input_text) +{ + uint32_t 
*data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + reg_addr_buf[0] = data_words[0]; + reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = data_words[3]; + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; + reg_addr_buf[8] = data_words[8]; + reg_addr_buf[9] = data_words[9]; + reg_addr_buf[10] = data_words[10]; + reg_addr_buf[11] = data_words[11]; + reg_addr_buf[12] = data_words[12]; + reg_addr_buf[13] = data_words[13]; + reg_addr_buf[14] = data_words[14]; + reg_addr_buf[15] = data_words[15]; +} + +static inline void nerd_sha_ll_fill_text_block_sha256_swap(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + + reg_addr_buf[0] = __builtin_bswap32(data_words[0]); + reg_addr_buf[1] = __builtin_bswap32(data_words[1]); + reg_addr_buf[2] = __builtin_bswap32(data_words[2]); + reg_addr_buf[3] = __builtin_bswap32(data_words[3]); + reg_addr_buf[4] = __builtin_bswap32(data_words[4]); + reg_addr_buf[5] = __builtin_bswap32(data_words[5]); + reg_addr_buf[6] = __builtin_bswap32(data_words[6]); + reg_addr_buf[7] = __builtin_bswap32(data_words[7]); + reg_addr_buf[8] = __builtin_bswap32(data_words[8]); + reg_addr_buf[9] = __builtin_bswap32(data_words[9]); + reg_addr_buf[10] = __builtin_bswap32(data_words[10]); + reg_addr_buf[11] = __builtin_bswap32(data_words[11]); + reg_addr_buf[12] = __builtin_bswap32(data_words[12]); + reg_addr_buf[13] = __builtin_bswap32(data_words[13]); + reg_addr_buf[14] = __builtin_bswap32(data_words[14]); + reg_addr_buf[15] = __builtin_bswap32(data_words[15]); +} + +static inline void nerd_sha_ll_fill_text_block_sha256_double(const void *input_text) +{ + uint32_t *data_words = (uint32_t *)input_text; + uint32_t *reg_addr_buf = (uint32_t *)(SHA_TEXT_BASE); + +#if 0 + //No change + reg_addr_buf[0] = data_words[0]; 
+ reg_addr_buf[1] = data_words[1]; + reg_addr_buf[2] = data_words[2]; + reg_addr_buf[3] = data_words[3]; + reg_addr_buf[4] = data_words[4]; + reg_addr_buf[5] = data_words[5]; + reg_addr_buf[6] = data_words[6]; + reg_addr_buf[7] = data_words[7]; +#endif + reg_addr_buf[8] = 0x80000000; + reg_addr_buf[9] = 0x00000000; + reg_addr_buf[10] = 0x00000000; + reg_addr_buf[11] = 0x00000000; + reg_addr_buf[12] = 0x00000000; + reg_addr_buf[13] = 0x00000000; + reg_addr_buf[14] = 0x00000000; + reg_addr_buf[15] = 0x00000100; +} +#endif + +IRAM_ATTR void HwShaTest() +{ + uint8_t interResult[64]; + uint8_t midstate[32]; + uint8_t hash[64]; + memset(interResult, 0, sizeof(interResult)); + interResult[32] = 0x80; + interResult[62] = 0x01; + interResult[63] = 0x00; + + memset(interResult_aligned, 0, sizeof(interResult_aligned)); + interResult_aligned[32] = 0x80; + interResult_aligned[62] = 0x01; + interResult_aligned[63] = 0x00; + + uint32_t bake[16]; + + uint32_t time_start = micros(); + int test_count = 1000000; + +#if 0 + //Generic software + //esp32s3 16KH/s + //esp32D 9.5KH/s + test_count = 20000; + mbedtls_sha256_context ctx; + mbedtls_sha256_init(&ctx); + for (int i = 0; i < test_count; ++i) + { + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, s_test_buffer, 80); + mbedtls_sha256_finish_ret(&ctx, interResult); + + mbedtls_sha256_starts_ret(&ctx,0); + mbedtls_sha256_update_ret(&ctx, interResult, 32); + mbedtls_sha256_finish_ret(&ctx, hash); + } + mbedtls_sha256_free(&ctx); +#endif + +#if 1 + //nerdSha256 + //ESP32 39KH/s + //ESP32S3 39.01KH/s + test_count = 100000; + nerdSHA256_context ctx; + nerd_mids(ctx.digest, s_test_buffer); + for (int i = 0; i < test_count; ++i) + { + nerd_sha256d(&ctx, s_test_buffer+64, hash); + } +#endif + +#if 0 + //nerdSha256 bake + //ESP32 : 41KH/s + //ESP32S3 : 42.32KH/s + test_count = 100000; + nerdSHA256_context ctx; + nerd_mids(&ctx, s_test_buffer); + nerd_sha256_bake(ctx.digest, s_test_buffer+64, bake); //15 words + for (int 
i = 0; i < test_count; ++i) + { + nerd_sha256d_baked(ctx.digest, s_test_buffer+64, bake, hash); + } +#endif + +#if 0 + //Hardware high level 62KH/s + esp_sha_acquire_hardware(); + for (int i = 0; i < test_count; ++i) + { + esp_sha_dma(SHA2_256, s_test_buffer+64, 64, s_test_buffer, 64, true); + esp_sha_read_digest_state(SHA2_256, interResult); + esp_sha_dma(SHA2_256, 0, 0, interResult, 64, true); + esp_sha_read_digest_state(SHA2_256, hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //ESP32D 5.50KH/s + test_count = 40000; + //esp_sha_lock_engine(SHA2_256); + for (int i = 0; i < test_count; ++i) + { + esp_sha(SHA2_256, s_test_buffer, 80, interResult); + esp_sha(SHA2_256, interResult, 32, hash); + } + //esp_sha_unlock_engine(SHA2_256); +#endif + +#if 0 + //ESP32D + //Invalid result!! + test_count = 100000; + esp_sha_lock_engine(SHA2_256); + for (int i = 0; i < test_count; ++i) + { + esp_sha_block(SHA2_256, s_test_buffer, true); + esp_sha_block(SHA2_256, s_test_buffer+64, false); + esp_sha_read_digest_state(SHA2_256, interResult); + esp_sha_block(SHA2_256, interResult, true); + esp_sha_read_digest_state(SHA2_256, hash); + } + esp_sha_unlock_engine(SHA2_256); +#endif + +#if 0 + //ESP32D Hardware SHA ~200KH/s + test_count = 50000; + periph_module_enable(PERIPH_SHA_MODULE); + uint8_t buffer_swap[128]; + for (int i = 0; i < 32; ++i) + ((uint32_t*)buffer_swap)[i] = __builtin_bswap32(((const uint32_t*)s_test_buffer)[i]); + + uint8_t inter_swap[64]; + for (int i = 0; i < 16; ++i) + ((uint32_t*)inter_swap)[i] = __builtin_bswap32(((const uint32_t*)interResult)[i]); + + for (int i = 0; i < test_count; ++i) + { + //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(buffer_swap); + sha_ll_start_block(SHA2_256); + + //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256(buffer_swap+64); + sha_ll_continue_block(SHA2_256); + + 
nerd_sha_hal_wait_idle(); + sha_ll_load(SHA2_256); + //nerd_sha_ll_read_digest_swap(interResult); + + //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + nerd_sha_hal_wait_idle(); + nerd_sha_ll_fill_text_block_sha256_double(inter_swap); + sha_ll_start_block(SHA2_256); + + nerd_sha_hal_wait_idle(); + sha_ll_load(SHA2_256); + nerd_sha_ll_read_digest_swap(hash); + } +#endif + +#if 0 + //Hardware low level + midstate 156KH/s + esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + sha_hal_read_digest(SHA2_256, midstate); + for (int i = 0; i < test_count; ++i) + { + sha_hal_write_digest(SHA2_256, midstate); + sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + sha_hal_read_digest(SHA2_256, interResult); + sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + sha_hal_read_digest(SHA2_256, hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //Hardware low level + midstate + aligned 156KH/s (No sense) + esp_sha_acquire_hardware(); + sha_hal_hash_block(SHA2_256, s_test_buffer_aligned, 64/4, true); + sha_hal_read_digest(SHA2_256, midstate_aligned); + for (int i = 0; i < test_count; ++i) + { + sha_hal_write_digest(SHA2_256, midstate_aligned); + sha_hal_hash_block(SHA2_256, s_test_buffer_aligned+64, 64/4, false); + sha_hal_read_digest(SHA2_256, interResult_aligned); + sha_hal_hash_block(SHA2_256, interResult_aligned, 64/4, true); + sha_hal_read_digest(SHA2_256, hash_aligned); + } + esp_sha_release_hardware(); + memcpy(hash, hash_aligned, sizeof(hash_aligned)); +#endif + +#if 0 + //Hardware LL 162.43KH/s + esp_sha_acquire_hardware(); + //sha_hal_hash_block(SHA2_256, s_test_buffer, 64/4, true); + sha_hal_wait_idle(); + sha_ll_fill_text_block(s_test_buffer, 64/4); + sha_ll_start_block(SHA2_256); + + //sha_hal_read_digest(SHA2_256, midstate); + sha_ll_load(SHA2_256); + sha_hal_wait_idle(); + sha_ll_read_digest(SHA2_256, midstate, 256 / 32); + + for (int i = 0; i < test_count; ++i) + { + //sha_hal_write_digest(SHA2_256, 
midstate); + sha_ll_write_digest(SHA2_256, midstate, 256 / 32); + //nerd_sha_ll_write_digest_sha256(midstate); + + //sha_hal_hash_block(SHA2_256, s_test_buffer+64, 64/4, false); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(s_test_buffer+64, 64/4); + nerd_sha_ll_fill_text_block_sha256(s_test_buffer+64); + sha_ll_continue_block(SHA2_256); + + //sha_hal_read_digest(SHA2_256, interResult); + sha_ll_load(SHA2_256); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_read_digest(SHA2_256, interResult, 256 / 32); + nerd_sha_ll_read_digest(interResult); + + //sha_hal_hash_block(SHA2_256, interResult, 64/4, true); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_fill_text_block(interResult, 64/4); + nerd_sha_ll_fill_text_block_sha256(interResult); + sha_ll_start_block(SHA2_256); + + //sha_hal_read_digest(SHA2_256, hash); + sha_ll_load(SHA2_256); + //sha_hal_wait_idle(); + nerd_sha_hal_wait_idle(); + //sha_ll_read_digest(SHA2_256, hash, 256 / 32); + nerd_sha_ll_read_digest(hash); + } + esp_sha_release_hardware(); +#endif + +#if 0 + //DMA hash + uint8_t* dma_cap_buf = (uint8_t*)heap_caps_malloc(128, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); + memcpy(dma_cap_buf, s_test_buffer, 128); + + uint8_t* dma_cap_inter = (uint8_t*)heap_caps_malloc(64, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); + memcpy(dma_cap_inter, interResult, 64); + + uint8_t* dma_cap_hash = (uint8_t*)heap_caps_malloc(32, MALLOC_CAP_8BIT|MALLOC_CAP_DMA|MALLOC_CAP_INTERNAL); + + memset(&s_dma_descr_input, 0, sizeof(lldesc_t)); + memset(&s_dma_descr_buf, 0, sizeof(lldesc_t)); + memset(&s_dma_descr_inter, 0, sizeof(lldesc_t)); + + + s_dma_descr_input.length = 64; + s_dma_descr_input.size = 64; + s_dma_descr_input.owner = 1; + s_dma_descr_input.eof = 1; + s_dma_descr_input.buf = dma_cap_buf+64; + + s_dma_descr_buf.length = 64; + s_dma_descr_buf.size = 64; + s_dma_descr_buf.owner = 1; + s_dma_descr_buf.buf = dma_cap_buf; + 
s_dma_descr_buf.eof = 0; + s_dma_descr_buf.empty = (uint32_t)(&s_dma_descr_input); + + s_dma_descr_inter.length = 64; + s_dma_descr_inter.size = 64; + s_dma_descr_inter.owner = 1; + s_dma_descr_inter.buf = dma_cap_inter; + s_dma_descr_inter.eof = 1; + + //49.83KH/s + esp_sha_acquire_hardware(); + for (int i = 0; i < test_count; ++i) + { + esp_crypto_shared_gdma_start(&s_dma_descr_buf, NULL, GDMA_TRIG_PERIPH_SHA); + sha_hal_hash_dma(SHA2_256, 2, true); + sha_hal_wait_idle(); + esp_sha_read_digest_state(SHA2_256, dma_cap_inter); + + esp_crypto_shared_gdma_start(&s_dma_descr_inter, NULL, GDMA_TRIG_PERIPH_SHA); + sha_hal_hash_dma(SHA2_256, 1, true); + sha_hal_wait_idle(); + esp_sha_read_digest_state(SHA2_256, hash); + } + esp_sha_release_hardware(); +#endif + + uint32_t time_end = micros(); + double hash_rate = ((double)test_count * 1000000) / (double)(time_end - time_start); + Serial.print("Hashrate="); + Serial.print(hash_rate/1000); + Serial.println("KH/s"); + + Serial.print("interResult: "); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x", interResult[i]); + Serial.println(""); + + Serial.print("hash: "); + for (size_t i = 0; i < 32; i++) + Serial.printf("%02x", hash[i]); + Serial.println(""); + + //should be + //54cd9f1ebc3db9a626688e5bb91d808abbd4079b2cba7f43fa08bfced300ef19 + //6fa464b007f2d577edfa5dfe9dfc3f9209f36d1a6711d314ea68ccdd03000000 +} + +#endif \ No newline at end of file diff --git a/src/ShaTests/nerdSHA_HWTest.h b/src/ShaTests/nerdSHA_HWTest.h new file mode 100644 index 0000000..79e1351 --- /dev/null +++ b/src/ShaTests/nerdSHA_HWTest.h @@ -0,0 +1,5 @@ +#ifdef HW_SHA256_TEST + +IRAM_ATTR void HwShaTest(); + +#endif \ No newline at end of file diff --git a/src/monitor.cpp b/src/monitor.cpp index ffed616..c603acf 100644 --- a/src/monitor.cpp +++ b/src/monitor.cpp @@ -157,6 +157,8 @@ String getBTCprice(void){ if (WiFi.status() != WL_CONNECTED) return "$" + String(bitcoin_price); HTTPClient http; + bool priceUpdated = false; + try { 
http.begin(getBTCAPI); int httpCode = http.GET(); @@ -167,9 +169,7 @@ String getBTCprice(void){ DynamicJsonDocument doc(1024); deserializeJson(doc, payload); - if (doc.containsKey("bpi") && doc["bpi"].containsKey("USD")) { - bitcoin_price = doc["bpi"]["USD"]["rate_float"].as(); - } + if (doc.containsKey("last_trade_price")) bitcoin_price = doc["last_trade_price"]; doc.clear(); diff --git a/src/monitor.h b/src/monitor.h index ffc4a26..e4a0d3d 100644 --- a/src/monitor.h +++ b/src/monitor.h @@ -14,7 +14,8 @@ //API BTC price (Update to USDT cus it's more liquidity and flow price updade) -#define getBTCAPI "https://api.coindesk.com/v1/bpi/currentprice.json" +//#define getBTCAPI "https://api.coindesk.com/v1/bpi/currentprice.json" -- doesn't work anymore +#define getBTCAPI "https://api.blockchain.com/v3/exchange/tickers/BTC-USDT" #define UPDATE_BTC_min 1 diff --git a/src/wManager.cpp b/src/wManager.cpp index 9e4c16f..992845e 100644 --- a/src/wManager.cpp +++ b/src/wManager.cpp @@ -204,7 +204,9 @@ void init_WifiManager() wm.setConfigPortalBlocking(true); //Hacemos que el portal SI bloquee el firmware drawSetupScreen(); mMonitor.NerdStatus = NM_Connecting; - if (!wm.startConfigPortal(DEFAULT_SSID, DEFAULT_WIFIPW)) + wm.startConfigPortal(DEFAULT_SSID, DEFAULT_WIFIPW); + + if (shouldSaveConfig) { //Could be break forced after edditing, so save new config Serial.println("failed to connect and hit timeout"); From 698e6b7317a3b23ce566e6c2f417df30a6a7593e Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 00:29:55 +0200 Subject: [PATCH 27/39] Add automatic firmware factory and update file generation - Added post_build_merge.py script with ESP32 variant auto-detection - Integrated post-build script to all platformio.ini environments - Generates factory.bin (complete from 0x0) and firmware.bin (app only from 0x10000) - Auto-detects ESP32/S2/S3/C3 by analyzing bootloader signature - Organizes output by firmware version in firmware/{version}/ folders --- platformio.ini | 
41 ++++++++- post_build_merge.py | 202 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 post_build_merge.py diff --git a/platformio.ini b/platformio.ini index 6769fbf..c936d52 100644 --- a/platformio.ini +++ b/platformio.ini @@ -11,12 +11,15 @@ [platformio] globallib_dir = lib -default_envs = NerdminerV2, NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, ESP32-2432S028R, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-devKitmv1, ESP32-C3-super-mini +default_envs = NerdminerV2 ; NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, ESP32-2432S028R, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini [env:M5Stick-C-Plus2] platform = espressif32@6.6.0 board = m5stick-c-plus2 framework = arduino +extra_scripts = + pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -50,6 +53,7 @@ board = m5stick-c framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -81,6 +85,9 @@ lib_ignore = platform = espressif32@6.6.0 board = m5stick-c framework = arduino +extra_scripts = + pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -115,6 +122,7 @@ board = esp-wrover-kit framework = arduino extra_scripts = 
pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -149,6 +157,7 @@ board = esp32-s3-devkitc-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -186,6 +195,7 @@ board = lolin_s3_mini framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -220,6 +230,7 @@ board = m5stack-core-esp32 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -252,6 +263,7 @@ board = lolin_s2_mini framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -283,6 +295,7 @@ board = lolin_s3_mini framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -320,6 +333,7 @@ board = esp32dev framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -351,6 +365,7 @@ board = seeed_xiao_esp32c3 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -383,6 +398,7 @@ board = esp32-c3-devkitm-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -414,6 +430,9 @@ lib_ignore = platform = espressif32@6.6.0 board = esp32-c3-devkitm-1 framework = arduino +extra_scripts = + pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -443,6 +462,9 @@ lib_ignore = platform = espressif32@6.6.0 board = esp32-s3-0.42oled framework = arduino +extra_scripts = + pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = 
esp32_exception_decoder time @@ -474,6 +496,7 @@ board = esp32-s3-devkitc-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -508,6 +531,8 @@ board = esp32-s3-devkitc-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -544,6 +569,7 @@ board = esp32-s3-devkitc-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -580,6 +606,7 @@ board = esp32dev framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -609,6 +636,7 @@ board = esp32dev ;esp-wrover-kit framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -641,6 +669,7 @@ board = lilygo-t-amoled framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py board_build.partitions = huge_app.csv build_flags = -DNERDMINER_S3_AMOLED @@ -665,6 +694,7 @@ board = lilygo-t-amoled framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py board_build.partitions = huge_app.csv build_flags = -DNERDMINER_S3_AMOLED @@ -689,6 +719,7 @@ board = esp32-s3-devkitc-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py board_build.partitions = huge_app.csv build_flags = -DNERDMINER_S3_DONGLE @@ -714,6 +745,7 @@ board = esp32-s3-devkitc-1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py board_build.partitions = huge_app.csv build_flags = -DNERDMINER_S3_GEEK @@ -742,6 +774,7 @@ board = esp32cam framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder 
time @@ -773,6 +806,7 @@ board = esp32-s3-t-qt-pro framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time @@ -804,6 +838,7 @@ board = esp32dev framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_speed = 115200 upload_speed = 921600 ;build_type = debug @@ -854,6 +889,7 @@ board = esp32dev framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_speed = 115200 monitor_filters = esp32_exception_decoder @@ -909,6 +945,7 @@ board = lilygo-t-hmi framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py board_build.partitions = default_16MB.csv monitor_filters = esp32_exception_decoder @@ -948,6 +985,7 @@ board = ttgo-lora32-v1 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_speed = 115200 upload_speed = 115200 board_build.partitions = huge_app.csv @@ -971,6 +1009,7 @@ board = m5stack-stamps3 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_filters = esp32_exception_decoder time diff --git a/post_build_merge.py b/post_build_merge.py new file mode 100644 index 0000000..01d947c --- /dev/null +++ b/post_build_merge.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +""" +Post-build script for NerdMiner_v2 firmware merging +Ported from merge_firmware_universal.js - detects ESP32 type from bootloader signature +Generates factory (0x0) and update (0x10000) files automatically after build + +Usage: Add to platformio.ini environments: +extra_scripts = + pre:auto_firmware_version.py + post:post_build_merge.py +""" + +import os +import subprocess +from pathlib import Path + +Import("env") + +def detect_esp32_type(bootloader_path): + """Detect ESP32 type by analyzing bootloader signature - ported from JS version""" + try: + with open(bootloader_path, 'rb') as f: + bootloader_data = 
f.read() + + if len(bootloader_data) < 13: + print("Warning: Bootloader too small, defaulting to ESP32") + return 'ESP32' + + chip_id = bootloader_data[12] # Chip ID at byte 12 + size = len(bootloader_data) + + # Detect ESP32 type based on bootloader signature (from JS version) + if chip_id == 0x09 and size >= 15000: + return 'ESP32-S3' + elif chip_id == 0x05 and size >= 13000 and size < 14000: + return 'ESP32-C3' + elif chip_id == 0x02 and size >= 13000 and size < 15000: + return 'ESP32-S2' + elif chip_id == 0x00 and size >= 17000: + return 'ESP32' + else: + # Fallback: try to detect by size (from JS version) + if size >= 17000: + return 'ESP32' + elif size >= 15000: + return 'ESP32-S3' + elif size >= 13600: + return 'ESP32-S2' + elif size >= 13000: + return 'ESP32-C3' + else: + return 'ESP32' # Default fallback + + except Exception as e: + print(f"Warning: Could not analyze bootloader ({e}), defaulting to ESP32") + return 'ESP32' + +def get_memory_layout(esp_type): + """Get memory addresses for each ESP32 variant - ported from JS version""" + if esp_type == 'ESP32-C3': + # ESP32-C3: Bootloader at 0x0000, no boot_app0 + return { + 'bootloader': 0x0000, + 'partitions': 0x8000, + 'firmware': 0x10000 + } + elif esp_type == 'ESP32-S2': + # ESP32-S2: Bootloader at 0x1000, boot_app0 at 0xE000 + return { + 'bootloader': 0x1000, + 'partitions': 0x8000, + 'boot_app0': 0xE000, + 'firmware': 0x10000 + } + elif esp_type == 'ESP32-S3': + # ESP32-S3: Bootloader at 0x0000, no boot_app0 + return { + 'bootloader': 0x0000, + 'partitions': 0x8000, + 'firmware': 0x10000 + } + else: + # ESP32 Classic: Bootloader at 0x1000, boot_app0 at 0xE000 + return { + 'bootloader': 0x1000, + 'partitions': 0x8000, + 'boot_app0': 0xE000, + 'firmware': 0x10000 + } + +def get_firmware_version(): + """Get firmware version from git""" + try: + result = subprocess.run(["git", "describe", "--tags", "--dirty"], + stdout=subprocess.PIPE, text=True, + cwd=env.subst("$PROJECT_DIR")) + if result.returncode == 
0: + version = result.stdout.strip() + # Clean up version string + version = version.replace('Release', '').replace('release', '') + return version + except: + pass + return "dev" + +def create_merged_firmware(source, target, env): + """Main function called after firmware build""" + + # Get build info + project_dir = Path(env.subst("$PROJECT_DIR")) + build_dir = Path(env.subst("$BUILD_DIR")) + env_name = env.subst("$PIOENV") + version = get_firmware_version() + + print(f"\nšŸ”Ø Building firmware files for {env_name}...") + + # File paths in build directory + bootloader_file = build_dir / "bootloader.bin" + partitions_file = build_dir / "partitions.bin" + boot_app0_file = build_dir / "boot_app0.bin" + firmware_file = build_dir / "firmware.bin" + + # Check if firmware exists + if not firmware_file.exists(): + print(f"āŒ Firmware file not found: {firmware_file}") + return + + # Auto-detect ESP32 type + esp_type = detect_esp32_type(bootloader_file) if bootloader_file.exists() else 'ESP32' + addresses = get_memory_layout(esp_type) + + print(f"šŸ“± Detected: {esp_type} (bootloader at 0x{addresses['bootloader']:04X})") + + # Output directory with version subfolder + version_dir = project_dir / "firmware" / version + version_dir.mkdir(parents=True, exist_ok=True) + + # Output filenames (simplified names) + factory_file = version_dir / f"{env_name}_factory.bin" + update_file = version_dir / f"{env_name}_firmware.bin" + + # 1. Create update file (just copy firmware.bin) + try: + import shutil + shutil.copy2(firmware_file, update_file) + print(f"āœ… Firmware: {update_file.name}") + except Exception as e: + print(f"āŒ Error creating firmware file: {e}") + return + + # 2. 
Create factory file (merged) + try: + # Create merged binary - 4MB filled with 0xFF + merged_size = 0x400000 # 4MB + merged_data = bytearray([0xFF] * merged_size) + max_address = 0 + + # Files to merge + files_to_merge = { + 'bootloader': bootloader_file, + 'partitions': partitions_file, + 'firmware': firmware_file + } + + # Add boot_app0 for ESP32 Classic and S2 + if 'boot_app0' in addresses: + files_to_merge['boot_app0'] = boot_app0_file + + # Merge files at their respective addresses + for file_type, file_path in files_to_merge.items(): + if file_path.exists(): + address = addresses[file_type] + with open(file_path, 'rb') as f: + data = f.read() + + print(f" šŸ“„ {file_type} at 0x{address:06X}: {len(data)} bytes") + + if address + len(data) <= merged_size: + merged_data[address:address+len(data)] = data + max_address = max(max_address, address + len(data)) + else: + print(f"āš ļø Warning: {file_type} too large, truncating") + remaining = merged_size - address + merged_data[address:address+remaining] = data[:remaining] + max_address = merged_size + else: + print(f"āš ļø Warning: {file_type} not found: {file_path}") + + # Find actual end of data (round up to 4K boundary) + actual_end = ((max_address + 4095) // 4096) * 4096 + + # Write factory file + with open(factory_file, 'wb') as f: + f.write(merged_data[:actual_end]) + + print(f"āœ… Factory: {factory_file.name} ({actual_end} bytes)") + + except Exception as e: + print(f"āŒ Error creating factory file: {e}") + +# Add post-build hook +env.AddPostAction("$BUILD_DIR/firmware.bin", create_merged_firmware) \ No newline at end of file From 3e9f7f361bbefc259659e3ca9770fdafa6211cfb Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 00:46:05 +0200 Subject: [PATCH 28/39] Update GitHub Actions to use factory and firmware files only - Modified release and prerelease workflows to use firmware/ folder instead of separate binaries - Removed archiving of individual bootloader, partitions, and boot_app0 files - 
Releases now contain only factory.bin (complete) and firmware.bin (update) files - Simplified workflow by removing binary renaming steps --- .github/workflows/prerelease.yml | 27 +++++++-------------------- .github/workflows/release.yml | 27 +++++++-------------------- 2 files changed, 14 insertions(+), 40 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 968e7dd..eec3048 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -25,17 +25,11 @@ jobs: run: pip install --upgrade platformio - name: Build PlatformIO Project run: pio run - - name: Archive built binaries + - name: Archive firmware files uses: actions/upload-artifact@v4 with: - name: built-binaries-${{ github.sha }} - path: .pio/build/*/*.bin - if-no-files-found: error - - name: Archive bootapp binary - uses: actions/upload-artifact@v4 - with: - name: bootapp-binary-${{ github.sha }} - path: ~/.platformio/packages/framework-arduinoespressif32/tools/partitions/boot_app0.bin + name: firmware-files-${{ github.sha }} + path: firmware/ if-no-files-found: error - name: Get version id: version_step @@ -49,18 +43,11 @@ jobs: runs-on: ubuntu-latest if: github.ref == 'refs/heads/dev' steps: - - name: Download built binaries + - name: Download firmware files uses: actions/download-artifact@v4 with: - path: ${{ github.workspace }}/binaries - name: built-binaries-${{ github.sha }} - - name: Download bootapp binary - uses: actions/download-artifact@v4 - with: - path: ${{ github.workspace }}/binaries - name: bootapp-binary-${{ github.sha }} - - name: Rename built binaries - run: find ${{ github.workspace }}/binaries -mindepth 2 -maxdepth 3 -type f -name "*.bin" -exec sh -c 'cp "$0" "${{ github.workspace }}/binaries/$(basename $(dirname "$0"))_$(basename $0)"' {} \; + path: ${{ github.workspace }}/ + name: firmware-files-${{ github.sha }} - name: Pre-release uses: softprops/action-gh-release@v2 with: @@ -70,5 +57,5 @@ jobs: 
generate_release_notes: true prerelease: true fail_on_unmatched_files: true - files: ${{ github.workspace }}/binaries/*.bin + files: ${{ github.workspace }}/firmware/**/*.bin diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4f32f58..6fd47a1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,17 +25,11 @@ jobs: run: pip install --upgrade platformio - name: Build PlatformIO Project run: pio run - - name: Archive built binaries + - name: Archive firmware files uses: actions/upload-artifact@v4 with: - name: built-binaries-${{ github.sha }} - path: .pio/build/*/*.bin - if-no-files-found: error - - name: Archive bootapp binary - uses: actions/upload-artifact@v4 - with: - name: bootapp-binary-${{ github.sha }} - path: ~/.platformio/packages/framework-arduinoespressif32/tools/partitions/boot_app0.bin + name: firmware-files-${{ github.sha }} + path: firmware/ if-no-files-found: error - name: Get version id: version_step @@ -59,18 +53,11 @@ jobs: issue-title: "Releasing Nerdminer version ${{ needs.build.outputs.version }}" issue-body: "Please approve or deny the release of ${{ needs.build.outputs.version }}." 
exclude-workflow-initiator-as-approver: false - - name: Download built binaries + - name: Download firmware files uses: actions/download-artifact@v4 with: - path: ${{ github.workspace }}/binaries - name: built-binaries-${{ github.sha }} - - name: Download bootapp binary - uses: actions/download-artifact@v4 - with: - path: ${{ github.workspace }}/binaries - name: bootapp-binary-${{ github.sha }} - - name: Rename built binaries - run: find ${{ github.workspace }}/binaries -mindepth 2 -maxdepth 3 -type f -name "*.bin" -exec sh -c 'cp "$0" "${{ github.workspace }}/binaries/$(basename $(dirname "$0"))_$(basename $0)"' {} \; + path: ${{ github.workspace }}/ + name: firmware-files-${{ github.sha }} - name: Release uses: softprops/action-gh-release@v2 with: @@ -80,5 +67,5 @@ jobs: generate_release_notes: true make_latest: true fail_on_unmatched_files: true - files: ${{ github.workspace }}/binaries/*.bin + files: ${{ github.workspace }}/firmware/**/*.bin From da04042ae5235f5b0441477e597e75600eaba14d Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 01:02:29 +0200 Subject: [PATCH 29/39] Fix workflow branches to match repository structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - prerelease.yml now triggers on pre-release branch instead of dev - release.yml now triggers on main branch instead of master - Matches develop → pre-release → main workflow --- .github/workflows/prerelease.yml | 4 ++-- .github/workflows/release.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index eec3048..9727526 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -3,7 +3,7 @@ name: Nerdminer Pre-Release on: push: branches: - - dev + - pre-release jobs: build: @@ -41,7 +41,7 @@ jobs: contents: write discussions: write runs-on: ubuntu-latest - if: github.ref == 'refs/heads/dev' + if: github.ref == 
'refs/heads/pre-release' steps: - name: Download firmware files uses: actions/download-artifact@v4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6fd47a1..5884ce2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,7 +3,7 @@ name: Nerdminer Release on: push: branches: - - master + - main jobs: build: @@ -42,7 +42,7 @@ jobs: discussions: write issues: write runs-on: ubuntu-latest - if: github.ref == 'refs/heads/master' + if: github.ref == 'refs/heads/main' steps: - uses: trstringer/manual-approval@v1 timeout-minutes: 120 From 21f2a1a3d9ac78f0423bb07f9e2855b935bf7fb8 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 01:23:24 +0200 Subject: [PATCH 30/39] Fix workflow branches to match repository structure --- .github/workflows/prerelease.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 9727526..216da5c 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -3,7 +3,7 @@ name: Nerdminer Pre-Release on: push: branches: - - pre-release + - prerelease jobs: build: @@ -41,14 +41,14 @@ jobs: contents: write discussions: write runs-on: ubuntu-latest - if: github.ref == 'refs/heads/pre-release' + if: github.ref == 'refs/heads/prerelease' steps: - name: Download firmware files uses: actions/download-artifact@v4 with: path: ${{ github.workspace }}/ name: firmware-files-${{ github.sha }} - - name: Pre-release + - name: Prerelease uses: softprops/action-gh-release@v2 with: name: nerdminer-prerelease-${{ needs.build.outputs.version }} From c640f9c420f73f065f883ef1e45d86f1dbf6fd26 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 08:45:29 +0200 Subject: [PATCH 31/39] Added some platformio.ini boards and post scripts --- platformio.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/platformio.ini b/platformio.ini index fb5b62e..6b756c9 100644 
--- a/platformio.ini +++ b/platformio.ini @@ -11,7 +11,7 @@ [platformio] globallib_dir = lib -default_envs = NerdminerV2 ; NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, ESP32-2432S028R, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini +default_envs = NerdminerV2; NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 [env:M5Stick-C-Plus2] platform = espressif32@6.6.0 @@ -558,6 +558,7 @@ board = esp32-s3-devkitc1-n32r8 framework = arduino extra_scripts = pre:auto_firmware_version.py + post:post_build_merge.py monitor_speed = 115200 upload_speed = 115200 From 4255fe939e474e334de945752737ec5e69275e0b Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 08:49:20 +0200 Subject: [PATCH 32/39] Fix firmware file pattern in GitHub Actions workflows - Changed from firmware/**/*.bin to firmware/*/*.bin for better compatibility - Resolves issue where files in firmware/dev/ were not being found - Affects both release and prerelease workflows --- .github/workflows/prerelease.yml | 2 +- .github/workflows/release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/prerelease.yml 
b/.github/workflows/prerelease.yml index 216da5c..3418615 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -57,5 +57,5 @@ jobs: generate_release_notes: true prerelease: true fail_on_unmatched_files: true - files: ${{ github.workspace }}/firmware/**/*.bin + files: ${{ github.workspace }}/firmware/*/*.bin diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5884ce2..353489a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,5 +67,5 @@ jobs: generate_release_notes: true make_latest: true fail_on_unmatched_files: true - files: ${{ github.workspace }}/firmware/**/*.bin + files: ${{ github.workspace }}/firmware/*/*.bin From 45346fe2174c69bef0688eaa5aabd517351c63a3 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 09:27:29 +0200 Subject: [PATCH 33/39] fix env --- platformio.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platformio.ini b/platformio.ini index 6b756c9..d0d62db 100644 --- a/platformio.ini +++ b/platformio.ini @@ -11,7 +11,7 @@ [platformio] globallib_dir = lib -default_envs = NerdminerV2; NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 +default_envs = NerdminerV2 ; NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, 
NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 [env:M5Stick-C-Plus2] platform = espressif32@6.6.0 From 00942251dfd3504c81b825ff084c4166e6b52e99 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 11:38:40 +0200 Subject: [PATCH 34/39] Add debugging to prerelease workflow - Added debug step to show all generated .bin files - Check directory structure and firmware folder contents - Help troubleshoot why firmware files are not being found --- .github/workflows/prerelease.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 3418615..8abf4c5 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -25,6 +25,19 @@ jobs: run: pip install --upgrade platformio - name: Build PlatformIO Project run: pio run + - name: Debug - Check generated files + run: | + echo "=== All .bin files found ===" + find . 
-name "*.bin" -type f + echo "=== Directory structure ===" + ls -la + echo "=== .pio/build contents ===" + find .pio/build -name "*.bin" -type f 2>/dev/null || echo "No .pio/build files" + echo "=== firmware directory ===" + ls -la firmware/ 2>/dev/null || echo "No firmware directory" + find firmware -name "*.bin" -type f 2>/dev/null || echo "No firmware .bin files" + echo "=== Script check ===" + ls -la post_build_merge.py - name: Archive firmware files uses: actions/upload-artifact@v4 with: From 210275a3e86b025f1b180e3e5630f73949e54799 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 13:28:26 +0200 Subject: [PATCH 35/39] Add debugging to prerelease download step - Debug downloaded artifact contents in prerelease job - Check if firmware files are correctly downloaded from build job - Help identify path issues between build and release steps --- .github/workflows/prerelease.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 8abf4c5..eb1f989 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -61,6 +61,15 @@ jobs: with: path: ${{ github.workspace }}/ name: firmware-files-${{ github.sha }} + - name: Debug - Check downloaded files + run: | + echo "=== Working directory ===" + pwd + ls -la + echo "=== All .bin files after download ===" + find . 
-name "*.bin" -type f + echo "=== firmware directory after download ===" + find firmware -type f 2>/dev/null || echo "No firmware directory found" - name: Prerelease uses: softprops/action-gh-release@v2 with: From 64fd6fc6d00b69ce865886aeb366d35aa1436a05 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 13:38:00 +0200 Subject: [PATCH 36/39] Use broader file pattern to debug artifact discovery - Changed from firmware/*/*.bin to **/*.bin for testing - Should find any .bin files regardless of directory structure - Help identify if issue is path-specific or file generation --- .github/workflows/prerelease.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index eb1f989..de878a6 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -79,5 +79,5 @@ jobs: generate_release_notes: true prerelease: true fail_on_unmatched_files: true - files: ${{ github.workspace }}/firmware/*/*.bin + files: ${{ github.workspace }}/**/*.bin From 7f41ad63e902f6ae51f50ec8f2056c560e56cdaf Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 13:45:19 +0200 Subject: [PATCH 37/39] test envs --- platformio.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platformio.ini b/platformio.ini index d0d62db..c3db3f3 100644 --- a/platformio.ini +++ b/platformio.ini @@ -11,7 +11,7 @@ [platformio] globallib_dir = lib -default_envs = NerdminerV2 ; NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, 
ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 +default_envs = NerdminerV2, NerdminerV2-T-HMI ; wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 [env:M5Stick-C-Plus2] platform = espressif32@6.6.0 From be141c5adfbed0504f9da873676b97c845eeccc6 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 13:56:12 +0200 Subject: [PATCH 38/39] Bump version to V1.8.0 - Update CURRENT_VERSION in src/version.h - Reflects new release with automatic firmware generation features --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index dfff237..96ab477 100644 --- a/src/version.h +++ b/src/version.h @@ -1,6 +1,6 @@ #ifndef VERSION_H #define VERSION_H -#define CURRENT_VERSION "V1.7.0" +#define CURRENT_VERSION "V1.8.0" #endif // VERSION_H From 4033d58171423bc19451422d0d8cca6815429ca9 Mon Sep 17 00:00:00 2001 From: bitmaker Date: Tue, 2 Sep 2025 13:57:39 +0200 Subject: [PATCH 39/39] Update envs to build all boards - Update current env to build all boards --- platformio.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platformio.ini b/platformio.ini index c3db3f3..7a3abd0 100644 --- a/platformio.ini +++ b/platformio.ini @@ -11,7 +11,7 @@ [platformio] globallib_dir = lib -default_envs = NerdminerV2, NerdminerV2-T-HMI ; wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, 
NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 +default_envs = NerdminerV2, NerdminerV2-T-HMI, wt32-sc01, wt32-sc01-plus, han_m5stack, M5Stick-C, M5Stick-C-Plus2, M5Stick-CPlus, esp32cam, ESP32-2432S028R, ESP32_2432S028_2USB, Lilygo-T-Embed, ESP32-devKitv1, NerdminerV2-S3-DONGLE, NerdminerV2-S3-GEEK, NerdminerV2-S3-AMOLED, NerdminerV2-S3-AMOLED-TOUCH, NerdminerV2-T-QT, NerdminerV2-T-Display_V1, TTGO-T-Display, M5-StampS3, ESP32-S3-devKitv1, ESP32-S3-mini-wemos, ESP32-S2-mini-wemos, ESP32-S3-mini-weact, ESP32-D0WD-V3-weact, ESP32-C3-super-mini, ESP32-C3-devKitmv1, ESP32-C3-042-OLED, ESP32-S3-042-OLED, ESP32-C3-spotpear, esp32-s3-devkitc1-n32r8 [env:M5Stick-C-Plus2] platform = espressif32@6.6.0