Browse Source

cpu: Minor optimization by checking that the last hash word (H, i.e. hash32[7]) is zero before calling fulltest

Luke Dashjr 13 years ago
parent
commit
75cbffceac
5 changed files with 10 additions and 5 deletions
  1. sha256_4way.c (+2 -1)
  2. sha256_altivec_4way.c (+2 -1)
  3. sha256_sse2_amd64.c (+2 -1)
  4. sha256_sse2_i386.c (+2 -1)
  5. sha256_sse4_amd64.c (+2 -1)

+ 2 - 1
sha256_4way.c

@@ -108,6 +108,7 @@ bool ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	uint32_t *hash32 = (uint32_t *)phash;
     unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
 
 	pdata += 64;
@@ -133,7 +134,7 @@ bool ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
                 for (i = 0; i < 32/4; i++)
                     ((unsigned int*)phash)[i] = thash[i][j];
 
-		if (fulltest(phash, ptarget)) {
+		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
 					nonce += j;
 					*last_nonce = nonce;
 					*nNonce_p = nonce;

+ 2 - 1
sha256_altivec_4way.c

@@ -81,6 +81,7 @@ bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	uint32_t *hash32 = (uint32_t *)phash;
     unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
 
 	pdata += 64;
@@ -105,7 +106,7 @@ bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
                 for (i = 0; i < 32/4; i++)
                     ((unsigned int*)phash)[i] = thash[i][j];
 
-		if (fulltest(phash, ptarget)) {
+		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
 					nonce += j;
 					*last_nonce = nonce;
 					*nNonce_p = nonce;

+ 2 - 1
sha256_sse2_amd64.c

@@ -57,6 +57,7 @@ bool scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	uint32_t *hash32 = (uint32_t *)phash;
     uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64] __attribute__ ((aligned (0x100)));
@@ -115,7 +116,7 @@ bool scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
 		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
 		}
 
-		if (fulltest(phash, ptarget)) {
+		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
 		     nonce += j;
 		     *last_nonce = nonce + 1;
 		     *nNonce_p = nonce;

+ 2 - 1
sha256_sse2_i386.c

@@ -57,6 +57,7 @@ bool scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	uint32_t *hash32 = (uint32_t *)phash;
     uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64] __attribute__ ((aligned (0x100)));
@@ -117,7 +118,7 @@ bool scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
 		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
 		}
 
-		if (fulltest(phash, ptarget)) {
+		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
 		     nonce += j;
 		     *last_nonce = nonce;
 		     *nNonce_p = nonce;

+ 2 - 1
sha256_sse4_amd64.c

@@ -56,6 +56,7 @@ bool scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
 	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
+	uint32_t *hash32 = (uint32_t *)phash;
     uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64], m_4hash[64], m_4hash1[64];
@@ -114,7 +115,7 @@ bool scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
 		    *(uint32_t *)&(phash)[i*4] = mi.i[j];
 		}
 
-		if (fulltest(phash, ptarget)) {
+		if (unlikely(hash32[7] == 0 && fulltest(phash, ptarget))) {
 			nonce += j;
 			*last_nonce = nonce;
 			*nNonce_p = nonce;