Browse Source

Compile CPU mining for win32 and win64 (incomplete)

James Z.M. Gao 13 years ago
parent
commit
5e7fc0073b
6 changed files with 78 additions and 14 deletions
  1. configure.ac (+14 -2)
  2. driver-cpu.c (+2 -2)
  3. x86_32/Makefile.am (+1 -1)
  4. x86_64/Makefile.am (+1 -1)
  5. x86_64/sha256_sse4_amd64.asm (+35 -8)
  6. x86_64/sha256_xmm_amd64.asm (+25 -0)

+ 14 - 2
configure.ac

@@ -85,7 +85,6 @@ esac
 
 case $target in
   *-*-mingw*)
-    have_x86_64=false
     have_win32=true
     DLOPEN_FLAGS=""
     WS2_LIBS="-lws2_32"
@@ -504,7 +503,7 @@ if test "x$have_x86_32$have_x86_64" != "xfalsefalse"; then
 AC_PATH_PROG([YASM],[yasm],[false])
 if test "x$YASM" != "xfalse" ; then
   AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
-  yasmver=`yasm --version | head -1 | cut -d\  -f2`
+  yasmver=`"$YASM" --version | head -1 | cut -d\  -f2`
   yamajor=`echo $yasmver | cut -d. -f1`
   yaminor=`echo $yasmver | cut -d. -f2`
   yamini=`echo $yasmver | cut -d. -f3`
@@ -531,6 +530,18 @@ if test "x$YASM" != "xfalse" ; then
 fi
 if test "x$has_yasm" = "xfalse" ; then
   AC_MSG_NOTICE([yasm is required for the assembly algorithms. They will be skipped.])
+else
+  if test "x$have_x86_64" = xtrue; then
+    if test "x$have_win32" = xtrue; then
+      YASM_FMT="win64"
+    else
+      YASM_FMT="elf64"
+    fi
+  elif test "x$have_win32" = xtrue; then
+    YASM_FMT="coff"
+  else
+    YASM_FMT="elf32"
+  fi
 fi
 fi
 
@@ -743,6 +754,7 @@ AC_SUBST(PDCURSES_LIBS)
 AC_SUBST(WS2_LIBS)
 AC_SUBST(MATH_LIBS)
 AC_SUBST(UDEV_LIBS)
+AC_SUBST(YASM_FMT)
 
 AC_CONFIG_FILES([
 	Makefile

+ 2 - 2
driver-cpu.c

@@ -726,8 +726,8 @@ static void cpu_detect()
 	// Reckon number of cores in the box
 	#if defined(WIN32)
 	{
-		DWORD system_am;
-		DWORD process_am;
+		DWORD_PTR system_am;
+		DWORD_PTR process_am;
 		BOOL ok = GetProcessAffinityMask(
 			GetCurrentProcess(),
 			&system_am,

+ 1 - 1
x86_32/Makefile.am

@@ -5,4 +5,4 @@ SUFFIXES = .asm
 libx8632_a_SOURCES	= sha256_xmm.asm
 
 .asm.o:
-	$(YASM) -f elf32 $<
+	$(YASM) -f $(YASM_FMT) $<

+ 1 - 1
x86_64/Makefile.am

@@ -5,4 +5,4 @@ SUFFIXES = .asm
 libx8664_a_SOURCES	= sha256_xmm_amd64.asm sha256_sse4_amd64.asm
 
 .asm.o:
-	$(YASM) -f elf64 $<
+	$(YASM) -f $(YASM_FMT) -o $@ $<

+ 35 - 8
x86_64/sha256_sse4_amd64.asm

@@ -13,9 +13,17 @@
 ALIGN 32
 BITS 64
 
+%ifidn __OUTPUT_FORMAT__,win64
+%define hash rcx
+%define data rdx
+%define init r8
+%define temp r9
+%else
 %define hash rdi
 %define data rsi
 %define init rdx
+%define temp rcx
+%endif
 
 ; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
 %define LAB_CALC_PARA	2
@@ -27,18 +35,28 @@ extern g_4sha256_k
 
 global CalcSha256_x64_sse4
 ;	CalcSha256	hash(rdi), data(rsi), init(rdx)
+;	CalcSha256	hash(rcx), data(rdx), init(r8)
 CalcSha256_x64_sse4:
 
 	push	rbx
+%ifidn __OUTPUT_FORMAT__,win64
+	sub	rsp, 16 * 6
+	movdqa	[rsp + 16*0], xmm6
+	movdqa	[rsp + 16*1], xmm7
+	movdqa	[rsp + 16*2], xmm8
+	movdqa	[rsp + 16*3], xmm9
+	movdqa	[rsp + 16*4], xmm10
+	movdqa	[rsp + 16*5], xmm11
+%endif
 
 LAB_NEXT_NONCE:
 
-	mov	rcx, 64*4					; 256 - rcx is # of SHA-2 rounds
+	mov	temp, 64*4					; 256 - temp is # of SHA-2 rounds
 	mov	rax, 16*4					; 64 - rax is where we expand to
 
 LAB_SHA:
-	push	rcx
-	lea	rcx, qword [data+rcx*4]				; + 1024
+	push	temp
+	lea	temp, qword [data+temp*4]			; + 1024
 	lea	r11, qword [data+rax*4]				; + 256
 
 LAB_CALC:
@@ -122,10 +140,10 @@ LAB_CALC:
 %endrep
 
 	add	r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16
-	cmp	r11, rcx
+	cmp	r11, temp
 	jb	LAB_CALC
 
-	pop	rcx
+	pop	temp
 	mov	rax, 0
 
 ; Load the init values of the message into the hash.
@@ -219,12 +237,12 @@ LAB_LOOP:
 %assign i i+1
 %endrep
 
-	cmp	rax, rcx
+	cmp	rax, temp
 	jb	LAB_LOOP
 
 ; Finished the 64 rounds, calculate hash and save
 
-	movntdqa	xmm1, [rdx]
+	movntdqa	xmm1, [init]
 	pshufd	xmm2, xmm1, 0x55
 	paddd	xmm5, xmm2
 	pshufd	xmm6, xmm1, 0xAA
@@ -234,7 +252,7 @@ LAB_LOOP:
 	pshufd	xmm1, xmm1, 0
 	paddd	xmm7, xmm1
 
-	movntdqa	xmm1, [rdx+4*4]
+	movntdqa	xmm1, [init+4*4]
 	pshufd	xmm2, xmm1, 0x55
 	paddd	xmm8, xmm2
 	pshufd	xmm6, xmm1, 0xAA
@@ -254,6 +272,15 @@ LAB_LOOP:
 	movdqa	[hash+7*16], xmm10
 
 LAB_RET:
+%ifidn __OUTPUT_FORMAT__,win64
+	movdqa	xmm6, [rsp + 16*0]
+	movdqa	xmm7, [rsp + 16*1]
+	movdqa	xmm8, [rsp + 16*2]
+	movdqa	xmm9, [rsp + 16*3]
+	movdqa	xmm10, [rsp + 16*4]
+	movdqa	xmm11, [rsp + 16*5]
+	add	rsp, 16 * 6
+%endif
 	pop	rbx
 	ret
 

+ 25 - 0
x86_64/sha256_xmm_amd64.asm

@@ -22,10 +22,17 @@
 ALIGN 32
 BITS 64
 
+%ifidn __OUTPUT_FORMAT__,win64
+%define hash  rcx
+%define hash1 rdx
+%define data  r8
+%define init  r9
+%else
 %define hash  rdi
 %define hash1 rsi
 %define data  rdx
 %define init  rcx
+%endif
 
 ; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
 %define SHA_CALC_W_PARA         2
@@ -227,6 +234,15 @@ sha256_sse2_64_new:
 %endif
 
     push        rbx
+%ifidn __OUTPUT_FORMAT__,win64
+    sub         rsp, 16 * 6
+    movdqa      [rsp + 16*0], xmm6
+    movdqa      [rsp + 16*1], xmm7
+    movdqa      [rsp + 16*2], xmm8
+    movdqa      [rsp + 16*3], xmm9
+    movdqa      [rsp + 16*4], xmm10
+    movdqa      [rsp + 16*5], xmm13
+%endif
 
 %macro  SHA_256  0
     mov         rbx, 64*4   ; rbx is # of SHA-2 rounds
@@ -318,6 +334,15 @@ sha256_sse2_64_new:
     movdqa    [hash+7*16], rH
 
 LAB_RET:
+%ifidn __OUTPUT_FORMAT__,win64
+    movdqa    xmm6, [rsp + 16*0]
+    movdqa    xmm7, [rsp + 16*1]
+    movdqa    xmm8, [rsp + 16*2]
+    movdqa    xmm9, [rsp + 16*3]
+    movdqa    xmm10, [rsp + 16*4]
+    movdqa    xmm13, [rsp + 16*5]
+    add       rsp, 16 * 6
+%endif
     pop       rbx
     ret