Browse Source

Compile CPU mining for win32 and win64 (incomplete)

James Z.M. Gao 13 years ago
parent
commit
5e7fc0073b
6 changed files with 78 additions and 14 deletions
  1. configure.ac (+14 -2)
  2. driver-cpu.c (+2 -2)
  3. x86_32/Makefile.am (+1 -1)
  4. x86_64/Makefile.am (+1 -1)
  5. x86_64/sha256_sse4_amd64.asm (+35 -8)
  6. x86_64/sha256_xmm_amd64.asm (+25 -0)

+ 14 - 2
configure.ac

@@ -85,7 +85,6 @@ esac
 
 case $target in
   *-*-mingw*)
-    have_x86_64=false
     have_win32=true
     DLOPEN_FLAGS=""
     WS2_LIBS="-lws2_32"
@@ -504,7 +503,7 @@ if test "x$have_x86_32$have_x86_64" != "xfalsefalse"; then
 AC_PATH_PROG([YASM],[yasm],[false])
 if test "x$YASM" != "xfalse" ; then
   AC_MSG_CHECKING([if yasm version is greater than 1.0.1])
-  yasmver=`yasm --version | head -1 | cut -d\  -f2`
+  yasmver=`"$YASM" --version | head -1 | cut -d\  -f2`
   yamajor=`echo $yasmver | cut -d. -f1`
   yaminor=`echo $yasmver | cut -d. -f2`
   yamini=`echo $yasmver | cut -d. -f3`
@@ -531,6 +530,18 @@ if test "x$YASM" != "xfalse" ; then
 fi
 if test "x$has_yasm" = "xfalse" ; then
   AC_MSG_NOTICE([yasm is required for the assembly algorithms. They will be skipped.])
+else
+  if test "x$have_x86_64" = xtrue; then
+    if test "x$have_win32" = xtrue; then
+      YASM_FMT="win64"
+    else
+      YASM_FMT="elf64"
+    fi
+  elif test "x$have_win32" = xtrue; then
+    YASM_FMT="coff"
+  else
+    YASM_FMT="elf32"
+  fi
 fi
 fi
 
@@ -743,6 +754,7 @@ AC_SUBST(PDCURSES_LIBS)
 AC_SUBST(WS2_LIBS)
 AC_SUBST(MATH_LIBS)
 AC_SUBST(UDEV_LIBS)
+AC_SUBST(YASM_FMT)
 
 AC_CONFIG_FILES([
 	Makefile

+ 2 - 2
driver-cpu.c

@@ -726,8 +726,8 @@ static void cpu_detect()
 	// Reckon number of cores in the box
 	#if defined(WIN32)
 	{
-		DWORD system_am;
-		DWORD process_am;
+		DWORD_PTR system_am;
+		DWORD_PTR process_am;
 		BOOL ok = GetProcessAffinityMask(
 			GetCurrentProcess(),
 			&system_am,

+ 1 - 1
x86_32/Makefile.am

@@ -5,4 +5,4 @@ SUFFIXES = .asm
 libx8632_a_SOURCES	= sha256_xmm.asm
 
 .asm.o:
-	$(YASM) -f elf32 $<
+	$(YASM) -f $(YASM_FMT) $<

+ 1 - 1
x86_64/Makefile.am

@@ -5,4 +5,4 @@ SUFFIXES = .asm
 libx8664_a_SOURCES	= sha256_xmm_amd64.asm sha256_sse4_amd64.asm
 
 .asm.o:
-	$(YASM) -f elf64 $<
+	$(YASM) -f $(YASM_FMT) -o $@ $<

+ 35 - 8
x86_64/sha256_sse4_amd64.asm

@@ -13,9 +13,17 @@
 ALIGN 32
 BITS 64
 
+%ifidn __OUTPUT_FORMAT__,win64
+%define hash rcx
+%define data rdx
+%define init r8
+%define temp r9
+%else
 %define hash rdi
 %define data rsi
 %define init rdx
+%define temp rcx
+%endif
 
 ; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
 %define LAB_CALC_PARA	2
@@ -27,18 +35,28 @@ extern g_4sha256_k
 
 global CalcSha256_x64_sse4
 ;	CalcSha256	hash(rdi), data(rsi), init(rdx)
+;	CalcSha256	hash(rcx), data(rdx), init(r8)
 CalcSha256_x64_sse4:
 
 	push	rbx
+%ifidn __OUTPUT_FORMAT__,win64
+	sub	rsp, 16 * 6
+	movdqa	[rsp + 16*0], xmm6
+	movdqa	[rsp + 16*1], xmm7
+	movdqa	[rsp + 16*2], xmm8
+	movdqa	[rsp + 16*3], xmm9
+	movdqa	[rsp + 16*4], xmm10
+	movdqa	[rsp + 16*5], xmm11
+%endif
 
 LAB_NEXT_NONCE:
 
-	mov	rcx, 64*4					; 256 - rcx is # of SHA-2 rounds
+	mov	temp, 64*4					; 256 - temp is # of SHA-2 rounds
 	mov	rax, 16*4					; 64 - rax is where we expand to
 
 LAB_SHA:
-	push	rcx
-	lea	rcx, qword [data+rcx*4]				; + 1024
+	push	temp
+	lea	temp, qword [data+temp*4]			; + 1024
 	lea	r11, qword [data+rax*4]				; + 256
 
 LAB_CALC:
@@ -122,10 +140,10 @@ LAB_CALC:
 %endrep
 
 	add	r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16
-	cmp	r11, rcx
+	cmp	r11, temp
 	jb	LAB_CALC
 
-	pop	rcx
+	pop	temp
 	mov	rax, 0
 
 ; Load the init values of the message into the hash.
@@ -219,12 +237,12 @@ LAB_LOOP:
 %assign i i+1
 %endrep
 
-	cmp	rax, rcx
+	cmp	rax, temp
 	jb	LAB_LOOP
 
 ; Finished the 64 rounds, calculate hash and save
 
-	movntdqa	xmm1, [rdx]
+	movntdqa	xmm1, [init]
 	pshufd	xmm2, xmm1, 0x55
 	paddd	xmm5, xmm2
 	pshufd	xmm6, xmm1, 0xAA
@@ -234,7 +252,7 @@ LAB_LOOP:
 	pshufd	xmm1, xmm1, 0
 	paddd	xmm7, xmm1
 
-	movntdqa	xmm1, [rdx+4*4]
+	movntdqa	xmm1, [init+4*4]
 	pshufd	xmm2, xmm1, 0x55
 	paddd	xmm8, xmm2
 	pshufd	xmm6, xmm1, 0xAA
@@ -254,6 +272,15 @@ LAB_LOOP:
 	movdqa	[hash+7*16], xmm10
 
 LAB_RET:
+%ifidn __OUTPUT_FORMAT__,win64
+	movdqa	xmm6, [rsp + 16*0]
+	movdqa	xmm7, [rsp + 16*1]
+	movdqa	xmm8, [rsp + 16*2]
+	movdqa	xmm9, [rsp + 16*3]
+	movdqa	xmm10, [rsp + 16*4]
+	movdqa	xmm11, [rsp + 16*5]
+	add	rsp, 16 * 6
+%endif
 	pop	rbx
 	ret
 

+ 25 - 0
x86_64/sha256_xmm_amd64.asm

@@ -22,10 +22,17 @@
 ALIGN 32
 BITS 64
 
+%ifidn __OUTPUT_FORMAT__,win64
+%define hash  rcx
+%define hash1 rdx
+%define data  r8
+%define init  r9
+%else
 %define hash  rdi
 %define hash1 rsi
 %define data  rdx
 %define init  rcx
+%endif
 
 ; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
 %define SHA_CALC_W_PARA         2
@@ -227,6 +234,15 @@ sha256_sse2_64_new:
 %endif
 
     push        rbx
+%ifidn __OUTPUT_FORMAT__,win64
+    sub         rsp, 16 * 6
+    movdqa      [rsp + 16*0], xmm6
+    movdqa      [rsp + 16*1], xmm7
+    movdqa      [rsp + 16*2], xmm8
+    movdqa      [rsp + 16*3], xmm9
+    movdqa      [rsp + 16*4], xmm10
+    movdqa      [rsp + 16*5], xmm13
+%endif
 
 %macro  SHA_256  0
     mov         rbx, 64*4   ; rbx is # of SHA-2 rounds
@@ -318,6 +334,15 @@ sha256_sse2_64_new:
     movdqa    [hash+7*16], rH
 
 LAB_RET:
+%ifidn __OUTPUT_FORMAT__,win64
+    movdqa    xmm6, [rsp + 16*0]
+    movdqa    xmm7, [rsp + 16*1]
+    movdqa    xmm8, [rsp + 16*2]
+    movdqa    xmm9, [rsp + 16*3]
+    movdqa    xmm10, [rsp + 16*4]
+    movdqa    xmm13, [rsp + 16*5]
+    add       rsp, 16 * 6
+%endif
     pop       rbx
     ret