14 years ago · 2a3b7359eb
--- a/poclbm120222.cl
+++ b/poclbm120222.cl
@@ -36,7 +36,10 @@ __constant uint K[64] = {
 
				 #ifdef BITALIGN
			
 
				 	#pragma OPENCL EXTENSION cl_amd_media_ops : enable
			
 
				 	#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
			
 
				- #ifdef BFI_INT
			
 
				+#else
			
 
				+	#define rotr(x, y) rotate((u)x, (u)(32 - y))
			
 
				+#endif
			
 
				+#ifdef BFI_INT
			
 
				 	// Well, slight problem... It turns out BFI_INT isn't actually exposed to
			
 
				 	// OpenCL (or CAL IL for that matter) in any way. However, there is 
			
 
				 	// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
			
@@ -49,23 +52,19 @@ __constant uint K[64] = {
 
				 	
			
 
				 	// Ma can also be implemented in terms of BFI_INT...
			
 
				 	#define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) )
			
 
				- #else // BFI_INT
			
 
				-	// Later SDKs optimise this to BFI INT without patching and GCN
			
 
				-	// actually fails if manually patched with BFI_INT
			
 
				+
			
 
				+	// AMD's KernelAnalyzer throws errors compiling the kernel if we use
			
 
				+	// amd_bytealign on constants with vectors enabled, so we use this to avoid
			
 
				+	// problems. (this is used 4 times, and likely optimized out by the compiler.)
			
 
				+	#define Ma2(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
			
 
				+#else // BFI_INT
			
 
				+	//GCN actually fails if manually patched with BFI_INT
			
 
				 
			
 
				 	#define ch(x, y, z) bitselect((u)z, (u)y, (u)x)
			
 
				 	#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
			
 
				-#endif
			
 
				-#else // BITALIGN
			
 
				-	#define ch(x, y, z) (z ^ (x & (y ^ z)))
			
 
				-	#define Ma(x, y, z) ((x & z) | (y & (x | z)))
			
 
				-	#define rotr(x, y) rotate((u)x, (u)(32 - y))
			
 
				+	#define Ma2(x, y, z) Ma(x, y, z)
			
 
				 #endif
			
 
				 
			
 
				-// AMD's KernelAnalyzer throws errors compiling the kernel if we use 
			
 
				-// amd_bytealign on constants with vectors enabled, so we use this to avoid 
			
 
				-// problems. (this is used 4 times, and likely optimized out by the compiler.)
			
 
				-#define Ma2(x, y, z) ((y & z) | (x & (y | z)))
			
 
				 
			
 
				 __kernel
			
 
				 	__attribute__((vec_type_hint(u)))