Browse Source

Merge branch 'ocl_update_20130512' into bfgminer

Luke Dashjr 12 years ago
parent
commit
6147f11c9a
8 changed files with 984 additions and 711 deletions
  1. 1 0
      Makefile.am
  2. 56 369
      README
  3. 360 0
      README.GPU
  4. 3 3
      configure.ac
  5. 203 116
      diablo130302.cl
  6. 6 8
      ocl.c
  7. 83 39
      poclbm130302.cl
  8. 272 176
      scrypt130302.cl

+ 1 - 0
Makefile.am

@@ -5,6 +5,7 @@ EXTRA_DIST	= example.conf m4/gnulib-cache.m4 linux-usb-bfgminer \
 		  api-example.php miner.php	\
 		  API.class API.java api-example.c windows-build.txt \
 		  bitstreams/* README.FPGA README.RPC README.scrypt \
+		  README.GPU \
                   api-example.py
 
 SUBDIRS		= lib ccan

+ 56 - 369
README

@@ -31,7 +31,52 @@ irc://irc.freenode.net/eligius
 
 License: GPLv3.  See COPYING for details.
 
-READ EXECUTIVE SUMMARY BELOW FOR FIRST TIME USERS!
+SEE ALSO README.FPGA, README.GPU, README.RPC, AND README.scrypt FOR MORE
+INFORMATION ON EACH.
+
+---
+
+EXECUTIVE SUMMARY ON USAGE:
+
+After saving configuration from the menu, you do not need to give BFGMiner any
+arguments and it will load your configuration.
+
+Any configuration file may also contain a single
+	"include" : "filename"
+to recursively include another configuration file.
+Writing the configuration will save all settings from all files in the output.
+
+
+Single pool:
+
+bfgminer -o http://pool:port -u username -p password
+
+Multiple pools:
+
+bfgminer -o http://pool1:port -u pool1username -p pool1password -o http://pool2:port -u pool2username -p pool2password
+
+Single pool with a standard http proxy, regular desktop:
+
+bfgminer -o http://pool:port -x http://proxy:port -u username -p password
+
+Single pool with a socks5 proxy, regular desktop:
+
+bfgminer -o http://pool:port -x socks5://proxy:port -u username -p password
+
+The list of proxy types is:
+ http:    standard http 1.1 proxy
+ socks4:  socks4 proxy
+ socks5:  socks5 proxy
+ socks4a: socks4a proxy
+ socks5h: socks5 proxy using a hostname
+
+Proxy support requires cURL version 7.21.7 or newer.
+
+If you specify the --socks-proxy option to BFGMiner, it will only be applied to
+all pools that don't specify their own proxy setting like above.
+
+---
+BUILDING BFGMINER
 
 Dependencies:
 	autoconf             http://www.gnu.org/software/autoconf/
@@ -50,7 +95,7 @@ Optional Dependencies:
 	  libncursesw5-dev       ^ same
 	  libpdcurses        http://pdcurses.sourceforge.net/ (Linux/Mac/Windows)
 
-	Multiple FPGA autodetection: any one of:
+	Multiple ASIC/FPGA autodetection: any one of:
 	  sysfs              (builtin to most Linux kernels, just mount on /sys)
 	  libudev-dev        http://www.freedesktop.org/software/systemd/libudev/
 
@@ -78,39 +123,15 @@ BFGMiner specific configuration options:
 	--without-curses        Compile support for curses TUI (default enabled)
 	--without-libudev       Autodetect FPGAs using libudev (default enabled)
 
----
-
-To build with GPU mining support:
-
-Install AMD APP sdk, ideal version (see FAQ!) - put it into a system location.
-Download the correct version for either 32 bit or 64 bit from here:
-	http://developer.amd.com/tools/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads/
-
-This will give you a file with a name like:
- AMD-APP-SDK-v2.4-lnx64.tgz (64-bit)
-or
- AMD-APP-SDK-v2.4-lnx32.tgz (32-bit)
-
-Then:
-
-sudo -i
-cd /opt
-tar xf /path/to/AMD-APP-SDK-v2.4-lnx##.tgz
-cd /
-tar xf /opt/AMD-APP-SDK-v2.4-lnx##/icd-registration.tgz
-ln -s /opt/AMD-APP-SDK-v2.4-lnx##/include/CL /usr/include
-ln -s /opt/AMD-APP-SDK-v2.4-lnx##/lib/x86_64/* /usr/lib/
-ldconfig
-
-Where ## is 32 or 64, depending on the bitness of the SDK you downloaded.
-If you are on 32 bit, x86_64 in the 2nd last line should be x86
-
 Basic *nix build instructions:
 
 ./autogen.sh    # only needed if building from git repo
 ./configure
 make
 
+No installation is necessary. You may run BFGMiner from the build directory
+directly.
+
 On Mac OS X, you can use Homebrew to install the dependency libraries. When you
 are ready to build BFGMiner, you may need to point the configure script at one
 or more pkg-config paths. For example:
@@ -287,71 +308,6 @@ CPU only options (not included in binaries):
 --enable-cpu|-C     Enable CPU mining with other mining (default: no CPU mining if other devices exist)
 
 
-
----
-
-EXECUTIVE SUMMARY ON USAGE:
-
-After saving configuration from the menu, you do not need to give BFGMiner any
-arguments and it will load your configuration.
-
-Any configuration file may also contain a single
-	"include" : "filename"
-to recursively include another configuration file.
-Writing the configuration will save all settings from all files in the output.
-
-
-Single pool, regular desktop:
-
-bfgminer -o http://pool:port -u username -p password
-
-Single pool, dedicated miner:
-
-bfgminer -o http://pool:port -u username -p password -I 9
-
-Single pool, first card regular desktop, 3 other dedicated cards:
-
-bfgminer -o http://pool:port -u username -p password -I d,9,9,9
-
-Multiple pool, dedicated miner:
-
-bfgminer -o http://pool1:port -u pool1username -p pool1password -o http://pool2:port -u pool2usernmae -p pool2password -I 9
-
-Add overclocking settings, GPU and fan control for all cards:
-
-bfgminer -o http://pool:port -u username -p password -I 9 --auto-fan --auto-gpu --gpu-engine 750-950 --gpu-memclock 300
-
-Add overclocking settings, GPU and fan control with different engine settings for 4 cards:
-
-bfgminer -o http://pool:port -u username -p password -I 9 --auto-fan --auto-gpu --gpu-engine 750-950,945,700-930,960 --gpu-memclock 300
-
-Single pool with a standard http proxy, regular desktop:
-
-bfgminer -o http://pool:port -x http://proxy:port -u username -p password
-
-Single pool with a socks5 proxy, regular desktop:
-
-bfgminer -o http://pool:port -x socks5://proxy:port -u username -p password
-
-The list of proxy types are:
- http:    standard http 1.1 proxy
- socks4:  socks4 proxy
- socks5:  socks5 proxy
- socks4a: socks4a proxy
- socks5h: socks5 proxy using a hostname
-
-Proxy support requires cURL version 7.21.7 or newer.
-
-If you specify the --socks-proxy option to BFGMiner, it will only be applied to
-all pools that don't specify their own proxy setting like above
-
-READ WARNINGS AND DOCUMENTATION BELOW ABOUT OVERCLOCKING
-
-On Linux you virtually always need to export your display settings before
-starting to get all the cards recognised and/or temperature+clocking working:
-
-export DISPLAY=:0
-
 ---
 
 WHILE RUNNING:
@@ -470,12 +426,6 @@ RF is Remote Fail occasions (server slow to accept work)
 E  is Efficiency defined as number of shares accepted (multiplied by their
           difficulty) per 2 KB of bandwidth
 
-NOTE: Running intensities above 9 with current hardware is likely to only
-diminish return performance even if the hash rate might appear better. A good
-starting baseline intensity to try on dedicated miners is 9. Higher values are
-there to cope with future improvements in hardware.
-
-
 The block display shows:
 Block: ...1b89f8d3 #217364  Diff:7.67M (54.93Th/s)  Started: [17:17:22]
 
@@ -600,223 +550,12 @@ For example (this is wrapped, but it's all on one line for real):
     f681634a4f1f63d01a0cd43fb338000000000080000000000000000000000000
     0000000000000000000000000000000000000000000000000000000080020000
 
----
-OVERCLOCKING WARNING AND INFORMATION
-
-AS WITH ALL OVERCLOCKING TOOLS YOU ARE ENTIRELY RESPONSIBLE FOR ANY HARM YOU
-MAY CAUSE TO YOUR HARDWARE. OVERCLOCKING CAN INVALIDATE WARRANTIES, DAMAGE
-HARDWARE AND EVEN CAUSE FIRES. THE AUTHOR ASSUMES NO RESPONSIBILITY FOR ANY
-DAMAGE YOU MAY CAUSE OR UNPLANNED CHILDREN THAT MAY OCCUR AS A RESULT.
-
-The GPU monitoring, clocking and fanspeed control incorporated into BFGMiner
-comes through use of the ATI Display Library. As such, it only supports ATI
-GPUs. Even if ADL support is successfully built into BFGMiner, unless the card
-and driver supports it, no GPU monitoring/settings will be available.
-
-BFGMiner supports initial setting of GPU engine clock speed, memory clock
-speed, voltage, fanspeed, and the undocumented powertune feature of 69x0+ GPUs.
-The setting passed to BFGMiner is used by all GPUs unless separate values are
-specified. All settings can all be changed within the menu on the fly on a
-per-GPU basis.
-
-For example:
---gpu-engine 950 --gpu-memclock 825
-
-will try to set all GPU engine clocks to 950 and all memory clocks to 825,
-while:
---gpu-engine 950,945,930,960 --gpu-memclock 300
-
-will try to set the engine clock of card 0 to 950, 1 to 945, 2 to 930, 3 to
-960 and all memory clocks to 300.
-
-You can substitute 0 to leave the engine clock of a card at its default.
-For example, to keep the 2nd GPU to its default clocks:
---gpu-engine 950,0,930,960 --gpu-memclock 300,0,300,300
-
-AUTO MODES:
-There are two "auto" modes in BFGMiner, --auto-fan and --auto-gpu. These can be
-used independently of each other and are complementary. Both auto modes are
-designed to safely change settings while trying to maintain a target
-temperature. By default this is set to 75 degrees C but can be changed with:
-
---temp-target
-e.g.
---temp-target 80
-Sets all cards' target temperature to 80 degrees.
-
---temp-target 75,85
-Sets card 0 target temperature to 75, and card 1 to 85 degrees.
-
-AUTO FAN:
-e.g.
---auto-fan (implies 85% upper limit)
---gpu-fan 25-85,65 --auto-fan
-
-Fan control in auto fan works off the theory that the minimum possible fan
-required to maintain an optimal temperature will use less power, make less
-noise, and prolong the life of the fan. In auto-fan mode, the fan speed is
-limited to 85% if the temperature is below "overheat" intentionally, as higher
-fanspeeds on GPUs do not produce signficantly more cooling, yet significantly
-shorten the lifespan of the fans. If temperature reaches the overheat value,
-fanspeed will still be increased to 100%. The overheat value is set to 85
-degrees by default and can be changed with:
-
---temp-overheat
-e.g.
---temp-overheat 75,85
-Sets card 0 overheat threshold to 75 degrees and card 1 to 85.
-
-AUTO GPU:
-e.g.
---auto-gpu --gpu-engine 750-950
---auto-gpu --gpu-engine 750-950,945,700-930,960
-
-GPU control in auto gpu tries to maintain as high a clock speed as possible
-while not reaching overheat temperatures. As a lower clock speed limit, the
-auto-gpu mode checks the GPU card's "normal" clock speed and will not go below
-this unless you have manually set a lower speed in the range. Also, unless a
-higher clock speed was specified at startup, it will not raise the clockspeed.
-If the temperature climbs, fanspeed is adjusted and optimised before GPU engin
-e clockspeed is adjusted. If fan speed control is not available or already
-optimal, then GPU clock speed is only decreased if it goes over the target
-temperature by the hysteresis amount, which is set to 3 by default and can be
-changed with:
---temp-hysteresis
-If the temperature drops below the target temperature, and engine clock speed
-is not at the highest level set at startup, BFGMiner will raise the clock speed.
-If at any time you manually set an even higher clock speed successfully in
-BFGMiner, it will record this value and use it as its new upper limit (and the
-same for low clock speeds and lower limits). If the temperature goes over the
-cutoff limit (95 degrees by default), BFGMiner will completely disable the GPU
-from mining and it will not be re-enabled unless manually done so. The cutoff
-temperature can be changed with:
-
---temp-cutoff
-e.g.
---temp-cutoff 95,105
-Sets card 0 cutoff temperature to 95 and card 1 to 105.
-
---gpu-memdiff -125
-This setting will modify the memory speed whenever the GPU clock speed is
-modified by --auto-gpu. In this example, it will set the memory speed to be 125
-MHz lower than the GPU speed. This is useful for some cards like the 6970 which
-normally don't allow a bigger clock speed difference. The 6970 is known to only
-allow -125, while the 7970 only allows -150.
-
-
-CHANGING SETTINGS:
-When setting values, it is important to realise that even though the driver
-may report the value was changed successfully, and the new card power profile
-information contains the values you set it to, that the card itself may
-refuse to use those settings. As the performance profile changes dynamically,
-querying the "current" value on the card can be wrong as well. So when changing
-values in BFGMiner, after a pause of 1 second, it will report to you the current
-values where you should check that your change has taken. An example is that
-6970 reference cards will accept low memory values but refuse to actually run
-those lower memory values unless they're within 125 of the engine clock speed.
-In that scenario, they usually set their real speed back to their default.
-
-BFGMiner reports the so-called "safe" range of whatever it is you are modifying
-when you ask to modify it on the fly. However, you can change settings to values
-outside this range. Despite this, the card can easily refuse to accept your
-changes, or worse, to accept your changes and then silently ignore them. So
-there is absolutely to know how far to/from where/to it can set things safely or
-otherwise, and there is nothing stopping you from at least trying to set them
-outside this range. Being very conscious of these possible failures is why
-BFGMiner will report back the current values for you to examine how exactly the
-card has responded. Even within the reported range of accepted values by the
-card, it is very easy to crash just about any card, so it cannot use those
-values to determine what range to set. You have to provide something meaningful
-manually for BFGMiner to work with through experimentation.
-
-STARTUP / SHUTDOWN:
-When BFGMiner starts up, it tries to read off the current profile information
-for clock and fan speeds and stores these values. When quitting BFGMiner, it
-will then try to restore the original values. Changing settings outside of
-BFGMiner while it's running may be reset to the startup BFGMiner values when
-BFGMiner shuts down because of this.
-
 ---
 
 RPC API
 
 For RPC API details see the README.RPC file
 
----
-
-GPU DEVICE ISSUES and use of --gpu-map
-
-GPUs mine with OpenCL software via the GPU device driver. This means you need
-to have both an OpenCL SDK installed, and the GPU device driver RUNNING (i.e.
-Xorg up and running configured for all devices that will mine on linux etc.)
-Meanwhile, the hardware monitoring that BFGMiner offers for AMD devices relies
-on the ATI Display Library (ADL) software to work. OpenCL DOES NOT TALK TO THE
-ADL. There is no 100% reliable way to know that OpenCL devices are identical
-to the ADL devices, as neither give off the same information. BFGMiner does its
-best to correlate these devices based on the order that OpenCL and ADL numbers
-them. It is possible that this will fail for the following reasons:
-
-1. The device order is listed differently by OpenCL and ADL (rare), even if the
-number of devices is the same.
-2. There are more OpenCL devices than ADL. OpenCL stupidly sees one GPU as two
-devices if you have two monitors connected to the one GPU.
-3. There are more ADL devices than OpenCL. ADL devices include any ATI GPUs,
-including ones that can't mine, like some older R4xxx cards.
-
-To cope with this, the ADVANCED option for --gpu-map is provided with BFGMiner.
-DO NOT USE THIS UNLESS YOU KNOW WHAT YOU ARE DOING. The default will work the
-vast majority of the time unless you know you have a problem already.
-
-To get useful information, start BFGMiner with just the -n option. You will get
-output that looks like this:
-
-[2012-04-25 13:17:34] CL Platform 0 vendor: Advanced Micro Devices, Inc.
-[2012-04-25 13:17:34] CL Platform 0 name: AMD Accelerated Parallel Processing
-[2012-04-25 13:17:34] CL Platform 0 version: OpenCL 1.1 AMD-APP (844.4)
-[2012-04-25 13:17:34] Platform 0 devices: 3
-[2012-04-25 13:17:34]   0       Tahiti
-[2012-04-25 13:17:34]   1       Tahiti
-[2012-04-25 13:17:34]   2       Cayman
-[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 6900 Series hardware monitoring enabled
-[2012-04-25 13:17:34] 3 GPU devices max detected
-
-Note the number of devices here match, and the order is the same. If devices 1
-and 2 were different between Tahiti and Cayman, you could run BFGMiner with:
---gpu-map 2:1,1:2
-And it would swap the monitoring it received from ADL device 1 and put it to
-OpenCL device 2 and vice versa.
-
-If you have 2 monitors connected to the first device it would look like this:
-
-[2012-04-25 13:17:34] Platform 0 devices: 4
-[2012-04-25 13:17:34]   0       Tahiti
-[2012-04-25 13:17:34]   1       Tahiti
-[2012-04-25 13:17:34]   2       Tahiti
-[2012-04-25 13:17:34]   3       Cayman
-[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 6900 Series hardware monitoring enabled
-
-To work around this, you would use:
--d 0 -d 2 -d 3 --gpu-map 2:1,3:2
-
-If you have an older card as well as the rest it would look like this:
-
-[2012-04-25 13:17:34] Platform 0 devices: 3
-[2012-04-25 13:17:34]   0       Tahiti
-[2012-04-25 13:17:34]   1       Tahiti
-[2012-04-25 13:17:34]   2       Cayman
-[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 4500 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 3 AMD Radeon HD 6900 Series hardware monitoring enabled
-
-To work around this you would use:
---gpu-map 0:1,1:2,2:3
-
-
 ---
 
 FAQ
@@ -841,9 +580,6 @@ A: No, BFGMiner keeps a database of the block it's working on to ensure it does
 not work on stale blocks, and having different blocks from two networks would
 make it invalidate the work from each other.
 
-Q: Can I change the intensity settings individually for each GPU?
-A: Yes, pass a list separated by commas such as -I d,4,9,9
-
 Q: Can I put multiple pools in the config file?
 A: Yes, check the example.conf file. Alternatively, set up everything either on
 the command line or via the menu after startup and choose settings->write
@@ -853,33 +589,10 @@ Q: The build fails with gcc is unable to build a binary.
 A: Remove the "-march=native" component of your CFLAGS as your version of GCC
 does not support it.
 
-Q: The CPU usage is high.
-A: The ATI drivers after 11.6 have a bug that makes them consume 100% of one
-CPU core unnecessarily so downgrade to 11.6. Binding BFGMiner to one CPU core on
-windows can minimise it to 100% (instead of more than one core). Driver version
-11.11 on linux and 11.12 on windows appear to have fixed this issue. Note that
-later drivers may have an apparent return of high CPU usage. Try
-'export GPU_USE_SYNC_OBJECTS=1' on Linux before starting BFGMiner. You can also
-set this variable in windows via a batch file or on the command line before
-starting BFGMiner with 'setx GPU_USE_SYNC_OBJECTS 1'
-
 Q: Can you implement feature X?
 A: I can, but time is limited, and people who donate are more likely to get
 their feature requests implemented.
 
-Q: My GPU hangs and I have to reboot it to get it going again?
-A: The more aggressively the mining software uses your GPU, the less overclock
-you will be able to run. You are more likely to hit your limits with BFGMiner
-and you will find you may need to overclock your GPU less aggressively. The
-software cannot be responsible and make your GPU hang directly. If you simply
-cannot get it to ever stop hanging, try decreasing the intensity, and if even
-that fails, try changing to the poclbm kernel with -k poclbm, though you will
-sacrifice performance. BFGMiner is designed to try and safely restart GPUs as
-much as possible, but NOT if that restart might actually crash the rest of the
-GPUs mining, or even the machine. It tries to restart them with a separate
-thread and if that separate thread dies, it gives up trying to restart any more
-GPUs.
-
 Q: Work keeps going to my backup pool even though my primary pool hasn't
 failed?
 A: BFGMiner checks for conditions where the primary pool is lagging and will
@@ -903,10 +616,6 @@ A: Everyone will always have their own view of what's important to monitor.
 The defaults are very sane and I have very little interest in changing this
 any further.
 
-Q: Can you change the autofan/autogpu to change speeds in a different manner?
-A: The defaults are sane and safe. I'm not interested in changing them further.
-The starting fan speed is set to 50% in auto-fan mode as a safety precaution.
-
 Q: Why is my efficiency above/below 1.00?
 A: Efficiency simply means how many shares you return for the amount of
 bandwidth used. It does not correlate with efficient use of your hardware, and
@@ -916,7 +625,7 @@ other factors.
 Q: What are the best parameters to pass for X pool/hardware/device.
 A: Virtually always, the DEFAULT parameters give the best results. Most user
 defined settings lead to worse performance. The ONLY thing most users should
-need to set is the Intensity.
+need to set is the Intensity for GPUs.
 
 Q: What happened to CPU mining?
 A: Being increasingly irrelevant for most users, and a maintenance issue, it is
@@ -924,29 +633,6 @@ no longer under active development and will not be supported unless someone
 steps up to help maintain it. No binary builds supporting CPU mining will be
 released but CPU mining can be built into BFGMiner when it is compiled.
 
-Q: I upgraded BFGMiner version and my hashrate suddenly dropped!
-A: No, you upgraded your SDK version unwittingly between upgrades of BFGMiner
-and that caused  your hashrate to drop. See the next question.
-
-Q: I upgraded my ATI driver/SDK/BFGMiner and my hashrate suddenly dropped!
-A: The hashrate performance in BFGMiner is tied to the version of the ATI SDK
-that is installed only for the very first time BFGMiner is run. This generates
-binaries that are used by the GPU every time after that. Any upgrades to the
-SDK after that time will have no effect on the binaries. However, if you
-install a fresh version of BFGMiner, and have since upgraded your SDK, new
-binaries will be built. It is known that the 2.6 ATI SDK has a huge hashrate
-penalty on generating new binaries. It is recommended to not use this SDK at
-this time unless you are using an ATI 7xxx card that needs it.
-
-Q: Which ATI SDK is the best for BFGMiner?
-A: At the moment, versions 2.4 and 2.5 work the best. If you are forced to use
-the 2.6 SDK, the phatk kernel will perform poorly, while the diablo or my
-custom modified poclbm kernel are optimised for it.
-
-Q: I have multiple SDKs installed, can I choose which one it uses?
-A: Run bfgminer with the -n option and it will list all the platforms currently
-installed. Then you can tell BFGMiner which platform to use with --gpu-platform.
-
 Q: GUI version?
 A: No. The RPC interface makes it possible for someone else to write one
 though.
@@ -956,10 +642,6 @@ A: Start BFGMiner with your regular commands and add -D -T --verbose and provide
 the full startup output and a summary of your hardware, operating system, ATI
 driver version and ATI stream version.
 
-Q: BFGMiner reports no devices or only one device on startup on Linux although
-I have multiple devices and drivers+SDK installed properly?
-A: Try "export DISPLAY=:0" before running BFGMiner.
-
 Q: My network gets slower and slower and then dies for a minute?
 A: Try the --net-delay option.
 
@@ -987,6 +669,11 @@ mining. Since the acronym needs to be only 3 characters, the "Field-" part has
 been skipped. "PGA" is also used for devices built with Application-Specific
 Integrated Circuits (ASICs).
 
+Q: What is an ASIC?
+A: BFGMiner currently supports 2 ASICs: Avalon and BitForce SC devices. They are
+Application-Specific Integrated Circuit devices and provide the highest
+performance per unit power due to being dedicated to only one purpose.
+
 Q: How do I get my BFL/Icarus/Lancelot/Cairnsmore device to auto-recognise?
 A: On Linux, if the /dev/ttyUSB* devices don't automatically appear, the only
 thing that needs to be done is to load the driver for them:

+ 360 - 0
README.GPU

@@ -0,0 +1,360 @@
+EXECUTIVE SUMMARY ON GPU USAGE (SEE ALSO README.scrypt FOR SCRYPT MINING):
+
+Single pool, regular desktop:
+
+bfgminer -o http://pool:port -u username -p password
+
+By default if you have configured your system properly, BFGMiner will mine on
+ALL GPUs, but in "dynamic" mode which is designed to keep your system usable
+and sacrifice some mining performance.
+
+Single pool, dedicated miner:
+
+bfgminer -o http://pool:port -u username -p password -I 9
+
+Single pool, first card regular desktop, 3 other dedicated cards:
+
+bfgminer -o http://pool:port -u username -p password -I d,9,9,9
+
+Multiple pool, dedicated miner:
+
+bfgminer -o http://pool1:port -u pool1username -p pool1password -o http://pool2:port -u pool2username -p pool2password -I 9
+
+Add overclocking settings, GPU and fan control for all cards:
+
+bfgminer -o http://pool:port -u username -p password -I 9 --auto-fan --auto-gpu --gpu-engine 750-950 --gpu-memclock 300
+
+Add overclocking settings, GPU and fan control with different engine settings for 4 cards:
+
+bfgminer -o http://pool:port -u username -p password -I 9 --auto-fan --auto-gpu --gpu-engine 750-950,945,700-930,960 --gpu-memclock 300
+
+READ WARNINGS AND DOCUMENTATION BELOW ABOUT OVERCLOCKING
+
+To configure multiple displays on linux you need to configure your Xorg cleanly
+to use them all:
+
+sudo aticonfig --adapter=all -f --initial
+
+On Linux you virtually always need to export your display settings before
+starting to get all the cards recognised and/or temperature+clocking working:
+
+export DISPLAY=:0
+
+---
+SETUP FOR GPU SUPPORT:
+
+To set up GPU mining support:
+
+Install AMD APP sdk, ideal version (see FAQ!) - put it into a system location.
+Download the correct version for either 32 bit or 64 bit from here:
+	http://developer.amd.com/tools/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads/
+
+This will give you a file with a name like:
+ AMD-APP-SDK-v2.4-lnx64.tgz (64-bit)
+or
+ AMD-APP-SDK-v2.4-lnx32.tgz (32-bit)
+
+Then:
+
+sudo -i
+cd /opt
+tar xf /path/to/AMD-APP-SDK-v2.4-lnx##.tgz
+cd /
+tar xf /opt/AMD-APP-SDK-v2.4-lnx##/icd-registration.tgz
+ln -s /opt/AMD-APP-SDK-v2.4-lnx##/include/CL /usr/include
+ln -s /opt/AMD-APP-SDK-v2.4-lnx##/lib/x86_64/* /usr/lib/
+ldconfig
+
+Where ## is 32 or 64, depending on the bitness of the SDK you downloaded.
+If you are on 32 bit, x86_64 in the 2nd last line should be x86
+
+
+---
+INTENSITY INFORMATION:
+
+Intensity correlates with the size of work being submitted at any one time to
+a GPU. The higher the number the larger the size of work. Generally speaking
+finding an optimal value rather than the highest value is the correct approach
+as hash rate rises up to a point with higher intensities but above that, the
+device may be very slow to return responses, or produce errors.
+
+NOTE: Running intensities above 9 with current hardware is likely to only
+diminish return performance even if the hash rate might appear better. A good
+starting baseline intensity to try on dedicated miners is 9. Higher values are
+there to cope with future improvements in hardware.
+
+
+---
+OVERCLOCKING WARNING AND INFORMATION
+
+AS WITH ALL OVERCLOCKING TOOLS YOU ARE ENTIRELY RESPONSIBLE FOR ANY HARM YOU
+MAY CAUSE TO YOUR HARDWARE. OVERCLOCKING CAN INVALIDATE WARRANTIES, DAMAGE
+HARDWARE AND EVEN CAUSE FIRES. THE AUTHOR ASSUMES NO RESPONSIBILITY FOR ANY
+DAMAGE YOU MAY CAUSE OR UNPLANNED CHILDREN THAT MAY OCCUR AS A RESULT.
+
+The GPU monitoring, clocking and fanspeed control incorporated into BFGMiner
+comes through use of the ATI Display Library. As such, it only supports ATI
+GPUs. Even if ADL support is successfully built into BFGMiner, unless the card
+and driver supports it, no GPU monitoring/settings will be available.
+
+BFGMiner supports initial setting of GPU engine clock speed, memory clock
+speed, voltage, fanspeed, and the undocumented powertune feature of 69x0+ GPUs.
+The setting passed to BFGMiner is used by all GPUs unless separate values are
+specified. All settings can be changed within the menu on the fly on a
+per-GPU basis.
+
+For example:
+--gpu-engine 950 --gpu-memclock 825
+
+will try to set all GPU engine clocks to 950 and all memory clocks to 825,
+while:
+--gpu-engine 950,945,930,960 --gpu-memclock 300
+
+will try to set the engine clock of card 0 to 950, 1 to 945, 2 to 930, 3 to
+960 and all memory clocks to 300.
+
+You can substitute 0 to leave the engine clock of a card at its default.
+For example, to keep the 2nd GPU to its default clocks:
+--gpu-engine 950,0,930,960 --gpu-memclock 300,0,300,300
+
+AUTO MODES:
+There are two "auto" modes in BFGMiner, --auto-fan and --auto-gpu. These can be
+used independently of each other and are complementary. Both auto modes are
+designed to safely change settings while trying to maintain a target
+temperature. By default this is set to 75 degrees C but can be changed with:
+
+--temp-target
+e.g.
+--temp-target 80
+Sets all cards' target temperature to 80 degrees.
+
+--temp-target 75,85
+Sets card 0 target temperature to 75, and card 1 to 85 degrees.
+
+AUTO FAN:
+e.g.
+--auto-fan (implies 85% upper limit)
+--gpu-fan 25-85,65 --auto-fan
+
+Fan control in auto fan works off the theory that the minimum possible fan
+required to maintain an optimal temperature will use less power, make less
+noise, and prolong the life of the fan. In auto-fan mode, the fan speed is
+limited to 85% if the temperature is below "overheat" intentionally, as higher
+fanspeeds on GPUs do not produce significantly more cooling, yet significantly
+shorten the lifespan of the fans. If temperature reaches the overheat value,
+fanspeed will still be increased to 100%. The overheat value is set to 85
+degrees by default and can be changed with:
+
+--temp-overheat
+e.g.
+--temp-overheat 75,85
+Sets card 0 overheat threshold to 75 degrees and card 1 to 85.
+
+AUTO GPU:
+e.g.
+--auto-gpu --gpu-engine 750-950
+--auto-gpu --gpu-engine 750-950,945,700-930,960
+
+GPU control in auto gpu tries to maintain as high a clock speed as possible
+while not reaching overheat temperatures. As a lower clock speed limit, the
+auto-gpu mode checks the GPU card's "normal" clock speed and will not go below
+this unless you have manually set a lower speed in the range. Also, unless a
+higher clock speed was specified at startup, it will not raise the clockspeed.
+If the temperature climbs, fanspeed is adjusted and optimised before GPU engine
+clockspeed is adjusted. If fan speed control is not available or already
+optimal, then GPU clock speed is only decreased if it goes over the target
+temperature by the hysteresis amount, which is set to 3 by default and can be
+changed with:
+--temp-hysteresis
+If the temperature drops below the target temperature, and engine clock speed
+is not at the highest level set at startup, BFGMiner will raise the clock speed.
+If at any time you manually set an even higher clock speed successfully in
+BFGMiner, it will record this value and use it as its new upper limit (and the
+same for low clock speeds and lower limits). If the temperature goes over the
+cutoff limit (95 degrees by default), BFGMiner will completely disable the GPU
+from mining and it will not be re-enabled unless manually done so. The cutoff
+temperature can be changed with:
+
+--temp-cutoff
+e.g.
+--temp-cutoff 95,105
+Sets card 0 cutoff temperature to 95 and card 1 to 105.
+
+--gpu-memdiff -125
+This setting will modify the memory speed whenever the GPU clock speed is
+modified by --auto-gpu. In this example, it will set the memory speed to be 125
+MHz lower than the GPU speed. This is useful for some cards like the 6970 which
+normally don't allow a bigger clock speed difference. The 6970 is known to only
+allow -125, while the 7970 only allows -150.
+
+
+CHANGING SETTINGS:
+When setting values, it is important to realise that even though the driver
+may report the value was changed successfully, and the new card power profile
+information contains the values you set it to, the card itself may
+refuse to use those settings. As the performance profile changes dynamically,
+querying the "current" value on the card can be wrong as well. So when changing
+values in BFGMiner, after a pause of 1 second, it will report to you the current
+values where you should check that your change has taken. An example is that
+6970 reference cards will accept low memory values but refuse to actually run
+those lower memory values unless they're within 125 of the engine clock speed.
+In that scenario, they usually set their real speed back to their default.
+
+BFGMiner reports the so-called "safe" range of whatever it is you are modifying
+when you ask to modify it on the fly. However, you can change settings to values
+outside this range. Despite this, the card can easily refuse to accept your
+changes, or worse, to accept your changes and then silently ignore them. So
+there is absolutely no way to know how far to/from where/to it can set things safely or
+otherwise, and there is nothing stopping you from at least trying to set them
+outside this range. Being very conscious of these possible failures is why
+BFGMiner will report back the current values for you to examine how exactly the
+card has responded. Even within the reported range of accepted values by the
+card, it is very easy to crash just about any card, so it cannot use those
+values to determine what range to set. You have to provide something meaningful
+manually for BFGMiner to work with through experimentation.
+
+STARTUP / SHUTDOWN:
+When BFGMiner starts up, it tries to read off the current profile information
+for clock and fan speeds and stores these values. When quitting BFGMiner, it
+will then try to restore the original values. Changing settings outside of
+BFGMiner while it's running may be reset to the startup BFGMiner values when
+BFGMiner shuts down because of this.
+
+---
+
+GPU DEVICE ISSUES and use of --gpu-map
+
+GPUs mine with OpenCL software via the GPU device driver. This means you need
+to have both an OpenCL SDK installed, and the GPU device driver RUNNING (i.e.
+Xorg up and running configured for all devices that will mine on linux etc.)
+Meanwhile, the hardware monitoring that BFGMiner offers for AMD devices relies
+on the ATI Display Library (ADL) software to work. OpenCL DOES NOT TALK TO THE
+ADL. There is no 100% reliable way to know that OpenCL devices are identical
+to the ADL devices, as neither give off the same information. BFGMiner does its
+best to correlate these devices based on the order that OpenCL and ADL numbers
+them. It is possible that this will fail for the following reasons:
+
+1. The device order is listed differently by OpenCL and ADL (rare), even if the
+number of devices is the same.
+2. There are more OpenCL devices than ADL. OpenCL stupidly sees one GPU as two
+devices if you have two monitors connected to the one GPU.
+3. There are more ADL devices than OpenCL. ADL devices include any ATI GPUs,
+including ones that can't mine, like some older R4xxx cards.
+
+To cope with this, the ADVANCED option for --gpu-map is provided with BFGMiner.
+DO NOT USE THIS UNLESS YOU KNOW WHAT YOU ARE DOING. The default will work the
+vast majority of the time unless you know you have a problem already.
+
+To get useful information, start BFGMiner with just the -n option. You will get
+output that looks like this:
+
+[2012-04-25 13:17:34] CL Platform 0 vendor: Advanced Micro Devices, Inc.
+[2012-04-25 13:17:34] CL Platform 0 name: AMD Accelerated Parallel Processing
+[2012-04-25 13:17:34] CL Platform 0 version: OpenCL 1.1 AMD-APP (844.4)
+[2012-04-25 13:17:34] Platform 0 devices: 3
+[2012-04-25 13:17:34]   0       Tahiti
+[2012-04-25 13:17:34]   1       Tahiti
+[2012-04-25 13:17:34]   2       Cayman
+[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 7900 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 6900 Series hardware monitoring enabled
+[2012-04-25 13:17:34] 3 GPU devices max detected
+
+Note the number of devices here matches, and the order is the same. If devices 1
+and 2 were different between Tahiti and Cayman, you could run BFGMiner with:
+--gpu-map 2:1,1:2
+And it would swap the monitoring it received from ADL device 1 and put it to
+OpenCL device 2 and vice versa.
+
+If you have 2 monitors connected to the first device it would look like this:
+
+[2012-04-25 13:17:34] Platform 0 devices: 4
+[2012-04-25 13:17:34]   0       Tahiti
+[2012-04-25 13:17:34]   1       Tahiti
+[2012-04-25 13:17:34]   2       Tahiti
+[2012-04-25 13:17:34]   3       Cayman
+[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 7900 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 6900 Series hardware monitoring enabled
+
+To work around this, you would use:
+-d 0 -d 2 -d 3 --gpu-map 2:1,3:2
+
+If you have an older card as well as the rest it would look like this:
+
+[2012-04-25 13:17:34] Platform 0 devices: 3
+[2012-04-25 13:17:34]   0       Tahiti
+[2012-04-25 13:17:34]   1       Tahiti
+[2012-04-25 13:17:34]   2       Cayman
+[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 4500 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 7900 Series  hardware monitoring enabled
+[2012-04-25 13:17:34] GPU 3 AMD Radeon HD 6900 Series hardware monitoring enabled
+
+To work around this you would use:
+--gpu-map 0:1,1:2,2:3
+
+
+---
+GPU FAQ:
+
+Q: Can I change the intensity settings individually for each GPU?
+A: Yes, pass a list separated by commas such as -I d,4,9,9
+
+Q: The CPU usage is high.
+A: The ATI drivers after 11.6 have a bug that makes them consume 100% of one
+CPU core unnecessarily so downgrade to 11.6. Binding BFGMiner to one CPU core on
+windows can minimise it to 100% (instead of more than one core). Driver version
+11.11 on linux and 11.12 on windows appear to have fixed this issue. Note that
+later drivers may have an apparent return of high CPU usage. Try
+'export GPU_USE_SYNC_OBJECTS=1' on Linux before starting BFGMiner. You can also
+set this variable in windows via a batch file or on the command line before
+starting BFGMiner with 'setx GPU_USE_SYNC_OBJECTS 1'
+
+Q: My GPU hangs and I have to reboot it to get it going again?
+A: The more aggressively the mining software uses your GPU, the less overclock
+you will be able to run. You are more likely to hit your limits with BFGMiner
+and you will find you may need to overclock your GPU less aggressively. The
+software cannot be responsible and make your GPU hang directly. If you simply
+cannot get it to ever stop hanging, try decreasing the intensity, and if even
+that fails, try changing to the poclbm kernel with -k poclbm, though you will
+sacrifice performance. BFGMiner is designed to try and safely restart GPUs as
+much as possible, but NOT if that restart might actually crash the rest of the
+GPUs mining, or even the machine. It tries to restart them with a separate
+thread and if that separate thread dies, it gives up trying to restart any more
+GPUs.
+
+Q: Can you change the autofan/autogpu to change speeds in a different manner?
+A: The defaults are sane and safe. I'm not interested in changing them further.
+The starting fan speed is set to 50% in auto-fan mode as a safety precaution.
+
+Q: I upgraded BFGMiner version and my hashrate suddenly dropped!
+A: No, you upgraded your SDK version unwittingly between upgrades of BFGMiner
+and that caused your hashrate to drop. See the next question.
+
+Q: I upgraded my ATI driver/SDK/BFGMiner and my hashrate suddenly dropped!
+A: The hashrate performance in BFGMiner is tied to the version of the ATI SDK
+that is installed only for the very first time BFGMiner is run. This generates
+binaries that are used by the GPU every time after that. Any upgrades to the
+SDK after that time will have no effect on the binaries. However, if you
+install a fresh version of BFGMiner, and have since upgraded your SDK, new
+binaries will be built. It is known that the 2.6 ATI SDK has a huge hashrate
+penalty on generating new binaries. It is recommended to not use this SDK at
+this time unless you are using an ATI 7xxx card that needs it.
+
+Q: Which ATI SDK is the best for BFGMiner?
+A: At the moment, versions 2.4 and 2.5 work the best. If you are forced to use
+the 2.6 SDK, the phatk kernel will perform poorly, while the diablo or my
+custom modified poclbm kernel are optimised for it.
+
+Q: I have multiple SDKs installed, can I choose which one it uses?
+A: Run bfgminer with the -n option and it will list all the platforms currently
+installed. Then you can tell BFGMiner which platform to use with --gpu-platform.
+
+Q: BFGMiner reports no devices or only one device on startup on Linux although
+I have multiple devices and drivers+SDK installed properly?
+A: Try "export DISPLAY=:0" before running BFGMiner.
+
+Q: Should I use crossfire/SLI?
+A: It does not benefit mining at all and depending on the GPU may actually
+worsen performance.

+ 3 - 3
configure.ac

@@ -723,10 +723,10 @@ fi
 AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to bfgminer install])
 
 AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk121016"], [Filename for phatk kernel])
-AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm121016"], [Filename for poclbm kernel])
+AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm130302"], [Filename for poclbm kernel])
 AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn121016"], [Filename for diakgcn kernel])
-AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo121016"], [Filename for diablo kernel])
-AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt121016"], [Filename for scrypt kernel])
+AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo130302"], [Filename for diablo kernel])
+AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt130302"], [Filename for scrypt kernel])
 
 
 AC_SUBST(PTHREAD_FLAGS)

File diff suppressed because it is too large
+ 203 - 116
diablo130302.cl


+ 6 - 8
ocl.c

@@ -259,9 +259,10 @@ int clDevicesNum(void) {
 	char pbuff[256];
 	cl_uint numDevices;
 	cl_uint numPlatforms;
+	int most_devices = -1;
 	cl_platform_id *platforms;
 	cl_platform_id platform = NULL;
-	unsigned int most_devices = 0, i, mdplatform = 0;
+	unsigned int i, mdplatform = 0;
 	bool mdmesa = false;
 
 	status = clGetPlatformIDs(0, NULL, &numPlatforms);
@@ -302,20 +303,17 @@ int clDevicesNum(void) {
 			applog(LOG_INFO, "CL Platform %d version: %s", i, pbuff);
 		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
 		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Getting Device IDs (num)", status);
-			if ((int)i != opt_platform_id)
-				continue;
-			return -1;
+			applog(LOG_INFO, "Error %d: Getting Device IDs (num)", status);
+			continue;
 		}
 		applog(LOG_INFO, "Platform %d devices: %d", i, numDevices);
-		if (numDevices > most_devices) {
+		if ((int)numDevices > most_devices) {
 			most_devices = numDevices;
 			mdplatform = i;
 			mdmesa = strstr(pbuff, "MESA");
 		}
 		if (numDevices) {
 			unsigned int j;
-			char pbuff[256];
 			cl_device_id *devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
 
 			clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
@@ -1025,7 +1023,7 @@ built:
 			applog(LOG_WARNING, "Your scrypt settings come to %lu", (unsigned long)bufsize);
 		} else
 			bufsize = cgpu->max_alloc;
-		applog(LOG_DEBUG, "Creating scrypt buffer sized %ld", (unsigned long)bufsize);
+		applog(LOG_DEBUG, "Creating scrypt buffer sized %lu", (unsigned long)bufsize);
 		clState->padbufsize = bufsize;
 
 		/* This buffer is weird and might work to some degree even if

+ 83 - 39
poclbm121016.cl → poclbm130302.cl

@@ -13,7 +13,7 @@
 	typedef uint u;
 #endif
 
-__constant uint K[64] = { 
+__constant uint K[87] = {
     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
     0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
     0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
@@ -21,9 +21,56 @@ __constant uint K[64] = {
     0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
     0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
     0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+
+	0xc19bf3f4U,
+	0x80000000U,
+	0x00000280U,
+	0x00a00055U,
+	0xf377ed68U,
+	0xa54ff53aU,
+	0x08909ae5U,
+	0x90bb1e3cU,
+	0x9b05688cU,
+	0xca0b3af3U,
+	0x3c6ef372U,
+	0xbb67ae85U,
+	0x6a09e667U,
+	0x50c6645bU,
+	0x510e527fU,
+	0x3ac42e24U,
+	0x5807aa98U,
+	0xc19bf274U,
+	0x00a00000U,
+	0x00000100U,
+	0x11002000U,
+	0x00400022U,
+	0x136032edU
 };
 
+#define	xc19bf3f4U	K[64]
+#define	x80000000U	K[65]
+#define	x00000280U	K[66]
+#define	x00a00055U	K[67]
+#define	xf377ed68U	K[68]
+#define	xa54ff53aU	K[69]
+#define	x08909ae5U	K[70]
+#define	x90bb1e3cU	K[71]
+#define	x9b05688cU	K[72]
+#define	xca0b3af3U	K[73]
+#define	x3c6ef372U	K[74]
+#define	xbb67ae85U	K[75]
+#define	x6a09e667U	K[76]
+#define	x50c6645bU	K[77]
+#define	x510e527fU	K[78]
+#define	x3ac42e24U	K[79]
+#define	x5807aa98U	K[80]
+#define	xc19bf274U	K[81]
+#define	x00a00000U	K[82]
+#define	x00000100U	K[83]
+#define	x11002000U	K[84]
+#define	x00400022U	K[85]
+#define	x136032edU	K[86]
 
 // This part is not from the stock poclbm kernel. It's part of an optimization
 // added in the Phoenix Miner.
@@ -183,7 +230,7 @@ Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
 
 Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
 Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=0xC19BF3F4U;
+Vals[5]+=xc19bf3f4U;
 Vals[1]+=Vals[5];
 Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
 Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
@@ -223,7 +270,7 @@ Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
 Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
 
 W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-W[4]+=0x80000000U;
+W[4]+=x80000000U;
 Vals[0]+=W[4];
 Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
 Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
@@ -242,7 +289,7 @@ Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
 Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
 
 W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-W[6]+=0x00000280U;
+W[6]+=x00000280U;
 Vals[7]+=W[6];
 Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
 Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
@@ -321,7 +368,7 @@ Vals[3]+=Vals[6];
 Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
 Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
 
-W[14]=0x00a00055U;
+W[14]=x00a00055U;
 W[14]+=W[7];
 W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
 Vals[7]+=W[14];
@@ -701,61 +748,58 @@ Vals[5]+=state0;
 W[7]=state7;
 W[7]+=Vals[2];
 
-Vals[2]=0xF377ED68U;
+Vals[2]=xf377ed68U;
 Vals[2]+=Vals[5];
+W[0]=Vals[5];
+Vals[5]=x6a09e667U;
 
 W[3]=state3;
 W[3]+=Vals[0];
 
-Vals[0]=0xa54ff53aU;
+Vals[0]=xa54ff53aU;
 Vals[0]+=Vals[2];
-Vals[2]+=0x08909ae5U;
+Vals[2]+=x08909ae5U;
 
 W[6]=state6;
 W[6]+=Vals[3];
 
-Vals[3]=0x90BB1E3CU;
+Vals[3]=x90bb1e3cU;
 Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=(0x9b05688cU^(Vals[0]&0xca0b3af3U));
+Vals[3]+=(x9b05688cU^(Vals[0]&xca0b3af3U));
 
 Vals[7]+=state1;
 Vals[3]+=Vals[7];
+W[1]=Vals[7];
+Vals[7]=xbb67ae85U;
 
 W[2]=state2;
 W[2]+=Vals[6];
 
-Vals[6]=0x3c6ef372U;
+Vals[6]=x3c6ef372U;
 Vals[6]+=Vals[3];
 Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma2(0xbb67ae85U,Vals[2],0x6a09e667U);
+Vals[3]+=Ma2(Vals[7],Vals[2],Vals[5]);
 
 W[5]=state5;
 W[5]+=Vals[4];
 
-Vals[4]=0x50C6645BU;
+Vals[4]=x50c6645bU;
 Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],0x510e527fU);
+Vals[4]+=ch(Vals[6],Vals[0],x510e527fU);
 Vals[4]+=W[2];
 
-W[1]=Vals[7];
-Vals[7]=0xbb67ae85U;
 Vals[7]+=Vals[4];
 Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma2(0x6a09e667U,Vals[3],Vals[2]);
+Vals[4]+=Ma2(Vals[5],Vals[3],Vals[2]);
 
 W[4]=state4;
 W[4]+=Vals[1];
 
-Vals[1]=0x3AC42E24U;
+Vals[1]=x3ac42e24U;
 Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
 Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
 Vals[1]+=W[3];
-
-W[0]=Vals[5];
-
-Vals[5]=Vals[1];
-Vals[5]+=0x6a09e667U;
-
+Vals[5]+=Vals[1];
 Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
 Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
 
@@ -793,7 +837,7 @@ Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
 
 Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
 Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=0x5807AA98U;
+Vals[2]+=x5807aa98U;
 Vals[0]+=Vals[2];
 Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
 Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
@@ -842,7 +886,7 @@ Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
 
 Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
 Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=0xC19BF274U;
+Vals[5]+=xc19bf274U;
 Vals[1]+=Vals[5];
 Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
 Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
@@ -857,7 +901,7 @@ Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
 Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
 
 W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
-W[1]+=0x00a00000U;
+W[1]+=x00a00000U;
 Vals[3]+=W[1];
 Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
 Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
@@ -907,7 +951,7 @@ Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
 Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
 
 W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
-W[6]+=0x00000100U;
+W[6]+=x00000100U;
 W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
 Vals[7]+=W[6];
 Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
@@ -917,7 +961,7 @@ Vals[4]+=Vals[7];
 Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
 Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
 
-W[7]+=0x11002000U;
+W[7]+=x11002000U;
 W[7]+=W[0];
 W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
 Vals[5]+=W[7];
@@ -928,7 +972,7 @@ Vals[1]+=Vals[5];
 Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
 Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
 
-W[8]=0x80000000U;
+W[8]=x80000000U;
 W[8]+=W[1];
 W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
 Vals[2]+=W[8];
@@ -989,7 +1033,7 @@ Vals[3]+=Vals[6];
 Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
 Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
 
-W[14]=0x00400022U;
+W[14]=x00400022U;
 W[14]+=W[7];
 W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
 Vals[7]+=W[14];
@@ -1000,7 +1044,7 @@ Vals[4]+=Vals[7];
 Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
 Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
 
-W[15]=0x00000100U;
+W[15]=x00000100U;
 W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
 W[15]+=W[8];
 W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
@@ -1325,20 +1369,20 @@ Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
 #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS2) || defined(VECTORS4)
-	if (any(Vals[2] == 0x136032edU)) {
-		if (Vals[2].x == 0x136032edU)
+	if (any(Vals[2] == x136032edU)) {
+		if (Vals[2].x == x136032edU)
 			SETFOUND(nonce.x);
-		if (Vals[2].y == 0x136032edU)
+		if (Vals[2].y == x136032edU)
 			SETFOUND(nonce.y);
 #if defined(VECTORS4)
-		if (Vals[2].z == 0x136032edU)
+		if (Vals[2].z == x136032edU)
 			SETFOUND(nonce.z);
-		if (Vals[2].w == 0x136032edU)
+		if (Vals[2].w == x136032edU)
 			SETFOUND(nonce.w);
 #endif
 	}
 #else
-	if (Vals[2] == 0x136032edU)
+	if (Vals[2] == x136032edU)
 		SETFOUND(nonce);
 #endif
 }

+ 272 - 176
scrypt121016.cl → scrypt130302.cl

@@ -1,18 +1,144 @@
+/*-
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
+ * 2012-2013 Con Kolivas.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+
+__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
+__constant uint K[] = {
+	0x428a2f98U,
+	0x71374491U,
+	0xb5c0fbcfU,
+	0xe9b5dba5U,
+	0x3956c25bU,
+	0x59f111f1U,
+	0x923f82a4U,
+	0xab1c5ed5U,
+	0xd807aa98U,
+	0x12835b01U,
+	0x243185beU, // 10
+	0x550c7dc3U,
+	0x72be5d74U,
+	0x80deb1feU,
+	0x9bdc06a7U,
+	0xe49b69c1U,
+	0xefbe4786U,
+	0x0fc19dc6U,
+	0x240ca1ccU,
+	0x2de92c6fU,
+	0x4a7484aaU, // 20
+	0x5cb0a9dcU,
+	0x76f988daU,
+	0x983e5152U,
+	0xa831c66dU,
+	0xb00327c8U,
+	0xbf597fc7U,
+	0xc6e00bf3U,
+	0xd5a79147U,
+	0x06ca6351U,
+	0x14292967U, // 30
+	0x27b70a85U,
+	0x2e1b2138U,
+	0x4d2c6dfcU,
+	0x53380d13U,
+	0x650a7354U,
+	0x766a0abbU,
+	0x81c2c92eU,
+	0x92722c85U,
+	0xa2bfe8a1U,
+	0xa81a664bU, // 40
+	0xc24b8b70U,
+	0xc76c51a3U,
+	0xd192e819U,
+	0xd6990624U,
+	0xf40e3585U,
+	0x106aa070U,
+	0x19a4c116U,
+	0x1e376c08U,
+	0x2748774cU,
+	0x34b0bcb5U, // 50
+	0x391c0cb3U,
+	0x4ed8aa4aU,
+	0x5b9cca4fU,
+	0x682e6ff3U,
+	0x748f82eeU,
+	0x78a5636fU,
+	0x84c87814U,
+	0x8cc70208U,
+	0x90befffaU,
+	0xa4506cebU, // 60
+	0xbef9a3f7U,
+	0xc67178f2U,
+	0x98c7e2a2U,
+	0xfc08884dU,
+	0xcd2a11aeU,
+	0x510e527fU,
+	0x9b05688cU,
+	0xC3910C8EU,
+	0xfb6feee7U,
+	0x2a01a605U, // 70
+	0x0c2e12e0U,
+	0x4498517BU,
+	0x6a09e667U,
+	0xa4ce148bU,
+	0x95F61999U,
+	0xc19bf174U,
+	0xBB67AE85U,
+	0x3C6EF372U,
+	0xA54FF53AU,
+	0x1F83D9ABU, // 80
+	0x5BE0CD19U,
+	0x5C5C5C5CU,
+	0x36363636U,
+	0x80000000U,
+	0x000003FFU,
+	0x00000280U,
+	0x000004a0U,
+	0x00000300U
+};
+
 #define rotl(x,y) rotate(x,y)
 #define Ch(x,y,z) bitselect(z,y,x)
 #define Maj(x,y,z) Ch((x^z),y,z)
 
-#define EndianSwap(n) (rotl(n&0x00FF00FF,24U)|rotl(n&0xFF00FF00,8U))
+#define EndianSwap(n) (rotl(n & ES[0], 24U)|rotl(n & ES[1], 8U))
 
 #define Tr2(x)		(rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U))
 #define Tr1(x)		(rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U))
 #define Wr2(x)		(rotl(x, 25U) ^ rotl(x, 14U) ^ (x>>3U))
 #define Wr1(x)		(rotl(x, 15U) ^ rotl(x, 13U) ^ (x>>10U))
 
-#define RND(a, b, c, d, e, f, g, h, k)			\
-	h += Tr1(e) + Ch(e, f, g) + k;		\
-	d += h;					\
-	h += Tr2(a) + Maj(a, b, c);
+#define RND(a, b, c, d, e, f, g, h, k)	\
+	h += Tr1(e); 			\
+	h += Ch(e, f, g); 		\
+	h += k;				\
+	d += h;				\
+	h += Tr2(a); 			\
+	h += Maj(a, b, c);
 
 void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
 {
@@ -31,184 +157,184 @@ void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, con
 	uint4 W[4];
 
 	W[ 0].x = block0.x;
-	RND(A,B,C,D,E,F,G,H, W[0].x+0x428a2f98U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[0]);
 	W[ 0].y = block0.y;
-	RND(H,A,B,C,D,E,F,G, W[0].y+0x71374491U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[1]);
 	W[ 0].z = block0.z;
-	RND(G,H,A,B,C,D,E,F, W[0].z+0xb5c0fbcfU);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[2]);
 	W[ 0].w = block0.w;
-	RND(F,G,H,A,B,C,D,E, W[0].w+0xe9b5dba5U);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[3]);
 
 	W[ 1].x = block1.x;
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]);
 	W[ 1].y = block1.y;
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]);
 	W[ 1].z = block1.z;
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]);
 	W[ 1].w = block1.w;
-	RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]);
 
 	W[ 2].x = block2.x;
-	RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]);
 	W[ 2].y = block2.y;
-	RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]);
 	W[ 2].z = block2.z;
-	RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]);
 	W[ 2].w = block2.w;
-	RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]);
 
 	W[ 3].x = block3.x;
-	RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]);
 	W[ 3].y = block3.y;
-	RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]);
 	W[ 3].z = block3.z;
-	RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]);
 	W[ 3].w = block3.w;
-	RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]);
 
 	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]);
 
 	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]);
 
 	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]);
 
 	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+0x240ca1ccU);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]);
 
 	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]);
 
 	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]);
 
 	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]);
 
 	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+0x76f988daU);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]);
 
 	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]);
 
 	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]);
 
 	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]);
 
 	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]);
 
 	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]);
 
 	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]);
 
 	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]);
 
 	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]);
 
 	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]);
 
 	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]);
 
 	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]);
 
 	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]);
 
 	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]);
 
 	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]);
 
 	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]);
 
 	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]);
 
 	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]);
 
 	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]);
 
 	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]);
 
 	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]);
 
 	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]);
 
 	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]);
 
 	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]);
 
 	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]);
 
 	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]);
 
 	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]);
 
 	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]);
 
 	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]);
 
 	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]);
 
 	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]);
 
 	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]);
 
 	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]);
 
 	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]);
 
 	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]);
 
 	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]);
 
 	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]);
 
 	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]);
 
 	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]);
 
 	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]);
 
 	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]);
 	
 #undef A
 #undef B
@@ -237,191 +363,191 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block
 	uint4 W[4];
 
 	W[0].x = block0.x;
-	D=0x98c7e2a2U+W[0].x;
-	H=0xfc08884dU+W[0].x;
+	D= K[63] +W[0].x;
+	H= K[64] +W[0].x;
 
 	W[0].y = block0.y;
-	C=0xcd2a11aeU+Tr1(D)+Ch(D,0x510e527fU,0x9b05688cU)+W[0].y;
-	G=0xC3910C8EU+C+Tr2(H)+Ch(H,0xfb6feee7U,0x2a01a605U);
+	C= K[65] +Tr1(D)+Ch(D, K[66], K[67])+W[0].y;
+	G= K[68] +C+Tr2(H)+Ch(H, K[69] ,K[70]);
 
 	W[0].z = block0.z;
-	B=0x0c2e12e0U+Tr1(C)+Ch(C,D,0x510e527fU)+W[0].z;
-	F=0x4498517BU+B+Tr2(G)+Maj(G,H,0x6a09e667U);
+	B= K[71] +Tr1(C)+Ch(C,D,K[66])+W[0].z;
+	F= K[72] +B+Tr2(G)+Maj(G,H, K[73]);
 
 	W[0].w = block0.w;
-	A=0xa4ce148bU+Tr1(B)+Ch(B,C,D)+W[0].w; 
-	E=0x95F61999U+A+Tr2(F)+Maj(F,G,H);
+	A= K[74] +Tr1(B)+Ch(B,C,D)+W[0].w;
+	E= K[75] +A+Tr2(F)+Maj(F,G,H);
 
 	W[1].x = block1.x;
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x3956c25bU);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]);
 	W[1].y = block1.y;
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x59f111f1U);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]);
 	W[1].z = block1.z;
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x923f82a4U);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]);
 	W[1].w = block1.w;
-	RND(B,C,D,E,F,G,H,A, W[1].w+0xab1c5ed5U);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]);
 	
 	W[2].x = block2.x;
-	RND(A,B,C,D,E,F,G,H, W[2].x+0xd807aa98U);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]);
 	W[2].y = block2.y;
-	RND(H,A,B,C,D,E,F,G, W[2].y+0x12835b01U);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]);
 	W[2].z = block2.z;
-	RND(G,H,A,B,C,D,E,F, W[2].z+0x243185beU);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]);
 	W[2].w = block2.w;
-	RND(F,G,H,A,B,C,D,E, W[2].w+0x550c7dc3U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]);
 	
 	W[3].x = block3.x;
-	RND(E,F,G,H,A,B,C,D, W[3].x+0x72be5d74U);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]);
 	W[3].y = block3.y;
-	RND(D,E,F,G,H,A,B,C, W[3].y+0x80deb1feU);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]);
 	W[3].z = block3.z;
-	RND(C,D,E,F,G,H,A,B, W[3].z+0x9bdc06a7U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]);
 	W[3].w = block3.w;
-	RND(B,C,D,E,F,G,H,A, W[3].w+0xc19bf174U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]);
 
 	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+0xe49b69c1U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]);
 
 	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+0xefbe4786U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]);
 
 	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+0x0fc19dc6U);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]);
 
 	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+0x240ca1ccU);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]);
 
 	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x2de92c6fU);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]);
 
 	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x4a7484aaU);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]);
 
 	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x5cb0a9dcU);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]);
 
 	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+0x76f988daU);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]);
 
 	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+0x983e5152U);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]);
 
 	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+0xa831c66dU);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]);
 
 	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+0xb00327c8U);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]);
 
 	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+0xbf597fc7U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]);
 
 	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+0xc6e00bf3U);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]);
 
 	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+0xd5a79147U);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]);
 
 	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+0x06ca6351U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]);
 
 	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+0x14292967U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]);
 
 	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+0x27b70a85U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]);
 
 	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+0x2e1b2138U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]);
 
 	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+0x4d2c6dfcU);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]);
 
 	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+0x53380d13U);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]);
 
 	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x650a7354U);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]);
 
 	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x766a0abbU);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]);
 
 	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x81c2c92eU);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]);
 
 	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+0x92722c85U);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]);
 
 	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+0xa2bfe8a1U);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]);
 
 	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+0xa81a664bU);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]);
 
 	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+0xc24b8b70U);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]);
 
 	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+0xc76c51a3U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]);
 
 	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+0xd192e819U);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]);
 
 	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+0xd6990624U);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]);
 
 	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+0xf40e3585U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]);
 
 	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+0x106aa070U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]);
 
 	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+0x19a4c116U);
+	RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]);
 
 	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+0x1e376c08U);
+	RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]);
 
 	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+0x2748774cU);
+	RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]);
 
 	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+0x34b0bcb5U);
+	RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]);
 
 	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+0x391c0cb3U);
+	RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]);
 
 	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+0x4ed8aa4aU);
+	RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]);
 
 	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+0x5b9cca4fU);
+	RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]);
 
 	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+0x682e6ff3U);
+	RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]);
 
 	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+0x748f82eeU);
+	RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]);
 
 	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+0x78a5636fU);
+	RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]);
 
 	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+0x84c87814U);
+	RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]);
 
 	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+0x8cc70208U);
+	RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]);
 
 	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+0x90befffaU);
+	RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]);
 
 	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+0xa4506cebU);
+	RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]);
 
 	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+0xbef9a3f7U);
+	RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]);
 
 	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+0xc67178f2U);
+	RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]);
 	
 #undef A
 #undef B
@@ -432,8 +558,8 @@ void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block
 #undef G
 #undef H
 
-	*state0 += (uint4)(0x6A09E667U,0xBB67AE85U,0x3C6EF372U,0xA54FF53AU);
-	*state1 += (uint4)(0x510E527FU,0x9B05688CU,0x1F83D9ABU,0x5BE0CD19U);
+	*state0 += (uint4)(K[73], K[77], K[78], K[79]);
+	*state1 += (uint4)(K[66], K[67], K[80], K[81]);
 }
 
 __constant uint fixedW[64] =
@@ -658,7 +784,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 	for (uint i=0; i<1024; ++i) 
 	{
 		uint4 V[8];
-		uint j = X[7].x & 0x3FF;
+		uint j = X[7].x & K[85];
 		uint y = (j/LOOKUP_GAP);
 #pragma unroll
 		for(uint z=0; z<zSIZE; ++z)
@@ -696,9 +822,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
 	uint4 pad0 = midstate0, pad1 = midstate16;
 
-	SHA256(&pad0,&pad1, data, (uint4)(0x80000000U,0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x280));
-	SHA256_fresh(&ostate0,&ostate1, pad0^0x5C5C5C5CU, pad1^0x5C5C5C5CU, 0x5C5C5C5CU, 0x5C5C5C5CU);
-	SHA256_fresh(&tstate0,&tstate1, pad0^0x36363636U, pad1^0x36363636U, 0x36363636U, 0x36363636U);
+	SHA256(&pad0,&pad1, data, (uint4)(K[84],0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[86]));
+	SHA256_fresh(&ostate0,&ostate1, pad0^ K[82], pad1^ K[82], K[82], K[82]);
+	SHA256_fresh(&tstate0,&tstate1, pad0^ K[83], pad1^ K[83], K[83], K[83]);
 
 	tmp0 = tstate0;
 	tmp1 = tstate1;
@@ -712,46 +838,16 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 		X[i*2 ] = ostate0;
 		X[i*2+1] = ostate1;
 
-		SHA256(&pad0,&pad1, data, (uint4)(i+1,0x80000000U,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0,0x4a0U));
-		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
+		SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
+		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
 	}
 	scrypt_core(X,padcache);
 	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
 	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
 	SHA256_fixed(&tmp0,&tmp1);
-	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
+	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
 
 	bool result = (EndianSwap(ostate1.w) <= target);
 	if (result)
 		SETFOUND(gid);
 }
-
-/*-
- * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
- * 2012 Con Kolivas.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * This file was originally written by Colin Percival as part of the Tarsnap
- * online backup system.
- */

Some files were not shown because too many files changed in this diff