Browse Source

Remove GPU mining code.

Con Kolivas 12 years ago
parent
commit
2b621b6bc1
25 changed files with 76 additions and 10363 deletions
  1. 0 3
      ADL_SDK/.gitignore
  2. 0 3
      ADL_SDK/readme.txt
  3. 0 514
      GPU-README
  4. 7 23
      Makefile.am
  5. 21 125
      README
  6. 0 1442
      adl.c
  7. 0 28
      adl.h
  8. 0 274
      adl_functions.h
  9. 38 344
      cgminer.c
  10. 1 132
      configure.ac
  11. 0 1361
      diablo130302.cl
  12. 0 599
      diakgcn121016.cl
  13. 7 7
      driver-bitforce.c
  14. 0 2
      driver-icarus.c
  15. 1 1
      driver-modminer.c
  16. 0 1591
      driver-opencl.c
  17. 0 35
      driver-opencl.h
  18. 0 234
      findnonce.c
  19. 0 19
      findnonce.h
  20. 1 78
      miner.h
  21. 0 848
      ocl.c
  22. 0 42
      ocl.h
  23. 0 417
      phatk121016.cl
  24. 0 1388
      poclbm130302.cl
  25. 0 853
      scrypt130511.cl

+ 0 - 3
ADL_SDK/.gitignore

@@ -1,3 +0,0 @@
-adl_defines.h
-adl_sdk.h
-adl_structures.h

+ 0 - 3
ADL_SDK/readme.txt

@@ -1,3 +0,0 @@
-Please insert AMD ADL files adl_defines.h adl_sdk.h adl_structures.h here.
-They can be found here:
-http://developer.amd.com/tools/graphics-development/display-library-adl-sdk/

+ 0 - 514
GPU-README

@@ -1,514 +0,0 @@
-EXECUTIVE SUMMARY ON GPU USAGE (SEE ALSO SCRYPT-README FOR SCRYPT MINING):
-
-Single pool, regular desktop:
-
-cgminer -o http://pool:port -u username -p password
-
-By default if you have configured your system properly, cgminer will mine on
-ALL GPUs, but in "dynamic" mode which is designed to keep your system usable
-and sacrifice some mining performance.
-
-Single pool, dedicated miner:
-
-cgminer -o http://pool:port -u username -p password -I 9
-
-Single pool, first card regular desktop, 3 other dedicated cards:
-
-cgminer -o http://pool:port -u username -p password -I d,9,9,9
-
-Multiple pool, dedicated miner:
-
-cgminer -o http://pool1:port -u pool1username -p pool1password -o http://pool2:port -u pool2username -p pool2password -I 9
-
-Add overclocking settings, GPU and fan control for all cards:
-
-cgminer -o http://pool:port -u username -p password -I 9 --auto-fan --auto-gpu --gpu-engine 750-950 --gpu-memclock 300
-
-Add overclocking settings, GPU and fan control with different engine settings for 4 cards:
-
-cgminer -o http://pool:port -u username -p password -I 9 --auto-fan --auto-gpu --gpu-engine 750-950,945,700-930,960 --gpu-memclock 300
-
-READ WARNINGS AND DOCUMENTATION BELOW ABOUT OVERCLOCKING
-
-To configure multiple displays on linux you need to configure your Xorg cleanly
-to use them all:
-
-sudo aticonfig --adapter=all -f --initial
-
-On Linux you virtually always need to export your display settings before
-starting to get all the cards recognised and/or temperature+clocking working:
-
-export DISPLAY=:0
-
----
-BUILDING FOR GPU SUPPORT:
-
-	To build with GPU mining support:
-	Install AMD APP sdk, ideal version (see FAQ!) - no official place to
-	install it so just keep track of where it is if you're not installing
-	the include files and library files into the system directory.
-	(Do NOT install the ati amd sdk if you are on nvidia.)
-	To build with GPU monitoring & clocking support:
-	Extract the AMD ADL SDK, latest version - there is also no official
-	place for these files. Copy all the *.h files in the "include"
-	directory into cgminer's ADL_SDK directory.
-
-The easiest way to install the ATI AMD APP sdk on linux is to actually put it
-into a system location. Then building will be simpler. Download the correct
-version for either 32 bit or 64 bit from here:
-	http://developer.amd.com/tools/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/downloads/
-
-The best version for Radeon 5xxx and 6xxx is v2.5, while 7xxx cards need
-v2.6 or later, 2.7 seems the best.
-
-For versions 2.4 or earlier you will need to manually install them:
-This will give you a file with a name like:
- AMD-APP-SDK-v2.4-lnx64.tgz (64-bit)
-or
- AMD-APP-SDK-v2.4-lnx32.tgz (32-bit)
-
-Then:
-
-sudo su
-cd /opt
-tar xf /path/to/AMD-APP-SDK-v2.4-lnx##.tgz
-cd /
-tar xf /opt/AMD-APP-SDK-v2.4-lnx##/icd-registration.tgz
-ln -s /opt/AMD-APP-SDK-v2.4-lnx##/include/CL /usr/include
-ln -s /opt/AMD-APP-SDK-v2.4-lnx##/lib/x86_64/* /usr/lib/
-ldconfig
-
-Where ## is 32 or 64, depending on the bitness of the SDK you downloaded.
-If you are on 32 bit, x86_64 in the 2nd last line should be x86
-
-Basic *nix build instructions:
-	CFLAGS="-O2 -Wall -march=native" ./configure <options>
-	or if you haven't installed the AMD files in system locations:
-	CFLAGS="-O2 -Wall -march=native -I<path to AMD APP include>" LDFLAGS="-L<path to AMD APP lib/x86_64>" ./configure <options>
-	make
-
-	If it finds the opencl files it will inform you with
-	"OpenCL: FOUND. GPU mining support enabled."
-
-
----
-INTENSITY INFORMATION:
-
-Intensity correlates with the size of work being submitted at any one time to
-a GPU. The higher the number the larger the size of work. Generally speaking
-finding an optimal value rather than the highest value is the correct approach
-as hash rate rises up to a point with higher intensities but above that, the
-device may be very slow to return responses, or produce errors.
-
-NOTE: Running BTC intensities above 9 with current hardware is likely to only
-diminish return performance even if the hash rate might appear better. A good
-starting baseline intensity to try on dedicated miners is 9. 11 is the upper
-limit for intensity while BTC mining, if the GPU_USE_SYNC_OBJECTS variable
-is set (see FAQ). The upper limit for sha256 mining is 14 and 20 for scrypt.
-
-
----
-OVERCLOCKING WARNING AND INFORMATION
-
-AS WITH ALL OVERCLOCKING TOOLS YOU ARE ENTIRELY RESPONSIBLE FOR ANY HARM YOU
-MAY CAUSE TO YOUR HARDWARE. OVERCLOCKING CAN INVALIDATE WARRANTIES, DAMAGE
-HARDWARE AND EVEN CAUSE FIRES. THE AUTHOR ASSUMES NO RESPONSIBILITY FOR ANY
-DAMAGE YOU MAY CAUSE OR UNPLANNED CHILDREN THAT MAY OCCUR AS A RESULT.
-
-The GPU monitoring, clocking and fanspeed control incorporated into cgminer
-comes through use of the ATI Display Library. As such, it only supports ATI
-GPUs. Even if ADL support is successfully built into cgminer, unless the card
-and driver supports it, no GPU monitoring/settings will be available.
-
-Cgminer supports initial setting of GPU engine clock speed, memory clock
-speed, voltage, fanspeed, and the undocumented powertune feature of 69x0+ GPUs.
-The setting passed to cgminer is used by all GPUs unless separate values are
-specified. All settings can all be changed within the menu on the fly on a
-per-GPU basis.
-
-For example:
---gpu-engine 950 --gpu-memclock 825
-
-will try to set all GPU engine clocks to 950 and all memory clocks to 825,
-while:
---gpu-engine 950,945,930,960 --gpu-memclock 300
-
-will try to set the engine clock of card 0 to 950, 1 to 945, 2 to 930, 3 to
-960 and all memory clocks to 300.
-
-AUTO MODES:
-There are two "auto" modes in cgminer, --auto-fan and --auto-gpu. These can
-be used independently of each other and are complementary. Both auto modes
-are designed to safely change settings while trying to maintain a target
-temperature. By default this is set to 75 degrees C but can be changed with:
-
---temp-target
-e.g.
---temp-target 80
-Sets all cards' target temperature to 80 degrees.
-
---temp-target 75,85
-Sets card 0 target temperature to 75, and card 1 to 85 degrees.
-
-AUTO FAN:
-e.g.
---auto-fan (implies 85% upper limit)
---gpu-fan 25-85,65 --auto-fan
-
-Fan control in auto fan works off the theory that the minimum possible fan
-required to maintain an optimal temperature will use less power, make less
-noise, and prolong the life of the fan. In auto-fan mode, the fan speed is
-limited to 85% if the temperature is below "overheat" intentionally, as
-higher fanspeeds on GPUs do not produce significantly more cooling, yet
-significantly shorten the lifespan of the fans. If temperature reaches the
-overheat value, fanspeed will still be increased to 100%. The overheat value
-is set to 85 degrees by default and can be changed with:
-
---temp-overheat
-e.g.
---temp-overheat 75,85
-Sets card 0 overheat threshold to 75 degrees and card 1 to 85.
-
-AUTO GPU:
-e.g.
---auto-gpu --gpu-engine 750-950
---auto-gpu --gpu-engine 750-950,945,700-930,960
-
-GPU control in auto gpu tries to maintain as high a clock speed as possible
-while not reaching overheat temperatures. As a lower clock speed limit,
-the auto-gpu mode checks the GPU card's "normal" clock speed and will not go
-below this unless you have manually set a lower speed in the range. Also,
-unless a higher clock speed was specified at startup, it will not raise the
-clockspeed. If the temperature climbs, fanspeed is adjusted and optimised
-before GPU engine clockspeed is adjusted. If fan speed control is not available
-or already optimal, then GPU clock speed is only decreased if it goes over
-the target temperature by the hysteresis amount, which is set to 3 by default
-and can be changed with:
---temp-hysteresis
-If the temperature drops below the target temperature, and engine clock speed
-is not at the highest level set at startup, cgminer will raise the clock speed.
-If at any time you manually set an even higher clock speed successfully in
-cgminer, it will record this value and use it as its new upper limit (and the
-same for low clock speeds and lower limits). If the temperature goes over the
-cutoff limit (95 degrees by default), cgminer will completely disable the GPU
-from mining and it will not be re-enabled unless manually done so. The cutoff
-temperature can be changed with:
-
---temp-cutoff
-e.g.
---temp-cutoff 95,105
-Sets card 0 cutoff temperature to 95 and card 1 to 105.
-
---gpu-memdiff -125
-This setting will modify the memory speed whenever the GPU clock speed is
-modified by --auto-gpu. In this example, it will set the memory speed to
-be 125 Mhz lower than the GPU speed. This is useful for some cards like the
-6970 which normally don't allow a bigger clock speed difference. The 6970 is
-known to only allow -125, while the 7970 only allows -150.
-
-
-CHANGING SETTINGS:
-When setting values, it is important to realise that even though the driver
-may report the value was changed successfully, and the new card power profile
-information contains the values you set it to, that the card itself may
-refuse to use those settings. As the performance profile changes dynamically,
-querying the "current" value on the card can be wrong as well. So when changing
-values in cgminer, after a pause of 1 second, it will report to you the current
-values where you should check that your change has taken. An example is that
-6970 reference cards will accept low memory values but refuse to actually run
-those lower memory values unless they're within 125 of the engine clock speed.
-In that scenario, they usually set their real speed back to their default.
-
-Cgminer reports the so-called "safe" range of whatever it is you are modifying
-when you ask to modify it on the fly. However, you can change settings to values
-outside this range. Despite this, the card can easily refuse to accept your
-changes, or worse, to accept your changes and then silently ignore them. So
-there is absolutely no way to know how far to/from where/to it can set things safely or
-otherwise, and there is nothing stopping you from at least trying to set them
-outside this range. Being very conscious of these possible failures is why
-cgminer will report back the current values for you to examine how exactly the
-card has responded. Even within the reported range of accepted values by the
-card, it is very easy to crash just about any card, so it cannot use those
-values to determine what range to set. You have to provide something meaningful
-manually for cgminer to work with through experimentation.
-
-STARTUP / SHUTDOWN:
-When cgminer starts up, it tries to read off the current profile information
-for clock and fan speeds and stores these values. When quitting cgminer, it
-will then try to restore the original values. Changing settings outside of
-cgminer while it's running may be reset to the startup cgminer values when
-cgminer shuts down because of this.
-
----
-
-GPU DEVICE ISSUES and use of --gpu-map
-
-GPUs mine with OpenCL software via the GPU device driver. This means you need
-to have both an OpenCL SDK installed, and the GPU device driver RUNNING (i.e.
-Xorg up and running configured for all devices that will mine on linux etc.)
-Meanwhile, the hardware monitoring that cgminer offers for AMD devices relies
-on the ATI Display Library (ADL) software to work. OpenCL DOES NOT TALK TO THE
-ADL. There is no 100% reliable way to know that OpenCL devices are identical
-to the ADL devices, as neither give off the same information. cgminer does its
-best to correlate these devices based on the order that OpenCL and ADL numbers
-them. It is possible that this will fail for the following reasons:
-
-1. The device order is listed differently by OpenCL and ADL (rare), even if the
-number of devices is the same.
-2. There are more OpenCL devices than ADL. OpenCL stupidly sees one GPU as two
-devices if you have two monitors connected to the one GPU.
-3. There are more ADL devices than OpenCL. ADL devices include any ATI GPUs,
-including ones that can't mine, like some older R4xxx cards.
-
-To cope with this, the ADVANCED option for --gpu-map is provided with cgminer.
-DO NOT USE THIS UNLESS YOU KNOW WHAT YOU ARE DOING. The default will work the
-vast majority of the time unless you know you have a problem already.
-
-To get useful information, start cgminer with just the -n option. You will get
-output that looks like this:
-
-[2012-04-25 13:17:34] CL Platform 0 vendor: Advanced Micro Devices, Inc.
-[2012-04-25 13:17:34] CL Platform 0 name: AMD Accelerated Parallel Processing
-[2012-04-25 13:17:34] CL Platform 0 version: OpenCL 1.1 AMD-APP (844.4)
-[2012-04-25 13:17:34] Platform 0 devices: 3
-[2012-04-25 13:17:34]   0       Tahiti
-[2012-04-25 13:17:34]   1       Tahiti
-[2012-04-25 13:17:34]   2       Cayman
-[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 6900 Series hardware monitoring enabled
-[2012-04-25 13:17:34] 3 GPU devices max detected
-
-Note the number of devices here match, and the order is the same. If devices 1
-and 2 were different between Tahiti and Cayman, you could run cgminer with:
---gpu-map 2:1,1:2
-And it would swap the monitoring it received from ADL device 1 and put it to
-opencl device 2 and vice versa.
-
-If you have 2 monitors connected to the first device it would look like this:
-
-[2012-04-25 13:17:34] Platform 0 devices: 4
-[2012-04-25 13:17:34]   0       Tahiti
-[2012-04-25 13:17:34]   1       Tahiti
-[2012-04-25 13:17:34]   2       Tahiti
-[2012-04-25 13:17:34]   3       Cayman
-[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 6900 Series hardware monitoring enabled
-
-To work around this, you would use:
--d 0 -d 2 -d 3 --gpu-map 2:1,3:2
-
-If you have an older card as well as the rest it would look like this:
-
-[2012-04-25 13:17:34] Platform 0 devices: 3
-[2012-04-25 13:17:34]   0       Tahiti
-[2012-04-25 13:17:34]   1       Tahiti
-[2012-04-25 13:17:34]   2       Cayman
-[2012-04-25 13:17:34] GPU 0 AMD Radeon HD 4500 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 1 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 2 AMD Radeon HD 7900 Series  hardware monitoring enabled
-[2012-04-25 13:17:34] GPU 3 AMD Radeon HD 6900 Series hardware monitoring enabled
-
-To work around this you would use:
---gpu-map 0:1,1:2,2:3
-
-
----
-GPU FAQ:
-
-Q: Can I change the intensity settings individually for each GPU?
-A: Yes, pass a list separated by commas such as -I d,4,9,9
-
-Q: The CPU usage is high.
-A: The ATI drivers after 11.6 have a bug that makes them consume 100% of one
-CPU core unnecessarily so downgrade to 11.6. Binding cgminer to one CPU core on
-windows can minimise it to 100% (instead of more than one core). Driver version
-11.11 on linux and 11.12 on windows appear to have fixed this issue. Note that
-later drivers may have an apparent return of high CPU usage. Try
-'export GPU_USE_SYNC_OBJECTS=1' on Linux before starting cgminer. You can also
-set this variable in windows via a batch file or on the command line before
-starting cgminer with 'setx GPU_USE_SYNC_OBJECTS 1'
-
-Q: My GPU hangs and I have to reboot it to get it going again?
-A: The more aggressively the mining software uses your GPU, the less overclock
-you will be able to run. You are more likely to hit your limits with cgminer
-and you will find you may need to overclock your GPU less aggressively. The
-software cannot be responsible and make your GPU hang directly. If you simply
-cannot get it to ever stop hanging, try decreasing the intensity, and if even
-that fails, try changing to the poclbm kernel with -k poclbm, though you will
-sacrifice performance. cgminer is designed to try and safely restart GPUs as
-much as possible, but NOT if that restart might actually crash the rest of the
-GPUs mining, or even the machine. It tries to restart them with a separate
-thread and if that separate thread dies, it gives up trying to restart any more
-GPUs.
-
-Q: Can you change the autofan/autogpu to change speeds in a different manner?
-A: The defaults are sane and safe. I'm not interested in changing them
-further. The starting fan speed is set to 50% in auto-fan mode as a safety
-precaution.
-
-Q: I upgraded cgminer version and my hashrate suddenly dropped!
-A: No, you upgraded your SDK version unwittingly between upgrades of cgminer
-and that caused your hashrate to drop. See the next question.
-
-Q: I upgraded my ATI driver/SDK/cgminer and my hashrate suddenly dropped!
-A: The hashrate performance in cgminer is tied to the version of the ATI SDK
-that is installed only for the very first time cgminer is run. This generates
-binaries that are used by the GPU every time after that. Any upgrades to the
-SDK after that time will have no effect on the binaries. However, if you
-install a fresh version of cgminer, and have since upgraded your SDK, new
-binaries will be built. It is known that the 2.6 ATI SDK has a huge hashrate
-penalty on generating new binaries. It is recommended to not use this SDK at
-this time unless you are using an ATI 7xxx card that needs it.
-
-Q: Which AMD SDK is the best for cgminer?
-A: At the moment, versions 2.4 and 2.5 work the best for R5xxx and R6xxx GPUS.
-SDK 2.6 or 2.7 works best for R7xxx. SDK 2.8 is known to have many problems.
-If you need to use the 2.6+ SDK or R7xxx or later, the phatk kernel will
-perform poorly, while the diablo or my custom modified poclbm kernel are
-optimised for it.
-
-Q: Which AMD driver is the best?
-A: Unfortunately AMD has a history of having quite a few releases with issues
-when it comes to mining, either in terms of breaking mining, increasing CPU
-usage or very low hashrates. Only experimentation can tell you for sure, but
-some good releases were 11.6, 11.12, 12.4 and 12.8. Note that older cards may
-not work with the newer drivers.
-
-Q: I have multiple SDKs installed, can I choose which one it uses?
-A: Run cgminer with the -n option and it will list all the platforms currently
-installed. Then you can tell cgminer which platform to use with --gpu-platform.
-
-Q: cgminer reports no devices or only one device on startup on Linux although
-I have multiple devices and drivers+SDK installed properly?
-A: Try "export DISPLAY=:0" before running cgminer.
-
-Q: cgminer crashes immediately on startup.
-A: One of the common reasons for this is that you have mixed files on your
-machine for the driver or SDK. Windows has a nasty history of not cleanly
-uninstalling files so you may have to use third party tools like driversweeper
-to remove old versions. The other common reason for this is windows
-antivirus software is disabling one of the DLLs from working. If cgminer
-starts with the -T option but never starts without it, this is a sure fire
-sign you have this problem and will have to disable your antivirus or make
-exceptions.
-
-Q: Cgminer cannot see any of my GPUs even though I have configured them all
-to be enabled and installed OpenCL (+/- Xorg is running and the DISPLAY
-variable is exported on linux)?
-A: Check the output of 'cgminer -n', it will list what OpenCL devices your
-installed SDK recognises. If it lists none, you have a problem with your
-version or installation of the SDK.
-
-Q: Cgminer is mining on the wrong GPU, I want it on the AMD but it's mining
-on my on board GPU?
-A: Make sure the AMD OpenCL SDK is installed, check the output of 'cgminer -n'
-and use the appropriate parameter with --gpu-platform.
-
-Q: I'm getting much lower hashrates than I should be for my GPU?
-A: Look at your driver/SDK combination and disable power saving options for
-your GPU. Specifically look to disable ULPS. Make sure not to set intensity
-above 11 for BTC mining.
-
-Q: Can I mine with AMD while running Nvidia or Intel GPUs at the same time?
-A: If you can install both drivers successfully (easier on windows) then
-yes, using the --gpu-platform option.
-
-Q: Can I mine with Nvidia or Intel GPUs?
-A: Yes but their hashrate is very poor and likely you'll be using much more
-energy than you'll be earning in coins.
-
-Q: Can I mine on both Nvidia and AMD GPUs at the same time?
-A: No, you must run one instance of cgminer with the --gpu-platform option for
-each.
-
-Q: Can I mine on Linux without running Xorg?
-A: With Nvidia you can, but with AMD you cannot.
-
-Q: I can't get anywhere near enough hashrate for scrypt compared to other
-people?
-A: You may not have enough system RAM as this is also required.
-
-Q: My scrypt hashrate is high but the pool reports only a tiny proportion of
-my hashrate?
-A: You are generating garbage hashes due to your choice of settings. Your
-Work Utility (WU) value will confirm you are not generating garbage. You
-should be getting about .9WU per kHash. If not, then try decreasing your
-intensity, do not increase the number of gpu-threads, and consider adding
-system RAM to match your GPU ram. You may also be using a bad combination
-of driver and/or SDK. If you are getting a lot more HW errors with the
-current version of cgminer but were not on an older version, chances are that
-the older version simply wasn't reporting them so going back to an older
-version is not a real solution.
-
-Q: Scrypt fails to initialise the kernel every time?
-A: Your parameters are too high. Don't add GPU threads, don't set intensity
-too high, decrease thread concurrency. See the SCRYPT-README for a lot more
-help.
-
-Q: Cgminer stops mining (or my GPUs go DEAD) and I can't close it?
-A: Once the driver has crashed, there is no way for cgminer to close cleanly.
-You will have to kill it, and depending on how corrupted your driver state
-has gotten, you may even need to reboot. Windows is known to reset drivers
-when they fail and cgminer will be stuck trying to use the old driver instance.
-GPUs going SICK or DEAD is a sign of overclocking too much, overheating,
-driver or hardware instability.
-
-Q: I can't get any monitoring of temperatures or fanspeed with cgminer when
-I start it remotely?
-A: With linux, make sure to export the DISPLAY variable. On windows, you
-cannot access these monitoring values via RDP. This should work with tightVNC
-or teamviewer though.
-
-Q: I change my GPU engine/memory/voltage and cgminer reports back no change?
-A: Cgminer asks the GPU using the ATI Display Library to change settings, but
-the driver and hardware are free to do what they want with that query, including
-ignoring it. Some GPUs are locked with one or more of those properties as well.
-The most common of these is that many GPUs only allow a fixed difference
-between the engine clock speed and the memory clock speed (such as the memory
-being no lower than the engine - 150). Other 3rd party tools have unofficial
-data on these devices on windows and can get the memory clock speed down
-further but cgminer does not have access to these means.
-
-Q: I have multiple GPUs and although many devices show up, it appears to be
-working only on one GPU splitting it up.
-A: Your driver setup is failing to properly use the accessory GPUs. Your
-driver may be configured wrong or you have a driver version that needs a dummy
-plug on all the GPUs that aren't connected to a monitor.
-
-Q: Should I use crossfire/SLI?
-A: It does not benefit mining at all and depending on the GPU may actually
-worsen performance.
-
-Q: I have some random GPU performance related problem not addressed above.
-A: Seriously, it's the driver and/or SDK. Uninstall them and start again,
-noting there is no clean way to uninstall them so you have to use extra tools
-or do it manually.
-
-Q: Do I need to recompile after updating my driver/SDK?
-A: No. The software is unchanged regardless of which driver/SDK/ADL_SDK version
-you are running. However if you change SDKs you should delete any generated
-.bin files for them to be recreated with the new SDK.
-
-Q: I do not want cgminer to modify my engine/clock/fanspeed?
-A: Cgminer only modifies values if you tell it to via some parameters.
-Otherwise it will just monitor the values.
-
-Q: Cgminer does not disable my GPU even though it hit the overheat temperature?
-A: It only disables GPUs if you enable the --auto-gpu option. If you don't give
-it parameters for engine clock it will not adjust engine clocks with this
-option.
-
-Q: Can I use the open source radeon driver for AMD GPUs or the nouveau driver
-for NVIDIA GPUs?
-A: None of them currently support OpenCL, so no you cannot.
-
----
-
-This code is provided entirely free of charge by the programmer in his spare
-time so donations would be greatly appreciated. Please consider donating to the
-address below.
-
-Con Kolivas <kernel@kolivas.org>
-15qSxP1SQcUX3o4nhkfdbgyoWEFMomJ4rZ

+ 7 - 23
Makefile.am

@@ -9,12 +9,12 @@ else
 USBUTILS_INCLUDES =
 USBUTILS_INCLUDES =
 endif
 endif
 
 
-EXTRA_DIST	= example.conf m4/gnulib-cache.m4 linux-usb-cgminer \
-		  ADL_SDK/readme.txt api-example.php miner.php	\
+EXTRA_DIST	= example.conf linux-usb-cgminer \
+		  api-example.php miner.php	\
 		  API.class API.java api-example.c windows-build.txt \
 		  API.class API.java api-example.c windows-build.txt \
-		  bitstreams/* API-README FPGA-README SCRYPT-README \
+		  bitstreams/* API-README FPGA-README \
 		  bitforce-firmware-flash.c hexdump.c ASIC-README \
 		  bitforce-firmware-flash.c hexdump.c ASIC-README \
-		  01-cgminer.rules GPU-README
+		  01-cgminer.rules
 
 
 SUBDIRS		= lib compat ccan
 SUBDIRS		= lib compat ccan
 
 
@@ -24,18 +24,16 @@ bin_PROGRAMS	= cgminer
 
 
 cgminer_LDFLAGS	= $(PTHREAD_FLAGS)
 cgminer_LDFLAGS	= $(PTHREAD_FLAGS)
 cgminer_LDADD	= $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
 cgminer_LDADD	= $(DLOPEN_FLAGS) @LIBCURL_LIBS@ @JANSSON_LIBS@ @PTHREAD_LIBS@ \
-		  @OPENCL_LIBS@ @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \
+		  @NCURSES_LIBS@ @PDCURSES_LIBS@ @WS2_LIBS@ \
 		  @LIBUSB_LIBS@ @MM_LIBS@ @RT_LIBS@ \
 		  @LIBUSB_LIBS@ @MM_LIBS@ @RT_LIBS@ \
 		  @MATH_LIBS@ lib/libgnu.a ccan/libccan.a
 		  @MATH_LIBS@ lib/libgnu.a ccan/libccan.a
 
 
 if HAVE_WINDOWS
 if HAVE_WINDOWS
-cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@
+cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
 else
 else
-cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @OPENCL_FLAGS@ @LIBCURL_CFLAGS@
+cgminer_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib @LIBCURL_CFLAGS@
 endif
 endif
 
 
-cgminer_CPPFLAGS += $(ADL_CPPFLAGS)
-
 # common sources
 # common sources
 cgminer_SOURCES := cgminer.c
 cgminer_SOURCES := cgminer.c
 
 
@@ -45,20 +43,6 @@ cgminer_SOURCES	+= elist.h miner.h compat.h bench_block.h	\
 
 
 cgminer_SOURCES	+= logging.c
 cgminer_SOURCES	+= logging.c
 
 
-if HAS_OPENCL
-bin_SCRIPTS	= $(top_srcdir)/*.cl
-
-cgminer_SOURCES += driver-opencl.h driver-opencl.c
-cgminer_SOURCES += ocl.c ocl.h findnonce.c findnonce.h
-cgminer_SOURCES += adl.c adl.h adl_functions.h
-cgminer_SOURCES += *.cl
-
-if HAS_SCRYPT
-cgminer_SOURCES += scrypt.c scrypt.h
-endif
-
-endif
-
 if NEED_FPGAUTILS
 if NEED_FPGAUTILS
 cgminer_SOURCES += fpgautils.c fpgautils.h
 cgminer_SOURCES += fpgautils.c fpgautils.h
 endif
 endif

+ 21 - 125
README

@@ -1,6 +1,4 @@
-This is a multi-threaded multi-pool GPU, FPGA and ASIC miner with ATI GPU
-monitoring, (over)clocking and fanspeed support for bitcoin and derivative
-coins. Do not use on multiple block chains at the same time!
+This is a multi-threaded multi-pool FPGA and ASIC miner for bitcoin.
 
 
 This code is provided entirely free of charge by the programmer in his spare
 This code is provided entirely free of charge by the programmer in his spare
 time so donations would be greatly appreciated. Please consider donating to the
 time so donations would be greatly appreciated. Please consider donating to the
@@ -27,8 +25,7 @@ irc://irc.freenode.net/cgminer
 
 
 License: GPLv3.  See COPYING for details.
 License: GPLv3.  See COPYING for details.
 
 
-SEE ALSO API-README, ASIC-README, FPGA-README, GPU-README AND SCRYPT-README FOR
-MORE INFORMATION ON EACH.
+SEE ALSO API-README, ASIC-README AND FPGA-README FOR MORE INFORMATION ON EACH.
 
 
 ---
 ---
 
 
@@ -91,12 +88,6 @@ Optional:
 	curses dev library
 	curses dev library
 	(libncurses5-dev or libpdcurses on WIN32 for text user interface)
 	(libncurses5-dev or libpdcurses on WIN32 for text user interface)
 
 
-	AMD APP SDK		http://developer.amd.com/sdks/AMDAPPSDK
-	(This sdk is mandatory for GPU mining)
-
-	AMD ADL SDK		http://developer.amd.com/sdks/ADLSDK
-	(This sdk is mandatory for ATI GPU monitoring & clocking)
-
 	libudev dev library (libudev-dev)
 	libudev dev library (libudev-dev)
 	(This is only required for ASIC+FPGA support and is linux only)
 	(This is only required for ASIC+FPGA support and is linux only)
 
 
@@ -109,10 +100,6 @@ If building on Red Hat:
                          curl libcurl libcurl-devel openssh
                          curl libcurl libcurl-devel openssh
 
 
 CGMiner specific configuration options:
 CGMiner specific configuration options:
-  --enable-opencl         Enable support for GPU mining with opencl
-  --disable-adl           Override detection and disable building with adl
-  --enable-scrypt         Compile support for scrypt litecoin mining (default
-                          disabled)
   --enable-avalon         Compile support for Avalon (default disabled)
   --enable-avalon         Compile support for Avalon (default disabled)
   --enable-bflsc          Compile support for BFL ASICs (default disabled)
   --enable-bflsc          Compile support for BFL ASICs (default disabled)
   --enable-bitforce       Compile support for BitForce FPGAs (default
   --enable-bitforce       Compile support for BitForce FPGAs (default
@@ -161,8 +148,6 @@ Options for both config file and command line:
 --api-mcast-code <arg> Code expected in the API Multicast message, don't use '-' (default: "FTW")
 --api-mcast-code <arg> Code expected in the API Multicast message, don't use '-' (default: "FTW")
 --api-mcast-port <arg> API Multicast listen port, (default: 4028)
 --api-mcast-port <arg> API Multicast listen port, (default: 4028)
 --api-port          Port number of miner API (default: 4028)
 --api-port          Port number of miner API (default: 4028)
---auto-fan          Automatically adjust all GPU fan speeds to maintain a target temperature
---auto-gpu          Automatically adjust all GPU engine clock speeds to maintain a target temperature
 --balance           Change multipool strategy from failover to even share balance
 --balance           Change multipool strategy from failover to even share balance
 --benchmark         Run cgminer in benchmark mode - produces no shares
 --benchmark         Run cgminer in benchmark mode - produces no shares
 --compact           Use compact display without per device statistics
 --compact           Use compact display without per device statistics
@@ -173,7 +158,7 @@ Options for both config file and command line:
 --failover-only     Don't leak work to backup pools when primary pool is lagging
 --failover-only     Don't leak work to backup pools when primary pool is lagging
 --fix-protocol      Do not redirect to a different getwork protocol (eg. stratum)
 --fix-protocol      Do not redirect to a different getwork protocol (eg. stratum)
 --hotplug <arg>     Set hotplug check time to <arg> seconds (0=never default: 5) - only with libusb
 --hotplug <arg>     Set hotplug check time to <arg> seconds (0=never default: 5) - only with libusb
---kernel-path|-K <arg> Specify a path to where bitstream and kernel files are (default: "/usr/local/bin")
+--kernel-path|-K <arg> Specify a path to where bitstream files are (default: "/usr/local/bin")
 --load-balance      Change multipool strategy from failover to quota based balance
 --load-balance      Change multipool strategy from failover to quota based balance
 --log|-l <arg>      Interval in seconds between log output (default: 5)
 --log|-l <arg>      Interval in seconds between log output (default: 5)
 --lowmem            Minimise caching of shares for low memory applications
 --lowmem            Minimise caching of shares for low memory applications
@@ -192,7 +177,6 @@ Options for both config file and command line:
 --scan-time|-s <arg> Upper bound on time spent scanning current work, in seconds (default: 60)
 --scan-time|-s <arg> Upper bound on time spent scanning current work, in seconds (default: 60)
 --sched-start <arg> Set a time of day in HH:MM to start mining (a once off without a stop time)
 --sched-start <arg> Set a time of day in HH:MM to start mining (a once off without a stop time)
 --sched-stop <arg>  Set a time of day in HH:MM to stop mining (will quit without a start time)
 --sched-stop <arg>  Set a time of day in HH:MM to stop mining (will quit without a start time)
---scrypt            Use the scrypt algorithm for mining (litecoin only)
 --sharelog <arg>    Append share log to file
 --sharelog <arg>    Append share log to file
 --shares <arg>      Quit after mining N shares (default: unlimited)
 --shares <arg>      Quit after mining N shares (default: unlimited)
 --socks-proxy <arg> Set socks4 proxy (host:port) for all pools without a proxy specified
 --socks-proxy <arg> Set socks4 proxy (host:port) for all pools without a proxy specified
@@ -244,43 +228,6 @@ FPGA only options:
 See FPGA-README for more information regarding this.
 See FPGA-README for more information regarding this.
 
 
 
 
-GPU only options:
-
---auto-fan          Automatically adjust all GPU fan speeds to maintain a target temperature
---auto-gpu          Automatically adjust all GPU engine clock speeds to maintain a target temperature
---disable-gpu|-G    Disable GPU mining even if suitable devices exist
---gpu-threads|-g <arg> Number of threads per GPU (1 - 10) (default: 2)
---gpu-dyninterval <arg> Set the refresh interval in ms for GPUs using dynamic intensity (default: 7)
---gpu-engine <arg>  GPU engine (over)clock range in Mhz - one value, range and/or comma separated list (e.g. 850-900,900,750-850)
---gpu-fan <arg>     GPU fan percentage range - one value, range and/or comma separated list (e.g. 25-85,85,65)
---gpu-map <arg>     Map OpenCL to ADL device order manually, paired CSV (e.g. 1:0,2:1 maps OpenCL 1 to ADL 0, 2 to 1)
---gpu-memclock <arg> Set the GPU memory (over)clock in Mhz - one value for all or separate by commas for per card.
---gpu-memdiff <arg> Set a fixed difference in clock speed between the GPU and memory in auto-gpu mode
---gpu-powertune <arg> Set the GPU powertune percentage - one value for all or separate by commas for per card.
---gpu-reorder       Attempt to reorder GPU devices according to PCI Bus ID
---gpu-vddc <arg>    Set the GPU voltage in Volts - one value for all or separate by commas for per card.
---intensity|-I <arg> Intensity of GPU scanning (d or -10 -> 10, default: d to maintain desktop interactivity)
---kernel|-k <arg>   Override kernel to use (diablo, poclbm, phatk or diakgcn) - one value or comma separated
---ndevs|-n          Enumerate number of detected GPUs and exit
---no-restart        Do not attempt to restart GPUs that hang
---temp-hysteresis <arg> Set how much the temperature can fluctuate outside limits when automanaging speeds (default: 3)
---temp-overheat <arg> Overheat temperature when automatically managing fan and GPU speeds (default: 85)
---temp-target <arg> Target temperature when automatically managing fan and GPU speeds (default: 75)
---vectors|-v <arg>  Override detected optimal vector (1, 2 or 4) - one value or comma separated list
---worksize|-w <arg> Override detected optimal worksize - one value or comma separated list
-
-See GPU-README for more information regarding GPU mining.
-
-
-SCRYPT only options:
-
---lookup-gap <arg>  Set GPU lookup gap for scrypt mining, comma separated
---shaders <arg>     GPU shaders per card for tuning scrypt, comma separated
---thread-concurrency <arg> Set GPU thread concurrency for scrypt mining, comma separated
-
-See SCRYPT-README for more information regarding litecoin mining.
-
-
 Cgminer should automatically find all of your Avalon ASIC, BFL ASIC, BitForce
 Cgminer should automatically find all of your Avalon ASIC, BFL ASIC, BitForce
 FPGAs, Icarus bitstream FPGAs, Klondike ASIC, ASICMINER usb block erupters,
 FPGAs, Icarus bitstream FPGAs, Klondike ASIC, ASICMINER usb block erupters,
 KnC ASICs, Hashfast ASICs and ModMiner FPGAs.
 KnC ASICs, Hashfast ASICs and ModMiner FPGAs.
@@ -298,7 +245,7 @@ drivers for you and then once you plug in your device you can choose the
 "list all devices" from the "option" menu and you should be able to see the
 "list all devices" from the "option" menu and you should be able to see the
 device as something like: "BitFORCE SHA256 SC". Choose the install or replace
 device as something like: "BitFORCE SHA256 SC". Choose the install or replace
 driver option and select WinUSB. You can either google for zadig or download
 driver option and select WinUSB. You can either google for zadig or download
-it from the cgminer directoy in the DOWNLOADS link above.
+it from the cgminer directory in the DOWNLOADS link above.
 
 
 LINUX:
 LINUX:
 
 
@@ -387,7 +334,7 @@ WHILE RUNNING:
 
 
 The following options are available while running with a single keypress:
 The following options are available while running with a single keypress:
 
 
-[P]ool management [G]PU management [S]ettings [D]isplay options [Q]uit
+[P]ool management [S]ettings [D]isplay options [Q]uit
 
 
 P gives you:
 P gives you:
 
 
@@ -422,33 +369,15 @@ co[M]pact: off
 Q quits the application.
 Q quits the application.
 
 
 
 
-G gives you something like:
-
-GPU 0: [124.2 / 191.3 Mh/s] [A:77  R:33  HW:0  U:1.73/m  WU 1.73/m]
-Temp: 67.0 C
-Fan Speed: 35% (2500 RPM)
-Engine Clock: 960 MHz
-Memory Clock: 480 Mhz
-Vddc: 1.200 V
-Activity: 93%
-Powertune: 0%
-Last initialised: [2011-09-06 12:03:56]
-Thread 0: 62.4 Mh/s Enabled ALIVE
-Thread 1: 60.2 Mh/s Enabled ALIVE
-
-[E]nable [D]isable [R]estart GPU [C]hange settings
-Or press any other key to continue
-
-
 The running log shows output like this:
 The running log shows output like this:
 
 
- [2012-10-12 18:02:20] Accepted f0c05469 Diff 1/1 GPU 0 pool 1
- [2012-10-12 18:02:22] Accepted 218ac982 Diff 7/1 GPU 1 pool 1
- [2012-10-12 18:02:23] Accepted d8300795 Diff 1/1 GPU 3 pool 1
- [2012-10-12 18:02:24] Accepted 122c1ff1 Diff 14/1 GPU 1 pool 1
+ [2013-11-09 11:04:41] Accepted 01b3bde7 Diff 150/128 AVA 1 pool 0
+ [2013-11-09 11:04:49] Accepted 015df995 Diff 187/128 AVA 1 pool 0
+ [2013-11-09 11:04:50] Accepted 01163b68 Diff 236/128 AVA 1 pool 0
+ [2013-11-09 11:04:53] Accepted 9f745840 Diff 411/128 BAS 1 pool 0
 
 
-The 8 byte hex value are the 2nd 8 bytes of the share being submitted to the
-pool. The 2 diff values are the actual difficulty target that share reached
+The 8 byte hex values are the 1st nonzero bytes of the share being submitted to
+the pool. The 2 diff values are the actual difficulty target that share reached
 followed by the difficulty target the pool is currently asking for.
 followed by the difficulty target the pool is currently asking for.
 
 
 ---
 ---
@@ -468,7 +397,7 @@ HW:  The number of HardWare errors
 WU:  The Work Utility defined as the number of diff1 shares work / minute
 WU:  The Work Utility defined as the number of diff1 shares work / minute
      (accepted or rejected).
      (accepted or rejected).
 
 
- GPU 1: 73.5C 2551RPM | 427.3/443.0Mh/s | A:8 R:0 HW:0 WU:4.39/m
+ BAS 1:  max 67C 3.27V | 62.29G/62.19Gh/s | A:140813 R:256 HW:2860 WU: 852.0/m
 
 
 Each column is as follows:
 Each column is as follows:
 Temperature (if supported)
 Temperature (if supported)
@@ -650,7 +579,7 @@ For RPC API details see the API-README file
 
 
 FAQ
 FAQ
 
 
-Q: Can I mine on servers from different networks (eg smartcoin and bitcoin) at
+Q: Can I mine on servers from different networks (eg xxxcoin and bitcoin) at
 the same time?
 the same time?
 A: No, cgminer keeps a database of the block it's working on to ensure it does
 A: No, cgminer keeps a database of the block it's working on to ensure it does
 not work on stale blocks, and having different blocks from two networks would
 not work on stale blocks, and having different blocks from two networks would
@@ -701,15 +630,10 @@ A: Virtually always, the DEFAULT parameters give the best results. Most user
 defined settings lead to worse performance. The ONLY thing most users should
 defined settings lead to worse performance. The ONLY thing most users should
 need to set is the Intensity for GPUs.
 need to set is the Intensity for GPUs.
 
 
-Q: What happened to CPU mining?
-A: Being increasingly irrelevant for most users, and a maintenance issue, it is
-no longer under active development and will not be supported. No binary builds
-supporting CPU mining will be released. Virtually all remaining users of CPU
-mining are as back ends for illegal botnets. The main reason cgminer is being
-inappopriately tagged as a virus by antivirus software is due to the trojans
-packaging a CPU mining capable version of it. There is no longer ANY CPU mining
-code in cgminer. If you are mining bitcoin with CPU today, you are spending
-1000x more in electricity costs than you are earning in bitcoin.
+Q: What happened to CPU and GPU mining?
+A: Their efficiency makes them irrelevant in the bitcoin mining world today
+and the author has no interest in supporting alternative coins that are better
+mined by these devices.
 
 
 Q: GUI version?
 Q: GUI version?
 A: No. The RPC interface makes it possible for someone else to write one
 A: No. The RPC interface makes it possible for someone else to write one
@@ -717,8 +641,7 @@ though.
 
 
 Q: I'm having an issue. What debugging information should I provide?
 Q: I'm having an issue. What debugging information should I provide?
 A: Start cgminer with your regular commands and add -D -T --verbose and provide
 A: Start cgminer with your regular commands and add -D -T --verbose and provide
-the full startup output and a summary of your hardware, operating system, ATI
-driver version and ATI stream version.
+the full startup output and a summary of your hardware and operating system.
 
 
 Q: Why don't you provide win64 builds?
 Q: Why don't you provide win64 builds?
 A: Win32 builds work everywhere and there is precisely zero advantage to a
 A: Win32 builds work everywhere and there is precisely zero advantage to a
@@ -730,36 +653,13 @@ their various features. Linux offers much better long term stability and
 remote monitoring and security, while windows offers you overclocking tools
 remote monitoring and security, while windows offers you overclocking tools
 that can achieve much more than cgminer can do on linux.
 that can achieve much more than cgminer can do on linux.
 
 
-Q: Can I mine with cgminer on a MAC?
-A: cgminer will compile on OSX, but the performance of GPU mining is
-compromised due to the opencl implementation on OSX, there is no temperature
-or fanspeed monitoring, and the cooling design of most MACs, despite having
-powerful GPUs, will usually not cope with constant usage leading to a high
-risk of thermal damage. It is highly recommended not to mine on a MAC unless
-it is to a USB device.
-
-Q: I'm trying to mine litecoin but cgminer shows MH values instead of kH and
-submits no shares?
-A: Add the --scrypt parameter.
-
-Q: I switch users on windows and my mining stops working?
-A: That's correct, it does. It's a permissions issue that there is no known
-fix for due to monitoring of GPU fanspeeds and temperatures. If you disable
-the monitoring with --no-adl it should switch okay.
-
 Q: My network gets slower and slower and then dies for a minute?
 Q: My network gets slower and slower and then dies for a minute?
 A: Try the --net-delay option.
 A: Try the --net-delay option.
 
 
 Q: How do I tune for p2pool?
 Q: How do I tune for p2pool?
-A: p2pool has very rapid expiration of work and new blocks, it is suggested you
-decrease intensity by 1 from your optimal value, and decrease GPU threads to 1
-with -g 1. It is also recommended to use --failover-only since the work is
-effectively like a different block chain. If mining with a minirig, it is worth
-adding the --bfl-range option.
-
-Q: Are OpenCL kernels from other mining software useable in cgminer?
-A: No, the APIs are slightly different between the different software and they
-will not work.
+A: It is recommended to use --failover-only, since the work is effectively like
+a different block chain, and to leave --no-submit-stale disabled. If mining with
+a BFL (FPGA) minirig, it is worth adding the --bfl-range option.
 
 
 Q: I run PHP on windows to access the API with the example miner.php. Why does
 Q: I run PHP on windows to access the API with the example miner.php. Why does
 it fail when php is installed properly but I only get errors about Sockets not
 it fail when php is installed properly but I only get errors about Sockets not
@@ -802,10 +702,6 @@ average to find one 8 difficulty share, per 8 single difficulty shares found.
 However, the number is actually random and converges over time, it is an average,
 However, the number is actually random and converges over time, it is an average,
 not an exact value, thus you may find more or less than the expected average.
 not an exact value, thus you may find more or less than the expected average.
 
 
-Q: Can I make a donation in litecoin?
-A: Yes, see SCRYPT-README for the address, but the author prefers bitcoin if
-possible.
-
 Q: My keyboard input momentarily pauses or repeats keys every so often on
 Q: My keyboard input momentarily pauses or repeats keys every so often on
 windows while mining?
 windows while mining?
 A: The USB implementation on windows can be very flaky on some hardware and
 A: The USB implementation on windows can be very flaky on some hardware and

+ 0 - 1442
adl.c

@@ -1,1442 +0,0 @@
-/*
- * Copyright 2011-2012 Con Kolivas
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.  See COPYING for more details.
- */
-
-#include "config.h"
-
-#if defined(HAVE_ADL) && (defined(__linux) || defined (WIN32))
-
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-#ifdef HAVE_CURSES
-#include <curses.h>
-#endif
-
-#include "miner.h"
-#include "ADL_SDK/adl_sdk.h"
-#include "compat.h"
-
-#if defined (__linux)
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <unistd.h>
-#else /* WIN32 */
-#include <windows.h>
-#include <tchar.h>
-#endif
-#include "adl_functions.h"
-
-#ifndef HAVE_CURSES
-#define wlogprint(...)  applog(LOG_WARNING, __VA_ARGS__)
-#endif
-
-bool adl_active;
-bool opt_reorder = false;
-
-int opt_hysteresis = 3;
-const int opt_targettemp = 75;
-const int opt_overheattemp = 85;
-static pthread_mutex_t adl_lock;
-
-struct gpu_adapters {
-	int iAdapterIndex;
-	int iBusNumber;
-	int virtual_gpu;
-	int id;
-};
-
-// Memory allocation function
-static void * __stdcall ADL_Main_Memory_Alloc(int iSize)
-{
-	void *lpBuffer = malloc(iSize);
-
-	return lpBuffer;
-}
-
-// Optional Memory de-allocation function
-static void __stdcall ADL_Main_Memory_Free (void **lpBuffer)
-{
-	if (*lpBuffer) {
-		free (*lpBuffer);
-		*lpBuffer = NULL;
-	}
-}
-
-#if defined (LINUX)
-// equivalent functions in linux
-static void *GetProcAddress(void *pLibrary, const char *name)
-{
-	return dlsym( pLibrary, name);
-}
-#endif
-
-static	ADL_MAIN_CONTROL_CREATE		ADL_Main_Control_Create;
-static	ADL_MAIN_CONTROL_DESTROY	ADL_Main_Control_Destroy;
-static	ADL_ADAPTER_NUMBEROFADAPTERS_GET	ADL_Adapter_NumberOfAdapters_Get;
-static	ADL_ADAPTER_ADAPTERINFO_GET	ADL_Adapter_AdapterInfo_Get;
-static	ADL_ADAPTER_ID_GET		ADL_Adapter_ID_Get;
-static	ADL_OVERDRIVE5_TEMPERATURE_GET	ADL_Overdrive5_Temperature_Get;
-static	ADL_OVERDRIVE5_CURRENTACTIVITY_GET	ADL_Overdrive5_CurrentActivity_Get;
-static	ADL_OVERDRIVE5_ODPARAMETERS_GET	ADL_Overdrive5_ODParameters_Get;
-static	ADL_OVERDRIVE5_FANSPEEDINFO_GET	ADL_Overdrive5_FanSpeedInfo_Get;
-static	ADL_OVERDRIVE5_FANSPEED_GET	ADL_Overdrive5_FanSpeed_Get;
-static	ADL_OVERDRIVE5_FANSPEED_SET	ADL_Overdrive5_FanSpeed_Set;
-static	ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET	ADL_Overdrive5_ODPerformanceLevels_Get;
-static	ADL_OVERDRIVE5_ODPERFORMANCELEVELS_SET	ADL_Overdrive5_ODPerformanceLevels_Set;
-static	ADL_MAIN_CONTROL_REFRESH	ADL_Main_Control_Refresh;
-static	ADL_OVERDRIVE5_POWERCONTROL_GET	ADL_Overdrive5_PowerControl_Get;
-static	ADL_OVERDRIVE5_POWERCONTROL_SET	ADL_Overdrive5_PowerControl_Set;
-static	ADL_OVERDRIVE5_FANSPEEDTODEFAULT_SET	ADL_Overdrive5_FanSpeedToDefault_Set;
-
-#if defined (LINUX)
-	static void *hDLL;	// Handle to .so library
-#else
-	HINSTANCE hDLL;		// Handle to DLL
-#endif
-static int iNumberAdapters;
-static LPAdapterInfo lpInfo = NULL;
-
-int set_fanspeed(int gpu, int iFanSpeed);
-static float __gpu_temp(struct gpu_adl *ga);
-
-static inline void lock_adl(void)
-{
-	mutex_lock(&adl_lock);
-}
-
-static inline void unlock_adl(void)
-{
-	mutex_unlock(&adl_lock);
-}
-
-/* This looks for the twin GPU that has the fanspeed control of a non fanspeed
- * control GPU on dual GPU cards */
-static bool fanspeed_twin(struct gpu_adl *ga, struct gpu_adl *other_ga)
-{
-	if (!other_ga->has_fanspeed)
-		return false;
-	if (abs(ga->iBusNumber - other_ga->iBusNumber) != 1)
-		return false;
-	if (strcmp(ga->strAdapterName, other_ga->strAdapterName))
-		return false;
-	return true;
-}
-
-static bool prepare_adl(void)
-{
-	int result;
-
-#if defined (LINUX)
-	hDLL = dlopen( "libatiadlxx.so", RTLD_LAZY|RTLD_GLOBAL);
-#else
-	hDLL = LoadLibrary("atiadlxx.dll");
-	if (hDLL == NULL)
-		// A 32 bit calling application on 64 bit OS will fail to LoadLIbrary.
-		// Try to load the 32 bit library (atiadlxy.dll) instead
-		hDLL = LoadLibrary("atiadlxy.dll");
-#endif
-	if (hDLL == NULL) {
-		applog(LOG_INFO, "Unable to load ati adl library");
-		return false;
-	}
-	ADL_Main_Control_Create = (ADL_MAIN_CONTROL_CREATE) GetProcAddress(hDLL,"ADL_Main_Control_Create");
-	ADL_Main_Control_Destroy = (ADL_MAIN_CONTROL_DESTROY) GetProcAddress(hDLL,"ADL_Main_Control_Destroy");
-	ADL_Adapter_NumberOfAdapters_Get = (ADL_ADAPTER_NUMBEROFADAPTERS_GET) GetProcAddress(hDLL,"ADL_Adapter_NumberOfAdapters_Get");
-	ADL_Adapter_AdapterInfo_Get = (ADL_ADAPTER_ADAPTERINFO_GET) GetProcAddress(hDLL,"ADL_Adapter_AdapterInfo_Get");
-	ADL_Adapter_ID_Get = (ADL_ADAPTER_ID_GET) GetProcAddress(hDLL,"ADL_Adapter_ID_Get");
-	ADL_Overdrive5_Temperature_Get = (ADL_OVERDRIVE5_TEMPERATURE_GET) GetProcAddress(hDLL,"ADL_Overdrive5_Temperature_Get");
-	ADL_Overdrive5_CurrentActivity_Get = (ADL_OVERDRIVE5_CURRENTACTIVITY_GET) GetProcAddress(hDLL, "ADL_Overdrive5_CurrentActivity_Get");
-	ADL_Overdrive5_ODParameters_Get = (ADL_OVERDRIVE5_ODPARAMETERS_GET) GetProcAddress(hDLL, "ADL_Overdrive5_ODParameters_Get");
-	ADL_Overdrive5_FanSpeedInfo_Get = (ADL_OVERDRIVE5_FANSPEEDINFO_GET) GetProcAddress(hDLL, "ADL_Overdrive5_FanSpeedInfo_Get");
-	ADL_Overdrive5_FanSpeed_Get = (ADL_OVERDRIVE5_FANSPEED_GET) GetProcAddress(hDLL, "ADL_Overdrive5_FanSpeed_Get");
-	ADL_Overdrive5_FanSpeed_Set = (ADL_OVERDRIVE5_FANSPEED_SET) GetProcAddress(hDLL, "ADL_Overdrive5_FanSpeed_Set");
-	ADL_Overdrive5_ODPerformanceLevels_Get = (ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET) GetProcAddress(hDLL, "ADL_Overdrive5_ODPerformanceLevels_Get");
-	ADL_Overdrive5_ODPerformanceLevels_Set = (ADL_OVERDRIVE5_ODPERFORMANCELEVELS_SET) GetProcAddress(hDLL, "ADL_Overdrive5_ODPerformanceLevels_Set");
-	ADL_Main_Control_Refresh = (ADL_MAIN_CONTROL_REFRESH) GetProcAddress(hDLL, "ADL_Main_Control_Refresh");
-	ADL_Overdrive5_PowerControl_Get = (ADL_OVERDRIVE5_POWERCONTROL_GET) GetProcAddress(hDLL, "ADL_Overdrive5_PowerControl_Get");
-	ADL_Overdrive5_PowerControl_Set = (ADL_OVERDRIVE5_POWERCONTROL_SET) GetProcAddress(hDLL, "ADL_Overdrive5_PowerControl_Set");
-	ADL_Overdrive5_FanSpeedToDefault_Set = (ADL_OVERDRIVE5_FANSPEEDTODEFAULT_SET) GetProcAddress(hDLL, "ADL_Overdrive5_FanSpeedToDefault_Set");
-
-	if (!ADL_Main_Control_Create || !ADL_Main_Control_Destroy ||
-		!ADL_Adapter_NumberOfAdapters_Get || !ADL_Adapter_AdapterInfo_Get ||
-		!ADL_Adapter_ID_Get || !ADL_Overdrive5_Temperature_Get ||
-		!ADL_Overdrive5_CurrentActivity_Get ||
-		!ADL_Overdrive5_ODParameters_Get || !ADL_Overdrive5_FanSpeedInfo_Get ||
-		!ADL_Overdrive5_FanSpeed_Get || !ADL_Overdrive5_FanSpeed_Set ||
-		!ADL_Overdrive5_ODPerformanceLevels_Get || !ADL_Overdrive5_ODPerformanceLevels_Set ||
-		!ADL_Main_Control_Refresh || !ADL_Overdrive5_PowerControl_Get ||
-		!ADL_Overdrive5_PowerControl_Set || !ADL_Overdrive5_FanSpeedToDefault_Set) {
-			applog(LOG_WARNING, "ATI ADL's API is missing");
-		return false;
-	}
-
-	// Initialise ADL. The second parameter is 1, which means:
-	// retrieve adapter information only for adapters that are physically present and enabled in the system
-	result = ADL_Main_Control_Create (ADL_Main_Memory_Alloc, 1);
-	if (result != ADL_OK) {
-		applog(LOG_INFO, "ADL Initialisation Error! Error %d!", result);
-		return false;
-	}
-
-	result = ADL_Main_Control_Refresh();
-	if (result != ADL_OK) {
-		applog(LOG_INFO, "ADL Refresh Error! Error %d!", result);
-		return false;
-	}
-
-	return true;
-}
-
-void init_adl(int nDevs)
-{
-	int result, i, j, devices = 0, last_adapter = -1, gpu = 0, dummy = 0;
-	struct gpu_adapters adapters[MAX_GPUDEVICES], vadapters[MAX_GPUDEVICES];
-	bool devs_match = true;
-
-	if (unlikely(pthread_mutex_init(&adl_lock, NULL))) {
-		applog(LOG_ERR, "Failed to init adl_lock in init_adl");
-		return;
-	}
-
-	if (!prepare_adl())
-		return;
-
-	// Obtain the number of adapters for the system
-	result = ADL_Adapter_NumberOfAdapters_Get (&iNumberAdapters);
-	if (result != ADL_OK) {
-		applog(LOG_INFO, "Cannot get the number of adapters! Error %d!", result);
-		return ;
-	}
-
-	if (iNumberAdapters > 0) {
-		lpInfo = malloc ( sizeof (AdapterInfo) * iNumberAdapters );
-		memset ( lpInfo,'\0', sizeof (AdapterInfo) * iNumberAdapters );
-
-		lpInfo->iSize = sizeof(lpInfo);
-		// Get the AdapterInfo structure for all adapters in the system
-		result = ADL_Adapter_AdapterInfo_Get (lpInfo, sizeof (AdapterInfo) * iNumberAdapters);
-		if (result != ADL_OK) {
-			applog(LOG_INFO, "ADL_Adapter_AdapterInfo_Get Error! Error %d", result);
-			return ;
-		}
-	} else {
-		applog(LOG_INFO, "No adapters found");
-		return;
-	}
-
-	/* Iterate over iNumberAdapters and find the lpAdapterID of real devices */
-	for (i = 0; i < iNumberAdapters; i++) {
-		int iAdapterIndex;
-		int lpAdapterID;
-
-		iAdapterIndex = lpInfo[i].iAdapterIndex;
-		/* Get unique identifier of the adapter, 0 means not AMD */
-		result = ADL_Adapter_ID_Get(iAdapterIndex, &lpAdapterID);
-		if (result != ADL_OK) {
-			applog(LOG_INFO, "Failed to ADL_Adapter_ID_Get. Error %d", result);
-			if (result == -10)
-				applog(LOG_INFO, "This error says the device is not enabled");
-			continue;
-		}
-
-		/* Each adapter may have multiple entries */
-		if (lpAdapterID == last_adapter)
-			continue;
-
-		applog(LOG_DEBUG, "GPU %d "
-		       "iAdapterIndex %d "
-		       "strUDID %s "
-		       "iBusNumber %d "
-		       "iDeviceNumber %d "
-		       "iFunctionNumber %d "
-		       "iVendorID %d "
-		       "strAdapterName  %s ",
-		       devices,
-		       iAdapterIndex,
-		       lpInfo[i].strUDID,
-		       lpInfo[i].iBusNumber,
-		       lpInfo[i].iDeviceNumber,
-		       lpInfo[i].iFunctionNumber,
-		       lpInfo[i].iVendorID,
-		       lpInfo[i].strAdapterName);
-
-		adapters[devices].iAdapterIndex = iAdapterIndex;
-		adapters[devices].iBusNumber = lpInfo[i].iBusNumber;
-		adapters[devices].id = i;
-
-		/* We found a truly new adapter instead of a logical
-		 * one. Now since there's no way of correlating the
-		 * opencl enumerated devices and the ADL enumerated
-		 * ones, we have to assume they're in the same order.*/
-		if (++devices > nDevs && devs_match) {
-			applog(LOG_ERR, "ADL found more devices than opencl!");
-			applog(LOG_ERR, "There is possibly at least one GPU that doesn't support OpenCL");
-			applog(LOG_ERR, "Use the gpu map feature to reliably map OpenCL to ADL");
-			devs_match = false;
-		}
-		last_adapter = lpAdapterID;
-
-		if (!lpAdapterID) {
-			applog(LOG_INFO, "Adapter returns ID 0 meaning not AMD. Card order might be confused");
-			continue;
-		}
-	}
-
-	if (devices < nDevs) {
-		applog(LOG_ERR, "ADL found less devices than opencl!");
-		applog(LOG_ERR, "There is possibly more than one display attached to a GPU");
-		applog(LOG_ERR, "Use the gpu map feature to reliably map OpenCL to ADL");
-		devs_match = false;
-	}
-
-	for (i = 0; i < devices; i++) {
-		vadapters[i].virtual_gpu = i;
-		vadapters[i].id = adapters[i].id;
-	}
-
-	/* Apply manually provided OpenCL to ADL mapping, if any */
-	for (i = 0; i < nDevs; i++) {
-		if (gpus[i].mapped) {
-			vadapters[gpus[i].virtual_adl].virtual_gpu = i;
-			applog(LOG_INFO, "Mapping OpenCL device %d to ADL device %d", i, gpus[i].virtual_adl);
-		} else
-			gpus[i].virtual_adl = i;
-	}
-
-	if (!devs_match) {
-		applog(LOG_ERR, "WARNING: Number of OpenCL and ADL devices did not match!");
-		applog(LOG_ERR, "Hardware monitoring may NOT match up with devices!");
-	} else if (opt_reorder) {
-		/* Windows has some kind of random ordering for bus number IDs and
-		 * ordering the GPUs according to ascending order fixes it. Linux
-		 * has usually sequential but decreasing order instead! */
-		for (i = 0; i < devices; i++) {
-			int j, virtual_gpu;
-
-			virtual_gpu = 0;
-			for (j = 0; j < devices; j++) {
-				if (i == j)
-					continue;
-#ifdef WIN32
-				if (adapters[j].iBusNumber < adapters[i].iBusNumber)
-#else
-				if (adapters[j].iBusNumber > adapters[i].iBusNumber)
-#endif
-					virtual_gpu++;
-			}
-			if (virtual_gpu != i) {
-				applog(LOG_INFO, "Mapping device %d to GPU %d according to Bus Number order",
-				       i, virtual_gpu);
-				vadapters[virtual_gpu].virtual_gpu = i;
-				vadapters[virtual_gpu].id = adapters[i].id;
-			}
-		}
-	}
-
-	if (devices > nDevs)
-		devices = nDevs;
-
-	for (gpu = 0; gpu < devices; gpu++) {
-		struct gpu_adl *ga;
-		int iAdapterIndex;
-		int lpAdapterID;
-		ADLODPerformanceLevels *lpOdPerformanceLevels;
-		int lev, adlGpu;
-
-		adlGpu = gpus[gpu].virtual_adl;
-		i = vadapters[adlGpu].id;
-		iAdapterIndex = lpInfo[i].iAdapterIndex;
-		gpus[gpu].virtual_gpu = vadapters[adlGpu].virtual_gpu;
-
-		/* Get unique identifier of the adapter, 0 means not AMD */
-		result = ADL_Adapter_ID_Get(iAdapterIndex, &lpAdapterID);
-		if (result != ADL_OK) {
-			applog(LOG_INFO, "Failed to ADL_Adapter_ID_Get. Error %d", result);
-			continue;
-		}
-
-		if (gpus[gpu].deven == DEV_DISABLED) {
-			gpus[gpu].gpu_engine =
-			gpus[gpu].gpu_memclock =
-			gpus[gpu].gpu_vddc =
-			gpus[gpu].gpu_fan =
-			gpus[gpu].gpu_powertune = 0;
-			continue;
-		}
-
-		applog(LOG_INFO, "GPU %d %s hardware monitoring enabled", gpu, lpInfo[i].strAdapterName);
-		if (gpus[gpu].name)
-			free(gpus[gpu].name);
-		gpus[gpu].name = lpInfo[i].strAdapterName;
-		gpus[gpu].has_adl = true;
-		/* Flag adl as active if any card is successfully activated */
-		adl_active = true;
-
-		/* From here on we know this device is a discrete device and
-		 * should support ADL */
-		ga = &gpus[gpu].adl;
-		ga->gpu = gpu;
-		ga->iAdapterIndex = iAdapterIndex;
-		ga->lpAdapterID = lpAdapterID;
-		strcpy(ga->strAdapterName, lpInfo[i].strAdapterName);
-		ga->DefPerfLev = NULL;
-		ga->twin = NULL;
-
-		ga->lpOdParameters.iSize = sizeof(ADLODParameters);
-		if (ADL_Overdrive5_ODParameters_Get(iAdapterIndex, &ga->lpOdParameters) != ADL_OK)
-			applog(LOG_INFO, "Failed to ADL_Overdrive5_ODParameters_Get");
-
-		lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
-		/* We're only interested in the top performance level */
-		lpOdPerformanceLevels = malloc(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
-		lpOdPerformanceLevels->iSize = sizeof(ADLODPerformanceLevels) + sizeof(ADLODPerformanceLevel) * lev;
-
-		/* Get default performance levels first */
-		if (ADL_Overdrive5_ODPerformanceLevels_Get(iAdapterIndex, 1, lpOdPerformanceLevels) != ADL_OK)
-			applog(LOG_INFO, "Failed to ADL_Overdrive5_ODPerformanceLevels_Get");
-		/* Set the limits we'd use based on default gpu speeds */
-		ga->maxspeed = ga->minspeed = lpOdPerformanceLevels->aLevels[lev].iEngineClock;
-
-		ga->lpTemperature.iSize = sizeof(ADLTemperature);
-		ga->lpFanSpeedInfo.iSize = sizeof(ADLFanSpeedInfo);
-		ga->lpFanSpeedValue.iSize = ga->DefFanSpeedValue.iSize = sizeof(ADLFanSpeedValue);
-		/* Now get the current performance levels for any existing overclock */
-		ADL_Overdrive5_ODPerformanceLevels_Get(iAdapterIndex, 0, lpOdPerformanceLevels);
-		/* Save these values as the defaults in case we wish to reset to defaults */
-		ga->DefPerfLev = lpOdPerformanceLevels;
-
-		if (gpus[gpu].gpu_engine) {
-			int setengine = gpus[gpu].gpu_engine * 100;
-
-			/* Lower profiles can't have a higher setting */
-			for (j = 0; j < lev; j++) {
-				if (lpOdPerformanceLevels->aLevels[j].iEngineClock > setengine)
-					lpOdPerformanceLevels->aLevels[j].iEngineClock = setengine;
-			}
-			lpOdPerformanceLevels->aLevels[lev].iEngineClock = setengine;
-			applog(LOG_INFO, "Setting GPU %d engine clock to %d", gpu, gpus[gpu].gpu_engine);
-			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
-			ga->maxspeed = setengine;
-			if (gpus[gpu].min_engine)
-				ga->minspeed = gpus[gpu].min_engine * 100;
-			ga->managed = true;
-			if (gpus[gpu].gpu_memdiff)
-				set_memoryclock(gpu, gpus[gpu].gpu_engine + gpus[gpu].gpu_memdiff);
-		}
-
-		if (gpus[gpu].gpu_memclock) {
-			int setmem = gpus[gpu].gpu_memclock * 100;
-
-			for (j = 0; j < lev; j++) {
-				if (lpOdPerformanceLevels->aLevels[j].iMemoryClock > setmem)
-					lpOdPerformanceLevels->aLevels[j].iMemoryClock = setmem;
-			}
-			lpOdPerformanceLevels->aLevels[lev].iMemoryClock = setmem;
-			applog(LOG_INFO, "Setting GPU %d memory clock to %d", gpu, gpus[gpu].gpu_memclock);
-			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
-			ga->managed = true;
-		}
-
-		if (gpus[gpu].gpu_vddc) {
-			int setv = gpus[gpu].gpu_vddc * 1000;
-
-			for (j = 0; j < lev; j++) {
-				if (lpOdPerformanceLevels->aLevels[j].iVddc > setv)
-					lpOdPerformanceLevels->aLevels[j].iVddc = setv;
-			}
-			lpOdPerformanceLevels->aLevels[lev].iVddc = setv;
-			applog(LOG_INFO, "Setting GPU %d voltage to %.3f", gpu, gpus[gpu].gpu_vddc);
-			ADL_Overdrive5_ODPerformanceLevels_Set(iAdapterIndex, lpOdPerformanceLevels);
-			ga->managed = true;
-		}
-
-		ADL_Overdrive5_ODPerformanceLevels_Get(iAdapterIndex, 0, lpOdPerformanceLevels);
-		ga->iEngineClock = lpOdPerformanceLevels->aLevels[lev].iEngineClock;
-		ga->iMemoryClock = lpOdPerformanceLevels->aLevels[lev].iMemoryClock;
-		ga->iVddc = lpOdPerformanceLevels->aLevels[lev].iVddc;
-		ga->iBusNumber = lpInfo[i].iBusNumber;
-
-		if (ADL_Overdrive5_FanSpeedInfo_Get(iAdapterIndex, 0, &ga->lpFanSpeedInfo) != ADL_OK)
-			applog(LOG_INFO, "Failed to ADL_Overdrive5_FanSpeedInfo_Get");
-		else
-			ga->has_fanspeed = true;
-
-		/* Save the fanspeed values as defaults in case we reset later */
-		ADL_Overdrive5_FanSpeed_Get(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
-		if (gpus[gpu].gpu_fan)
-			set_fanspeed(gpu, gpus[gpu].gpu_fan);
-		else
-			gpus[gpu].gpu_fan = 85; /* Set a nominal upper limit of 85% */
-
-		/* Not fatal if powercontrol get fails */
-		if (ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy) != ADL_OK)
-			applog(LOG_INFO, "Failed to ADL_Overdrive5_PowerControl_get");
-
-		if (gpus[gpu].gpu_powertune) {
-			ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, gpus[gpu].gpu_powertune);
-			ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy);
-			ga->managed = true;
-		}
-
-		/* Set some default temperatures for autotune when enabled */
-		if (!ga->targettemp)
-			ga->targettemp = opt_targettemp;
-		if (!ga->overtemp)
-			ga->overtemp = opt_overheattemp;
-		if (!gpus[gpu].cutofftemp)
-			gpus[gpu].cutofftemp = opt_cutofftemp;
-		if (opt_autofan) {
-			/* Set a safe starting default if we're automanaging fan speeds */
-			int nominal = 50;
-
-			ga->autofan = true;
-			/* Clamp fanspeed values to range provided */
-			if (nominal > gpus[gpu].gpu_fan)
-				nominal = gpus[gpu].gpu_fan;
-			if (nominal < gpus[gpu].min_fan)
-				nominal = gpus[gpu].min_fan;
-			set_fanspeed(gpu, nominal);
-		}
-		if (opt_autoengine) {
-			ga->autoengine = true;
-			ga->managed = true;
-		}
-		ga->lasttemp = __gpu_temp(ga);
-	}
-
-	for (gpu = 0; gpu < devices; gpu++) {
-		struct gpu_adl *ga = &gpus[gpu].adl;
-		int j;
-
-		for (j = 0; j < devices; j++) {
-			struct gpu_adl *other_ga;
-
-			if (j == gpu)
-				continue;
-
-			other_ga = &gpus[j].adl;
-
-			/* Search for twin GPUs on a single card. They will be
-			 * separated by one bus id and one will have fanspeed
-			 * while the other won't. */
-			if (!ga->has_fanspeed) {
-				if (fanspeed_twin(ga, other_ga)) {
-					applog(LOG_INFO, "Dual GPUs detected: %d and %d",
-						ga->gpu, other_ga->gpu);
-					ga->twin = other_ga;
-					other_ga->twin = ga;
-				}
-			}
-		}
-	}
-}
-
-static float __gpu_temp(struct gpu_adl *ga)
-{
-	if (ADL_Overdrive5_Temperature_Get(ga->iAdapterIndex, 0, &ga->lpTemperature) != ADL_OK)
-		return -1;
-	return (float)ga->lpTemperature.iTemperature / 1000;
-}
-
-float gpu_temp(int gpu)
-{
-	struct gpu_adl *ga;
-	float ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ret = __gpu_temp(ga);
-	unlock_adl();
-	gpus[gpu].temp = ret;
-	return ret;
-}
-
-static inline int __gpu_engineclock(struct gpu_adl *ga)
-{
-	return ga->lpActivity.iEngineClock / 100;
-}
-
-int gpu_engineclock(int gpu)
-{
-	struct gpu_adl *ga;
-	int ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
-		goto out;
-	ret = __gpu_engineclock(ga);
-out:
-	unlock_adl();
-	return ret;
-}
-
-static inline int __gpu_memclock(struct gpu_adl *ga)
-{
-	return ga->lpActivity.iMemoryClock / 100;
-}
-
-int gpu_memclock(int gpu)
-{
-	struct gpu_adl *ga;
-	int ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
-		goto out;
-	ret = __gpu_memclock(ga);
-out:
-	unlock_adl();
-	return ret;
-}
-
-static inline float __gpu_vddc(struct gpu_adl *ga)
-{
-	return (float)ga->lpActivity.iVddc / 1000;
-}
-
-float gpu_vddc(int gpu)
-{
-	struct gpu_adl *ga;
-	float ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK)
-		goto out;
-	ret = __gpu_vddc(ga);
-out:
-	unlock_adl();
-	return ret;
-}
-
-static inline int __gpu_activity(struct gpu_adl *ga)
-{
-	if (!ga->lpOdParameters.iActivityReportingSupported)
-		return -1;
-	return ga->lpActivity.iActivityPercent;
-}
-
-int gpu_activity(int gpu)
-{
-	struct gpu_adl *ga;
-	int ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ret = ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
-	unlock_adl();
-	if (ret != ADL_OK)
-		return ret;
-	if (!ga->lpOdParameters.iActivityReportingSupported)
-		return ret;
-	return ga->lpActivity.iActivityPercent;
-}
-
-static inline int __gpu_fanspeed(struct gpu_adl *ga)
-{
-	if (!ga->has_fanspeed && ga->twin)
-		return __gpu_fanspeed(ga->twin);
-
-	if (!(ga->lpFanSpeedInfo.iFlags & ADL_DL_FANCTRL_SUPPORTS_RPM_READ))
-		return -1;
-	ga->lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_RPM;
-	if (ADL_Overdrive5_FanSpeed_Get(ga->iAdapterIndex, 0, &ga->lpFanSpeedValue) != ADL_OK)
-		return -1;
-	return ga->lpFanSpeedValue.iFanSpeed;
-}
-
-int gpu_fanspeed(int gpu)
-{
-	struct gpu_adl *ga;
-	int ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ret = __gpu_fanspeed(ga);
-	unlock_adl();
-	return ret;
-}
-
-static int __gpu_fanpercent(struct gpu_adl *ga)
-{
-	if (!ga->has_fanspeed && ga->twin)
-		return __gpu_fanpercent(ga->twin);
-
-	if (!(ga->lpFanSpeedInfo.iFlags & ADL_DL_FANCTRL_SUPPORTS_PERCENT_READ ))
-		return -1;
-	ga->lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
-	if (ADL_Overdrive5_FanSpeed_Get(ga->iAdapterIndex, 0, &ga->lpFanSpeedValue) != ADL_OK)
-		return -1;
-	return ga->lpFanSpeedValue.iFanSpeed;
-}
-
-int gpu_fanpercent(int gpu)
-{
-	struct gpu_adl *ga;
-	int ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ret = __gpu_fanpercent(ga);
-	unlock_adl();
-	if (unlikely(ga->has_fanspeed && ret == -1)) {
-#if 0
-		/* Recursive calling applog causes a hang, so disable messages */
-		applog(LOG_WARNING, "GPU %d stopped reporting fanspeed due to driver corruption", gpu);
-		if (opt_restart) {
-			applog(LOG_WARNING, "Restart enabled, will attempt to restart cgminer");
-			applog(LOG_WARNING, "You can disable this with the --no-restart option");
-			app_restart();
-		}
-		applog(LOG_WARNING, "Disabling fanspeed monitoring on this device");
-		ga->has_fanspeed = false;
-		if (ga->twin) {
-			applog(LOG_WARNING, "Disabling fanspeed linking on GPU twins");
-			ga->twin->twin = NULL;;
-			ga->twin = NULL;
-		}
-#endif
-		if (opt_restart)
-			app_restart();
-		ga->has_fanspeed = false;
-		if (ga->twin) {
-			ga->twin->twin = NULL;;
-			ga->twin = NULL;
-		}
-	}
-	return ret;
-}
-
-static inline int __gpu_powertune(struct gpu_adl *ga)
-{
-	int dummy = 0;
-
-	if (ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy) != ADL_OK)
-		return -1;
-	return ga->iPercentage;
-}
-
-int gpu_powertune(int gpu)
-{
-	struct gpu_adl *ga;
-	int ret = -1;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return ret;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ret = __gpu_powertune(ga);
-	unlock_adl();
-	return ret;
-}
-
-bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
-	       int *activity, int *fanspeed, int *fanpercent, int *powertune)
-{
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active)
-		return false;
-
-	ga = &gpus[gpu].adl;
-
-	lock_adl();
-	*temp = __gpu_temp(ga);
-	if (ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity) != ADL_OK) {
-		*engineclock = 0;
-		*memclock = 0;
-		*vddc = 0;
-		*activity = 0;
-	} else {
-		*engineclock = __gpu_engineclock(ga);
-		*memclock = __gpu_memclock(ga);
-		*vddc = __gpu_vddc(ga);
-		*activity = __gpu_activity(ga);
-	}
-	*fanspeed = __gpu_fanspeed(ga);
-	*fanpercent = __gpu_fanpercent(ga);
-	*powertune = __gpu_powertune(ga);
-	unlock_adl();
-
-	return true;
-}
-
-#ifdef HAVE_CURSES
-static void get_enginerange(int gpu, int *imin, int *imax)
-{
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Get enginerange not supported\n");
-		return;
-	}
-	ga = &gpus[gpu].adl;
-	*imin = ga->lpOdParameters.sEngineClock.iMin / 100;
-	*imax = ga->lpOdParameters.sEngineClock.iMax / 100;
-}
-#endif
-
-int set_engineclock(int gpu, int iEngineClock)
-{
-	ADLODPerformanceLevels *lpOdPerformanceLevels;
-	struct cgpu_info *cgpu;
-	int i, lev, ret = 1;
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Set engineclock not supported\n");
-		return ret;
-	}
-
-	iEngineClock *= 100;
-	ga = &gpus[gpu].adl;
-
-	/* Keep track of intended engine clock in case the device changes
-	 * profile and drops while idle, not taking the new engine clock */
-	ga->lastengine = iEngineClock;
-
-	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
-	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
-	lpOdPerformanceLevels->iSize = sizeof(ADLODPerformanceLevels) + sizeof(ADLODPerformanceLevel) * lev;
-
-	lock_adl();
-	if (ADL_Overdrive5_ODPerformanceLevels_Get(ga->iAdapterIndex, 0, lpOdPerformanceLevels) != ADL_OK)
-		goto out;
-	for (i = 0; i < lev; i++) {
-		if (lpOdPerformanceLevels->aLevels[i].iEngineClock > iEngineClock)
-			lpOdPerformanceLevels->aLevels[i].iEngineClock = iEngineClock;
-	}
-	lpOdPerformanceLevels->aLevels[lev].iEngineClock = iEngineClock;
-	ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, lpOdPerformanceLevels);
-	ADL_Overdrive5_ODPerformanceLevels_Get(ga->iAdapterIndex, 0, lpOdPerformanceLevels);
-	if (lpOdPerformanceLevels->aLevels[lev].iEngineClock == iEngineClock)
-		ret = 0;
-	ga->iEngineClock = lpOdPerformanceLevels->aLevels[lev].iEngineClock;
-	if (ga->iEngineClock > ga->maxspeed)
-		ga->maxspeed = ga->iEngineClock;
-	if (ga->iEngineClock < ga->minspeed)
-		ga->minspeed = ga->iEngineClock;
-	ga->iMemoryClock = lpOdPerformanceLevels->aLevels[lev].iMemoryClock;
-	ga->iVddc = lpOdPerformanceLevels->aLevels[lev].iVddc;
-	ga->managed = true;
-out:
-	unlock_adl();
-
-	cgpu = &gpus[gpu];
-	if (cgpu->gpu_memdiff)
-		set_memoryclock(gpu, iEngineClock / 100 + cgpu->gpu_memdiff);
-
-	return ret;
-}
-
-#ifdef HAVE_CURSES
-static void get_memoryrange(int gpu, int *imin, int *imax)
-{
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Get memoryrange not supported\n");
-		return;
-	}
-	ga = &gpus[gpu].adl;
-	*imin = ga->lpOdParameters.sMemoryClock.iMin / 100;
-	*imax = ga->lpOdParameters.sMemoryClock.iMax / 100;
-}
-#endif
-
-int set_memoryclock(int gpu, int iMemoryClock)
-{
-	ADLODPerformanceLevels *lpOdPerformanceLevels;
-	int i, lev, ret = 1;
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Set memoryclock not supported\n");
-		return ret;
-	}
-
-	iMemoryClock *= 100;
-	ga = &gpus[gpu].adl;
-
-	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
-	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
-	lpOdPerformanceLevels->iSize = sizeof(ADLODPerformanceLevels) + sizeof(ADLODPerformanceLevel) * lev;
-
-	lock_adl();
-	if (ADL_Overdrive5_ODPerformanceLevels_Get(ga->iAdapterIndex, 0, lpOdPerformanceLevels) != ADL_OK)
-		goto out;
-	lpOdPerformanceLevels->aLevels[lev].iMemoryClock = iMemoryClock;
-	for (i = 0; i < lev; i++) {
-		if (lpOdPerformanceLevels->aLevels[i].iMemoryClock > iMemoryClock)
-			lpOdPerformanceLevels->aLevels[i].iMemoryClock = iMemoryClock;
-	}
-	ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, lpOdPerformanceLevels);
-	ADL_Overdrive5_ODPerformanceLevels_Get(ga->iAdapterIndex, 0, lpOdPerformanceLevels);
-	if (lpOdPerformanceLevels->aLevels[lev].iMemoryClock == iMemoryClock)
-		ret = 0;
-	ga->iEngineClock = lpOdPerformanceLevels->aLevels[lev].iEngineClock;
-	ga->iMemoryClock = lpOdPerformanceLevels->aLevels[lev].iMemoryClock;
-	ga->iVddc = lpOdPerformanceLevels->aLevels[lev].iVddc;
-	ga->managed = true;
-out:
-	unlock_adl();
-	return ret;
-}
-
-#ifdef HAVE_CURSES
-static void get_vddcrange(int gpu, float *imin, float *imax)
-{
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Get vddcrange not supported\n");
-		return;
-	}
-	ga = &gpus[gpu].adl;
-	*imin = (float)ga->lpOdParameters.sVddc.iMin / 1000;
-	*imax = (float)ga->lpOdParameters.sVddc.iMax / 1000;
-}
-
-static float curses_float(const char *query)
-{
-	float ret;
-	char *cvar;
-
-	cvar = curses_input(query);
-	ret = atof(cvar);
-	free(cvar);
-	return ret;
-}
-#endif
-
-int set_vddc(int gpu, float fVddc)
-{
-	ADLODPerformanceLevels *lpOdPerformanceLevels;
-	int i, iVddc, lev, ret = 1;
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Set vddc not supported\n");
-		return ret;
-	}
-
-	iVddc = 1000 * fVddc;
-	ga = &gpus[gpu].adl;
-
-	lev = ga->lpOdParameters.iNumberOfPerformanceLevels - 1;
-	lpOdPerformanceLevels = alloca(sizeof(ADLODPerformanceLevels) + (lev * sizeof(ADLODPerformanceLevel)));
-	lpOdPerformanceLevels->iSize = sizeof(ADLODPerformanceLevels) + sizeof(ADLODPerformanceLevel) * lev;
-
-	lock_adl();
-	if (ADL_Overdrive5_ODPerformanceLevels_Get(ga->iAdapterIndex, 0, lpOdPerformanceLevels) != ADL_OK)
-		goto out;
-	for (i = 0; i < lev; i++) {
-		if (lpOdPerformanceLevels->aLevels[i].iVddc > iVddc)
-			lpOdPerformanceLevels->aLevels[i].iVddc = iVddc;
-	}
-	lpOdPerformanceLevels->aLevels[lev].iVddc = iVddc;
-	ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, lpOdPerformanceLevels);
-	ADL_Overdrive5_ODPerformanceLevels_Get(ga->iAdapterIndex, 0, lpOdPerformanceLevels);
-	if (lpOdPerformanceLevels->aLevels[lev].iVddc == iVddc)
-		ret = 0;
-	ga->iEngineClock = lpOdPerformanceLevels->aLevels[lev].iEngineClock;
-	ga->iMemoryClock = lpOdPerformanceLevels->aLevels[lev].iMemoryClock;
-	ga->iVddc = lpOdPerformanceLevels->aLevels[lev].iVddc;
-	ga->managed = true;
-out:
-	unlock_adl();
-	return ret;
-}
-
-static void get_fanrange(int gpu, int *imin, int *imax)
-{
-	struct gpu_adl *ga;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Get fanrange not supported\n");
-		return;
-	}
-	ga = &gpus[gpu].adl;
-	*imin = ga->lpFanSpeedInfo.iMinPercent;
-	*imax = ga->lpFanSpeedInfo.iMaxPercent;
-}
-
-int set_fanspeed(int gpu, int iFanSpeed)
-{
-	struct gpu_adl *ga;
-	int ret = 1;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Set fanspeed not supported\n");
-		return ret;
-	}
-
-	ga = &gpus[gpu].adl;
-	if (!(ga->lpFanSpeedInfo.iFlags & (ADL_DL_FANCTRL_SUPPORTS_RPM_WRITE | ADL_DL_FANCTRL_SUPPORTS_PERCENT_WRITE ))) {
-		applog(LOG_DEBUG, "GPU %d doesn't support rpm or percent write", gpu);
-		return ret;
-	}
-
-	/* Store what fanspeed we're actually aiming for for re-entrant changes
-	 * in case this device does not support fine setting changes */
-	ga->targetfan = iFanSpeed;
-
-	lock_adl();
-	if (ADL_Overdrive5_FanSpeed_Get(ga->iAdapterIndex, 0, &ga->lpFanSpeedValue) != ADL_OK) {
-		applog(LOG_DEBUG, "GPU %d call to fanspeed get failed", gpu);
-	}
-	if (!(ga->lpFanSpeedInfo.iFlags & ADL_DL_FANCTRL_SUPPORTS_PERCENT_WRITE)) {
-		/* Must convert speed to an RPM */
-		iFanSpeed = ga->lpFanSpeedInfo.iMaxRPM * iFanSpeed / 100;
-		ga->lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_RPM;
-	} else
-		ga->lpFanSpeedValue.iSpeedType = ADL_DL_FANCTRL_SPEED_TYPE_PERCENT;
-	if (!(ga->lpFanSpeedValue.iFlags & ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED)) {
-		/* If user defined is not already specified, set it first */
-		ga->lpFanSpeedValue.iFlags = ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED;
-		ADL_Overdrive5_FanSpeed_Set(ga->iAdapterIndex, 0, &ga->lpFanSpeedValue);
-	}
-	ga->lpFanSpeedValue.iFanSpeed = iFanSpeed;
-	ret = ADL_Overdrive5_FanSpeed_Set(ga->iAdapterIndex, 0, &ga->lpFanSpeedValue);
-	ga->managed = true;
-	unlock_adl();
-
-	return ret;
-}
-
-#ifdef HAVE_CURSES
-static int set_powertune(int gpu, int iPercentage)
-{
-	struct gpu_adl *ga;
-	int dummy, ret = 1;
-
-	if (!gpus[gpu].has_adl || !adl_active) {
-		wlogprint("Set powertune not supported\n");
-		return ret;
-	}
-
-	ga = &gpus[gpu].adl;
-
-	lock_adl();
-	ADL_Overdrive5_PowerControl_Set(ga->iAdapterIndex, iPercentage);
-	ADL_Overdrive5_PowerControl_Get(ga->iAdapterIndex, &ga->iPercentage, &dummy);
-	if (ga->iPercentage == iPercentage)
-		ret = 0;
-	ga->managed = true;
-	unlock_adl();
-	return ret;
-}
-#endif
-
-/* Returns whether the fanspeed is optimal already or not. The fan_window bool
- * tells us whether the current fanspeed is in the target range for fanspeeds.
- */
-static bool fan_autotune(int gpu, int temp, int fanpercent, int lasttemp, bool *fan_window)
-{
-	struct cgpu_info *cgpu = &gpus[gpu];
-	int tdiff = round(temp - lasttemp);
-	struct gpu_adl *ga = &cgpu->adl;
-	int top = gpus[gpu].gpu_fan;
-	int bot = gpus[gpu].min_fan;
-	int newpercent = fanpercent;
-	int iMin = 0, iMax = 100;
-
-	get_fanrange(gpu, &iMin, &iMax);
-	if (temp > ga->overtemp && fanpercent < iMax) {
-		applog(LOG_WARNING, "Overheat detected on GPU %d, increasing fan to 100%%", gpu);
-		newpercent = iMax;
-
-		dev_error(cgpu, REASON_DEV_OVER_HEAT);
-	} else if (temp > ga->targettemp && fanpercent < top && tdiff >= 0) {
-		applog(LOG_DEBUG, "Temperature over target, increasing fanspeed");
-		if (temp > ga->targettemp + opt_hysteresis)
-			newpercent = ga->targetfan + 10;
-		else
-			newpercent = ga->targetfan + 5;
-		if (newpercent > top)
-			newpercent = top;
-	} else if (fanpercent > bot && temp < ga->targettemp - opt_hysteresis) {
-		/* Detect large swings of 5 degrees or more and change fan by
-		 * a proportion more */
-		if (tdiff <= 0) {
-			applog(LOG_DEBUG, "Temperature %d degrees below target, decreasing fanspeed", opt_hysteresis);
-			newpercent = ga->targetfan - 1 + tdiff / 5;
-		} else if (tdiff >= 5) {
-			applog(LOG_DEBUG, "Temperature climbed %d while below target, increasing fanspeed", tdiff);
-			newpercent = ga->targetfan + tdiff / 5;
-		}
-	} else {
-
-		/* We're in the optimal range, make minor adjustments if the
-		 * temp is still drifting */
-		if (fanpercent > bot && tdiff < 0 && lasttemp < ga->targettemp) {
-			applog(LOG_DEBUG, "Temperature dropping while in target range, decreasing fanspeed");
-			newpercent = ga->targetfan + tdiff;
-		} else if (fanpercent < top && tdiff > 0 && temp > ga->targettemp - opt_hysteresis) {
-			applog(LOG_DEBUG, "Temperature rising while in target range, increasing fanspeed");
-			newpercent = ga->targetfan + tdiff;
-		}
-	}
-
-	if (newpercent > iMax)
-		newpercent = iMax;
-	else if (newpercent < iMin)
-		newpercent = iMin;
-
-	if (newpercent <= top)
-		*fan_window = true;
-	else
-		*fan_window = false;
-
-	if (newpercent != fanpercent) {
-		applog(LOG_INFO, "Setting GPU %d fan percentage to %d", gpu, newpercent);
-		set_fanspeed(gpu, newpercent);
-		/* If the fanspeed is going down and we're below the top speed,
-		 * consider the fan optimal to prevent minute changes in
-		 * fanspeed delaying GPU engine speed changes */
-		if (newpercent < fanpercent && *fan_window)
-			return true;
-		return false;
-	}
-	return true;
-}
-
-void gpu_autotune(int gpu, enum dev_enable *denable)
-{
-	int temp, fanpercent, engine, newengine, twintemp = 0;
-	bool fan_optimal = true, fan_window = true;
-	struct cgpu_info *cgpu;
-	struct gpu_adl *ga;
-
-	cgpu = &gpus[gpu];
-	ga = &cgpu->adl;
-
-	lock_adl();
-	ADL_Overdrive5_CurrentActivity_Get(ga->iAdapterIndex, &ga->lpActivity);
-	temp = __gpu_temp(ga);
-	if (ga->twin)
-		twintemp = __gpu_temp(ga->twin);
-	fanpercent = __gpu_fanpercent(ga);
-	unlock_adl();
-
-	newengine = engine = gpu_engineclock(gpu) * 100;
-
-	if (temp && fanpercent >= 0 && ga->autofan) {
-		if (!ga->twin)
-			fan_optimal = fan_autotune(gpu, temp, fanpercent, ga->lasttemp, &fan_window);
-		else if (ga->autofan && (ga->has_fanspeed || !ga->twin->autofan)) {
-			/* On linked GPUs, we autotune the fan only once, based
-			 * on the highest temperature from either GPUs */
-			int hightemp, fan_gpu;
-			int lasttemp;
-
-			if (twintemp > temp) {
-				lasttemp = ga->twin->lasttemp;
-				hightemp = twintemp;
-			} else {
-				lasttemp = ga->lasttemp;
-				hightemp = temp;
-			}
-			if (ga->has_fanspeed)
-				fan_gpu = gpu;
-			else
-				fan_gpu = ga->twin->gpu;
-			fan_optimal = fan_autotune(fan_gpu, hightemp, fanpercent, lasttemp, &fan_window);
-		}
-	}
-
-	if (engine && ga->autoengine) {
-		if (temp > cgpu->cutofftemp) {
-			applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu);
-			*denable = DEV_RECOVER;
-			newengine = ga->minspeed;
-			dev_error(cgpu, REASON_DEV_THERMAL_CUTOFF);
-		} else if (temp > ga->overtemp && engine > ga->minspeed) {
-			applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu);
-			newengine = ga->minspeed;
-
-			dev_error(cgpu, REASON_DEV_OVER_HEAT);
-		} else if (temp > ga->targettemp + opt_hysteresis && engine > ga->minspeed && fan_optimal) {
-			applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis);
-			newengine = engine - ga->lpOdParameters.sEngineClock.iStep;
-			/* Only try to tune engine speed up if this GPU is not disabled */
-		} else if (temp < ga->targettemp && engine < ga->maxspeed && fan_window && *denable == DEV_ENABLED) {
-			int iStep = ga->lpOdParameters.sEngineClock.iStep;
-
-			applog(LOG_DEBUG, "Temperature below target, increasing clock speed");
-			if (temp < ga->targettemp - opt_hysteresis)
-				iStep *= 2;
-			newengine = engine + iStep;
-		} else if (temp < ga->targettemp && *denable == DEV_RECOVER && opt_restart) {
-			applog(LOG_NOTICE, "Device recovered to temperature below target, re-enabling");
-			*denable = DEV_ENABLED;
-		}
-
-		if (newengine > ga->maxspeed)
-			newengine = ga->maxspeed;
-		else if (newengine < ga->minspeed)
-			newengine = ga->minspeed;
-
-		/* Adjust engine clock speed if it's lower, or if it's higher
-		 * but higher than the last intended value as well as the
-		 * current speed, to avoid setting the engine clock speed to
-		 * a speed relateive to a lower profile during idle periods. */
-		if (newengine < engine || (newengine > engine && newengine > ga->lastengine)) {
-			newengine /= 100;
-			applog(LOG_INFO, "Setting GPU %d engine clock to %d", gpu, newengine);
-			set_engineclock(gpu, newengine);
-		}
-	}
-	ga->lasttemp = temp;
-}
-
-void set_defaultfan(int gpu)
-{
-	struct gpu_adl *ga;
-	if (!gpus[gpu].has_adl || !adl_active)
-		return;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ADL_Overdrive5_FanSpeed_Set(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
-	unlock_adl();
-}
-
-void set_defaultengine(int gpu)
-{
-	struct gpu_adl *ga;
-	if (!gpus[gpu].has_adl || !adl_active)
-		return;
-
-	ga = &gpus[gpu].adl;
-	lock_adl();
-	ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, ga->DefPerfLev);
-	unlock_adl();
-}
-
-#ifdef HAVE_CURSES
-void change_autosettings(int gpu)
-{
-	struct gpu_adl *ga = &gpus[gpu].adl;
-	char input;
-	int val;
-
-	wlogprint("Target temperature: %d\n", ga->targettemp);
-	wlogprint("Overheat temperature: %d\n", ga->overtemp);
-	wlogprint("Cutoff temperature: %d\n", gpus[gpu].cutofftemp);
-	wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [C]utoff\n");
-	wlogprint("Or press any other key to continue\n");
-	input = getch();
-	if (!strncasecmp(&input, "f", 1)) {
-		ga->autofan ^= true;
-		wlogprint("Fan autotune is now %s\n", ga->autofan ? "enabled" : "disabled");
-		if (!ga->autofan) {
-			wlogprint("Resetting fan to startup settings\n");
-			set_defaultfan(gpu);
-		}
-	} else if (!strncasecmp(&input, "g", 1)) {
-		ga->autoengine ^= true;
-		wlogprint("GPU engine clock autotune is now %s\n", ga->autoengine ? "enabled" : "disabled");
-		if (!ga->autoengine) {
-			wlogprint("Resetting GPU engine clock to startup settings\n");
-			set_defaultengine(gpu);
-		}
-	} else if (!strncasecmp(&input, "t", 1)) {
-		val = curses_int("Enter target temperature for this GPU in C (0-200)");
-		if (val < 0 || val > 200)
-			wlogprint("Invalid temperature");
-		else
-			ga->targettemp = val;
-	} else if (!strncasecmp(&input, "o", 1)) {
-		wlogprint("Enter overheat temperature for this GPU in C (%d+)", ga->targettemp);
-		val = curses_int("");
-		if (val <= ga->targettemp || val > 200)
-			wlogprint("Invalid temperature");
-		else
-			ga->overtemp = val;
-	} else if (!strncasecmp(&input, "c", 1)) {
-		wlogprint("Enter cutoff temperature for this GPU in C (%d+)", ga->overtemp);
-		val = curses_int("");
-		if (val <= ga->overtemp || val > 200)
-			wlogprint("Invalid temperature");
-		else
-			gpus[gpu].cutofftemp = val;
-	}
-}
-
-void change_gpusettings(int gpu)
-{
-	struct gpu_adl *ga = &gpus[gpu].adl;
-	float fval, fmin = 0, fmax = 0;
-	int val, imin = 0, imax = 0;
-	char input;
-	int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
-	float temp = 0, vddc = 0;
-
-updated:
-	if (gpu_stats(gpu, &temp, &engineclock, &memclock, &vddc, &activity, &fanspeed, &fanpercent, &powertune))
-	wlogprint("Temp: %.1f C\n", temp);
-	if (fanpercent >= 0 || fanspeed >= 0) {
-		wlogprint("Fan Speed: ");
-		if (fanpercent >= 0)
-			wlogprint("%d%% ", fanpercent);
-		if (fanspeed >= 0)
-			wlogprint("(%d RPM)", fanspeed);
-		wlogprint("\n");
-	}
-	wlogprint("Engine Clock: %d MHz\nMemory Clock: %d Mhz\nVddc: %.3f V\nActivity: %d%%\nPowertune: %d%%\n",
-		engineclock, memclock, vddc, activity, powertune);
-	wlogprint("Fan autotune is %s (%d-%d)\n", ga->autofan ? "enabled" : "disabled",
-		  gpus[gpu].min_fan, gpus[gpu].gpu_fan);
-	wlogprint("GPU engine clock autotune is %s (%d-%d)\n", ga->autoengine ? "enabled" : "disabled",
-		ga->minspeed / 100, ga->maxspeed / 100);
-	wlogprint("Change [A]utomatic [E]ngine [F]an [M]emory [V]oltage [P]owertune\n");
-	wlogprint("Or press any other key to continue\n");
-	input = getch();
-
-	if (!strncasecmp(&input, "a", 1)) {
-		change_autosettings(gpu);
-	} else if (!strncasecmp(&input, "e", 1)) {
-		get_enginerange(gpu, &imin, &imax);
-		wlogprint("Enter GPU engine clock speed (%d - %d Mhz)", imin, imax);
-		val = curses_int("");
-		if (val < imin || val > imax) {
-			wlogprint("Value is outside safe range, are you sure?\n");
-			input = getch();
-			if (strncasecmp(&input, "y", 1))
-				return;
-		}
-		if (!set_engineclock(gpu, val))
-			wlogprint("Driver reports success but check values below\n");
-		else
-			wlogprint("Failed to modify engine clock speed\n");
-	} else if (!strncasecmp(&input, "f", 1)) {
-		get_fanrange(gpu, &imin, &imax);
-		wlogprint("Enter fan percentage (%d - %d %%)", imin, imax);
-		val = curses_int("");
-		if (val < imin || val > imax) {
-			wlogprint("Value is outside safe range, are you sure?\n");
-			input = getch();
-			if (strncasecmp(&input, "y", 1))
-				return;
-		}
-		if (!set_fanspeed(gpu, val))
-			wlogprint("Driver reports success but check values below\n");
-		else
-			wlogprint("Failed to modify fan speed\n");
-	} else if (!strncasecmp(&input, "m", 1)) {
-		get_memoryrange(gpu, &imin, &imax);
-		wlogprint("Enter GPU memory clock speed (%d - %d Mhz)", imin, imax);
-		val = curses_int("");
-		if (val < imin || val > imax) {
-			wlogprint("Value is outside safe range, are you sure?\n");
-			input = getch();
-			if (strncasecmp(&input, "y", 1))
-				return;
-		}
-		if (!set_memoryclock(gpu, val))
-			wlogprint("Driver reports success but check values below\n");
-		else
-			wlogprint("Failed to modify memory clock speed\n");
-	} else if (!strncasecmp(&input, "v", 1)) {
-		get_vddcrange(gpu, &fmin, &fmax);
-		wlogprint("Enter GPU voltage (%.3f - %.3f V)", fmin, fmax);
-		fval = curses_float("");
-		if (fval < fmin || fval > fmax) {
-			wlogprint("Value is outside safe range, are you sure?\n");
-			input = getch();
-			if (strncasecmp(&input, "y", 1))
-				return;
-		}
-		if (!set_vddc(gpu, fval))
-			wlogprint("Driver reports success but check values below\n");
-		else
-			wlogprint("Failed to modify voltage\n");
-	} else if (!strncasecmp(&input, "p", 1)) {
-		val = curses_int("Enter powertune value (-20 - 20)");
-		if (val < -20 || val > 20) {
-			wlogprint("Value is outside safe range, are you sure?\n");
-			input = getch();
-			if (strncasecmp(&input, "y", 1))
-				return;
-		}
-		if (!set_powertune(gpu, val))
-			wlogprint("Driver reports success but check values below\n");
-		else
-			wlogprint("Failed to modify powertune value\n");
-	} else {
-		clear_logwin();
-		return;
-	}
-	cgsleep_ms(1000);
-	goto updated;
-}
-#endif
-
-static void free_adl(void)
-{
-	ADL_Main_Memory_Free ((void **)&lpInfo);
-	ADL_Main_Control_Destroy ();
-#if defined (LINUX)
-	dlclose(hDLL);
-#else
-	FreeLibrary(hDLL);
-#endif
-}
-
-void clear_adl(int nDevs)
-{
-	struct gpu_adl *ga;
-	int i;
-
-	if (!adl_active)
-		return;
-
-	lock_adl();
-	/* Try to reset values to their defaults */
-	for (i = 0; i < nDevs; i++) {
-		ga = &gpus[i].adl;
-		/*  Only reset the values if we've changed them at any time */
-		if (!gpus[i].has_adl || !ga->managed)
-			continue;
-		ADL_Overdrive5_ODPerformanceLevels_Set(ga->iAdapterIndex, ga->DefPerfLev);
-		free(ga->DefPerfLev);
-		ADL_Overdrive5_FanSpeed_Set(ga->iAdapterIndex, 0, &ga->DefFanSpeedValue);
-		ADL_Overdrive5_FanSpeedToDefault_Set(ga->iAdapterIndex, 0);
-	}
-	adl_active = false;
-	unlock_adl();
-	free_adl();
-}
-#endif /* HAVE_ADL */

+ 0 - 28
adl.h

@@ -1,28 +0,0 @@
-#ifndef __ADL_H__
-#define __ADL_H__
-#ifdef HAVE_ADL
-bool adl_active;
-bool opt_reorder;
-int opt_hysteresis;
-const int opt_targettemp;
-const int opt_overheattemp;
-void init_adl(int nDevs);
-float gpu_temp(int gpu);
-int gpu_engineclock(int gpu);
-int gpu_memclock(int gpu);
-float gpu_vddc(int gpu);
-int gpu_activity(int gpu);
-int gpu_fanspeed(int gpu);
-int gpu_fanpercent(int gpu);
-bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
-	       int *activity, int *fanspeed, int *fanpercent, int *powertune);
-void change_gpusettings(int gpu);
-void gpu_autotune(int gpu, enum dev_enable *denable);
-void clear_adl(int nDevs);
-#else /* HAVE_ADL */
-#define adl_active (0)
-static inline void init_adl(__maybe_unused int nDevs) {}
-static inline void change_gpusettings(__maybe_unused int gpu) { }
-static inline void clear_adl(__maybe_unused int nDevs) {}
-#endif
-#endif

+ 0 - 274
adl_functions.h

@@ -1,274 +0,0 @@
-/*******************************************************************************
-
- * This program reads HW information from your ATI Radeon card and displays them
- * You can also change frequencies and voltages.
-
- * THIS PROGRAM MAY DAMAGE YOUR VIDEO CARD, IF YOU APPLY NONSENSIAL VALUES.
- * e.g. INCREASING THE VOLTAGES AND FREQUENCIES IN CONJUNCTION WITH LOWERING THE
- *      FAN SPEED IS NOT ADVISABLE!
-
- * Copyright(C) Thorsten Gilling (tgilling@web.de)
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-
-*******************************************************************************/
-
-// ------------------------------------------------------------------------------------------------------------
-// AMD ADL function types from Version 3.0
-// ------------------------------------------------------------------------------------------------------------
-
-#if defined (linux)
- #include <dlfcn.h>	//dyopen, dlsym, dlclose
- #include <stdlib.h>
- #include <string.h>	//memeset
-#else
- #include <windows.h>
- #include <tchar.h>
-#endif
-
-#include "ADL_SDK/adl_sdk.h"
-
-// Definitions of the used function pointers. Add more if you use other ADL APIs
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL Main
-typedef int ( *ADL_MAIN_CONTROL_CREATE ) (ADL_MAIN_MALLOC_CALLBACK callback, int iEnumConnectedAdapters);
-typedef int ( *ADL_MAIN_CONTROL_REFRESH ) ();
-typedef int ( *ADL_MAIN_CONTROL_DESTROY ) ();
-typedef int ( *ADL_GRAPHICS_PLATFORM_GET ) (int *lpPlatForm);
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL Adapter/General
-typedef int ( *ADL_ADAPTER_ACTIVE_GET ) (int iAdapterIndex, int *lpStatus);
-typedef int ( *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) (int *lpNumAdapters);
-typedef int ( *ADL_ADAPTER_ADAPTERINFO_GET ) (LPAdapterInfo lpInfo, int iInputSize);
-typedef int ( *ADL_ADAPTER_ASICFAMILYTYPE_GET ) (int iAdapterIndex, int *lpAsicTypes, int *lpValids);
-typedef int ( *ADL_ADAPTER_SPEED_CAPS )	(int iAdapterIndex, int *lpCaps, int *lpValid);
-typedef int ( *ADL_ADAPTER_SPEED_GET ) (int iAdapterIndex, int *lpCurrent, int *lpDefault);
-typedef int ( *ADL_ADAPTER_SPEED_SET ) (int iAdapterIndex, int iSpeed);
-typedef int ( *ADL_ADAPTER_ACCESSIBILITY_GET ) (int iAdapterIndex, int *lpAccessibility);
-typedef int ( *ADL_ADAPTER_VIDEOBIOSINFO_GET ) (int iAdapterIndex, ADLBiosInfo *lpBiosInfo);
-typedef int ( *ADL_ADAPTER_ID_GET ) (int iAdapterIndex, int *lpAdapterID);
-
-// ADL Adapter/CrossDisplay
-typedef int ( *ADL_ADAPTER_CROSSDISPLAYADAPTERROLE_CAPS ) (int iAdapterIndex, int *lpCrossDisplaySupport, int *lpAdapterRole, int *lpNumPossDisplayAdapters, int **lppPossDisplayAdapters, int *lpNnumPosRenderingAdapters, int **lppPosRenderingAdapters, int *lpErrorStatus);
-typedef int ( *ADL_ADAPTER_CROSSDISPLAYINFO_GET ) (int iAdapterIndex, int *lpAdapterRole, int *lpCrossdisplayMode, int *lpNumDisplayAdapters, int **lppDisplayAdapters, int *lpNumRenderingAdapters, int **lppRenderingAdapters, int *lpErrorCodeStatus);
-typedef int ( *ADL_ADAPTER_CROSSDISPLAYINFO_SET ) (int iAdapterIndex, int iDisplayAdapterIndex, int iRenderingAdapterIndex, int crossdisplayMode, int *lpErrorCode);
-
-// ADL Adapter/CrossFire
-typedef int ( *ADL_ADAPTER_CROSSFIRE_CAPS ) (int iAdapterIndex, int *lpPreferred, int *lpNumComb, ADLCrossfireComb **ppCrossfireComb);
-typedef int ( *ADL_ADAPTER_CROSSFIRE_GET ) (int iAdapterIndex, ADLCrossfireComb *lpCrossfireComb, ADLCrossfireInfo *lpCrossfireInfo);
-typedef int ( *ADL_ADAPTER_CROSSFIRE_SET ) (int iAdapterIndex, ADLCrossfireComb *lpCrossfireComb);
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL Display/Misc
-
-typedef int ( *ADL_DISPLAY_DISPLAYINFO_GET ) (int iAdapterIndex, int *lpNumDisplays, ADLDisplayInfo **lppInfo, int iForceDetect);
-typedef int ( *ADL_DISPLAY_NUMBEROFDISPLAYS_GET ) (int iAdapterIndex, int *lpNumDisplays);
-typedef int ( *ADL_DISPLAY_PRESERVEDASPECTRATIO_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport, int *lpCurrent, int *lpDefault);
-typedef int ( *ADL_DISPLAY_PRESERVEDASPECTRATIO_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-typedef int ( *ADL_DISPLAY_IMAGEEXPANSION_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport, int *lpCurrent, int *lpDefault);
-typedef int ( *ADL_DISPLAY_IMAGEEXPANSION_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-typedef int ( *ADL_DISPLAY_POSITION_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpX, int *lpY, int *lpXDefault, int *lpYDefault, int *lpMinX, int *lpMinY, int *lpMaxX, int *lpMaxY, int *lpStepX, int *lpStepY);
-typedef int ( *ADL_DISPLAY_POSITION_SET ) (int iAdapterIndex, int iDisplayIndex, int iX, int iY);
-typedef int ( *ADL_DISPLAY_SIZE_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpWidth, int *lpHeight, int *lpDefaultWidth, int *lpDefaultHeight, int *lpMinWidth, int *lpMinHeight, int *lpMaxWidth, int *lpMaxHeight, int *lpStepWidth, int *lpStepHeight);
-typedef int ( *ADL_DISPLAY_SIZE_SET ) (int iAdapterIndex, int iDisplayIndex, int iWidth, int iHeight);
-typedef int ( *ADL_DISPLAY_ADJUSTCAPS_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpInfo);
-typedef int ( *ADL_DISPLAY_CAPABILITIES_GET ) (int iAdapterIndex, int *lpNumberOfControlers, int *lpNumberOfDisplays);
-typedef int ( *ADL_DISPLAY_CONNECTEDDISPLAYS_GET ) (int iAdapterIndex, int *lpConnections);
-typedef int ( *ADL_DISPLAY_DEVICECONFIG_GET ) (int iAdapterIndex, int iDisplayIndex, ADLDisplayConfig *lpDisplayConfig);
-typedef int ( *ADL_DISPLAY_PROPERTY_GET ) (int iAdapterIndex, int iDisplayIndex, ADLDisplayProperty *lpDisplayProperty);
-typedef int ( *ADL_DISPLAY_PROPERTY_SET ) (int iAdapterIndex, int iDisplayIndex, ADLDisplayProperty *lpDisplayProperty);
-typedef int ( *ADL_DISPLAY_SWITCHINGCAPABILITY_GET ) (int iAdapterIndex, int *lpResult);
-typedef int ( *ADL_DISPLAY_DITHERSTATE_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpDitherState);
-typedef int ( *ADL_DISPLAY_DITHERSTATE_SET ) (int iAdapterIndex, int iDisplayIndex, int iDitherState);
-typedef int ( *ADL_DISPLAY_SUPPORTEDPIXELFORMAT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpPixelFormat);
-typedef int ( *ADL_DISPLAY_PIXELFORMAT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpPixelFormat);
-typedef int ( *ADL_DISPLAY_PIXELFORMAT_SET ) (int iAdapterIndex, int iDisplayIndex, int iPixelFormat);
-typedef int ( *ADL_DISPLAY_ODCLOCKINFO_GET ) (int iAdapterIndex, ADLAdapterODClockInfo *lpOdClockInfo);
-typedef int ( *ADL_DISPLAY_ODCLOCKCONFIG_SET ) (int iAdapterIndex, ADLAdapterODClockConfig *lpOdClockConfig);
-typedef int ( *ADL_DISPLAY_ADJUSTMENTCOHERENT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpAdjustmentCoherentCurrent, int *lpAdjustmentCoherentDefault);
-typedef int ( *ADL_DISPLAY_ADJUSTMENTCOHERENT_SET ) (int iAdapterIndex, int iDisplayIndex, int iAdjustmentCoherent);
-typedef int ( *ADL_DISPLAY_REDUCEDBLANKING_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpReducedBlankingCurrent, int *lpReducedBlankingDefault);
-typedef int ( *ADL_DISPLAY_REDUCEDBLANKING_SET ) (int iAdapterIndex, int iDisplayIndex, int iReducedBlanking);
-typedef int ( *ADL_DISPLAY_FORMATSOVERRIDE_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSettingsSupported, int *lpSettingsSupportedEx, int *lpCurSettings);
-typedef int ( *ADL_DISPLAY_FORMATSOVERRIDE_SET ) (int iAdapterIndex, int iDisplayIndex, int iOverrideSettings);
-typedef int ( *ADL_DISPLAY_MVPUCAPS_GET ) (int iAdapterIndex, ADLMVPUCaps *lpMvpuCaps);
-typedef int ( *ADL_DISPLAY_MVPUSTATUS_GET ) (int iAdapterIndex, ADLMVPUStatus *lpMvpuStatus);
-
-// ADL Display/Eyefinity
-typedef int ( *ADL_ADAPTER_ACTIVE_SET ) (int iAdapterIndex, int iStatus, int *lpNewlyActivate);
-typedef int ( *ADL_ADAPTER_ACTIVE_SETPREFER ) (int iAdapterIndex, int iStatus, int iNumPreferTarget, ADLDisplayTarget *lpPreferTarget, int *lpNewlyActivate);
-typedef int ( *ADL_ADAPTER_PRIMARY_GET ) (int *lpPrimaryAdapterIndex);
-typedef int ( *ADL_ADAPTER_PRIMARY_SET ) (int iAdapterIndex);
-typedef int ( *ADL_ADAPTER_MODESWITCH ) (int iAdapterIndex);
-typedef int ( *ADL_DISPLAY_MODES_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpNumModes, ADLMode **lppModes);
-typedef int ( *ADL_DISPLAY_MODES_SET ) (int iAdapterIndex, int iDisplayIndex, int iNumModes, ADLMode *lpModes);
-typedef int ( *ADL_DISPLAY_POSSIBLEMODE_GET ) (int iAdapterIndex, int *lpNumModes, ADLMode **lppModes);
-typedef int ( *ADL_DISPLAY_FORCIBLEDISPLAY_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpStatus);
-typedef int ( *ADL_DISPLAY_FORCIBLEDISPLAY_SET ) (int iAdapterIndex, int iDisplayIndex, int iStatus);
-typedef int ( *ADL_ADAPTER_NUMBEROFACTIVATABLESOURCES_GET ) (int iAdapterIndex, int *lpNumSources, ADLActivatableSource **lppSources);
-typedef int ( *ADL_ADAPTER_DISPLAY_CAPS ) (int iAdapterIndex, int *lpNumDisplayCaps, ADLAdapterDisplayCap **lppAdapterDisplayCaps);
-typedef int ( *ADL_DISPLAY_DISPLAYMAPCONFIG_GET ) (int iAdapterIndex, int *lpNumDisplayMap, ADLDisplayMap **lppDisplayMap, int *lpNumDisplayTarget, ADLDisplayTarget **lppDisplayTarget, int iOptions);
-typedef int ( *ADL_DISPLAY_DISPLAYMAPCONFIG_SET ) (int iAdapterIndex, int iNumDisplayMap, ADLDisplayMap *lpDisplayMap, int iNumDisplayTarget, ADLDisplayTarget *lpDisplayTarget);
-typedef int ( *ADL_DISPLAY_POSSIBLEMAPPING_GET ) (int iAdapterIndex, int iNumberOfPresetMapping, ADLPossibleMapping *lpPresetMappings, int iEnquiryControllerIndex, int *lpNumberOfEnquiryPossibleMappings, ADLPossibleMapping **lppEnquiryPossibleMappings);
-typedef int ( *ADL_DISPLAY_DISPLAYMAPCONFIG_VALIDATE ) (int iAdapterIndex, int iNumPossibleMap, ADLPossibleMap *lpPossibleMaps, int *lpNumPossibleMapResult, ADLPossibleMapResult **lppPossibleMapResult);
-typedef int ( *ADL_DISPLAY_DISPLAYMAPCONFIG_POSSIBLEADDANDREMOVE ) (int iAdapterIndex, int iNumDisplayMap, ADLDisplayMap *lpDisplayMap, int iNumDisplayTarget, ADLDisplayTarget *lpDisplayTarget, int *lpNumPossibleAddTarget, ADLDisplayTarget **lppPossibleAddTarget, int *lpNumPossibleRemoveTarget, ADLDisplayTarget **lppPossibleRemoveTarget);
-typedef int ( *ADL_DISPLAY_SLSGRID_CAPS ) (int iAdapterIndex, int *lpNumSLSGrid, ADLSLSGrid **lppSLSGrid, int iOption);
-typedef int ( *ADL_DISPLAY_SLSMAPINDEXLIST_GET ) (int iAdapterIndex, int *lpNumSLSMapIndexList, int **lppSLSMapIndexList, int iOptions);
-typedef int ( *ADL_DISPLAY_SLSMAPINDEX_GET ) (int iAdapterIndex, int iADLNumDisplayTarget, ADLDisplayTarget *lpDisplayTarget, int *lpSLSMapIndex);
-typedef int ( *ADL_DISPLAY_SLSMAPCONFIG_GET ) (int iAdapterIndex, int iSLSMapIndex, ADLSLSMap *lpSLSMap, int *lpNumSLSTarget, ADLSLSTarget **lppSLSTarget, int *lpNumNativeMode, ADLSLSMode **lppNativeMode, int *lpNumBezelMode, ADLBezelTransientMode **lppBezelMode, int *lpNumTransientMode, ADLBezelTransientMode **lppTransientMode, int *lpNumSLSOffset, ADLSLSOffset **lppSLSOffset, int iOption);
-typedef int ( *ADL_DISPLAY_SLSMAPCONFIG_CREATE ) (int iAdapterIndex, ADLSLSMap SLSMap, int iNumTargetTarget, ADLSLSTarget *lpSLSTarget, int iBezelModePercent, int *lpSLSMapIndex, int iOption);
-typedef int ( *ADL_DISPLAY_SLSMAPCONFIG_DELETE ) (int iAdapterIndex, int iSLSMapIndex);
-typedef int ( *ADL_DISPLAY_SLSMAPCONFIG_SETSTATE ) (int iAdapterIndex, int iSLSMapIndex, int iState);
-typedef int ( *ADL_DISPLAY_SLSMAPCONFIG_REARRANGE ) (int iAdapterIndex, int iSLSMapIndex, int iNumDisplayTarget, ADLSLSTarget *lpSLSTarget, ADLSLSMap slsMap, int iOption);
-typedef int ( *ADL_DISPLAY_POSSIBLEMODE_WINXP_GET ) (int iAdapterIndex, int iNumDisplayTargets, ADLDisplayTarget *lpDisplayTargets, int iLargeDesktopSupportedType, int iDevicePanningControl, int *lpNumModes, ADLMode **lppModes);
-typedef int ( *ADL_DISPLAY_BEZELOFFSETSTEPPINGSIZE_GET ) (int iAdapterIndex, int *lpNumBezelOffsetSteppingSize, ADLBezelOffsetSteppingSize **lppBezelOffsetSteppingSize);
-typedef int ( *ADL_DISPLAY_BEZELOFFSET_SET ) (int iAdapterIndex, int iSLSMapIndex, int iNumBezelOffset, LPADLSLSOffset lpBezelOffset, ADLSLSMap SLSMap, int iOption);
-typedef int ( *ADL_DISPLAY_BEZELSUPPORTED_VALIDATE ) (int iAdapterIndex, int iNumPossibleSLSMap, LPADLPossibleSLSMap lpPossibleSLSMaps, int *lpNumPossibleSLSMapResult, LPADLPossibleMapResult *lppPossibleMapResult);
-
-// ADL Display/Color
-typedef int ( *ADL_DISPLAY_COLORCAPS_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpCaps, int *lpValids);
-typedef int ( *ADL_DISPLAY_COLOR_SET ) (int iAdapterIndex, int iDisplayIndex, int iColorType, int iCurrent);
-typedef int ( *ADL_DISPLAY_COLOR_GET ) (int iAdapterIndex, int iDisplayIndex, int iColorType, int *lpCurrent, int *lpDefault, int *lpMin, int *lpMax, int *lpStep);
-typedef int ( *ADL_DISPLAY_COLORTEMPERATURESOURCE_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpTempSource);
-typedef int ( *ADL_DISPLAY_COLORTEMPERATURESOURCE_SET ) (int iAdapterIndex, int iDisplayIndex, int iTempSource);
-
-// ADL Display/Timing
-typedef int ( *ADL_DISPLAY_MODETIMINGOVERRIDE_GET ) (int iAdapterIndex, int iDisplayIndex, ADLDisplayMode *lpModeIn, ADLDisplayModeInfo *lpModeInfoOut);
-typedef int ( *ADL_DISPLAY_MODETIMINGOVERRIDE_SET ) (int iAdapterIndex, int iDisplayIndex, ADLDisplayModeInfo *lpMode, int iForceUpdate);
-typedef int ( *ADL_DISPLAY_MODETIMINGOVERRIDELIST_GET ) (int iAdapterIndex, int iDisplayIndex, int iMaxNumOfOverrides, ADLDisplayModeInfo *lpModeInfoList, int *lpNumOfOverrides);
-
-// ADL Display/Customize
-typedef int ( *ADL_DISPLAY_CUSTOMIZEDMODELISTNUM_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpListNum);
-typedef int ( *ADL_DISPLAY_CUSTOMIZEDMODELIST_GET ) (int iAdapterIndex, int iDisplayIndex, ADLCustomMode *lpCustomModeList, int iBuffSize);
-typedef int ( *ADL_DISPLAY_CUSTOMIZEDMODE_ADD ) (int iAdapterIndex, int iDisplayIndex, ADLCustomMode customMode);
-typedef int ( *ADL_DISPLAY_CUSTOMIZEDMODE_DELETE ) (int iAdapterIndex, int iDisplayIndex, int iIndex);
-typedef int ( *ADL_DISPLAY_CUSTOMIZEDMODE_VALIDATE ) (int iAdapterIndex, int iDisplayIndex, ADLCustomMode customMode, int *lpValid);
-
-// ADL Display/Over-Underscan
-typedef int ( *ADL_DISPLAY_UNDERSCAN_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-typedef int ( *ADL_DISPLAY_UNDERSCAN_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpCurrent, int *lpDefault, int *lpMin, int *lpMax, int *lpStep);
-typedef int ( *ADL_DISPLAY_OVERSCAN_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-typedef int ( *ADL_DISPLAY_OVERSCAN_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpCurrent, int *lpDefualt, int *lpMin, int *lpMax, int *lpStep);
-
-// ADL Display/Overlay
-typedef int ( *ADL_DISPLAY_CONTROLLEROVERLAYADJUSTMENTCAPS_GET ) (int iAdapterIndex, ADLControllerOverlayInput *lpOverlayInput, ADLControllerOverlayInfo *lpCapsInfo);
-typedef int ( *ADL_DISPLAY_CONTROLLEROVERLAYADJUSTMENTDATA_GET ) (int iAdapterIndex, ADLControllerOverlayInput *lpOverlay);
-typedef int ( *ADL_DISPLAY_CONTROLLEROVERLAYADJUSTMENTDATA_SET ) (int iAdapterIndex, ADLControllerOverlayInput *lpOverlay);
-
-// ADL Display/PowerXpress
-typedef int ( *ADL_DISPLAY_POWERXPRESSVERSION_GET ) (int iAdapterIndex, int *lpVersion);
-typedef int ( *ADL_DISPLAY_POWERXPRESSACTIVEGPU_GET ) (int iAdapterIndex, int *lpActiveGPU);
-typedef int ( *ADL_DISPLAY_POWERXPRESSACTIVEGPU_SET ) (int iAdapterIndex, int iActiveGPU, int *lpOperationResult);
-typedef int ( *ADL_DISPLAY_POWERXPRESS_AUTOSWITCHCONFIG_GET ) (int iAdapterIndex, int *lpAutoSwitchOnACDCEvent, int *lpAutoSwitchOnDCACEvent);
-typedef int ( *ADL_DISPLAY_POWERXPRESS_AUTOSWITCHCONFIG_SET ) (int iAdapterIndex, int iAutoSwitchOnACDCEvent, int iAutoSwitchOnDCACEvent);
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL DFP
-typedef int ( *ADL_DFP_BASEAUDIOSUPPORT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport);
-typedef int ( *ADL_DFP_HDMISUPPORT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport);
-typedef int ( *ADL_DFP_MVPUANALOGSUPPORT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport);
-typedef int ( *ADL_DFP_PIXELFORMAT_CAPS ) (int iAdapterIndex, int iDisplayIndex, int *lpValidBits, int *lpValidCaps);
-typedef int ( *ADL_DFP_PIXELFORMAT_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpCurState, int *lpDefault);
-typedef int ( *ADL_DFP_PIXELFORMAT_SET ) (int iAdapterIndex, int iDisplayIndex, int iState);
-typedef int ( *ADL_DFP_GPUSCALINGENABLE_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport, int *lpCurrent, int *lpDefault);
-typedef int ( *ADL_DFP_GPUSCALINGENABLE_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-typedef int ( *ADL_DFP_ALLOWONLYCETIMINGS_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpSupport, int *lpCurrent, int *lpDefault);
-typedef int ( *ADL_DFP_ALLOWONLYCETIMINGS_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-
-// ADl TV
-typedef int ( *ADL_DISPLAY_TVCAPS_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpcaps);
-typedef int ( *ADL_TV_STANDARD_SET ) (int iAdapterIndex, int iDisplayIndex, int iCurrent);
-typedef int ( *ADL_TV_STANDARD_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpCurrent, int *lpDefault, int *lpSupportedStandards);
-
-// ADL Component Video
-typedef int ( *ADL_CV_DONGLESETTINGS_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpDongleSetting, int *lpOverrideSettingsSupported, int *lpCurOverrideSettings);
-typedef int ( *ADL_CV_DONGLESETTINGS_SET ) (int iAdapterIndex, int iDisplayIndex, int iOverrideSettings);
-typedef int ( *ADL_CV_DONGLESETTINGS_RESET ) (int iAdapterIndex, int iDisplayIndex);
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL Overdrive 5
-typedef int ( *ADL_OVERDRIVE5_CURRENTACTIVITY_GET ) (int iAdapterIndex, ADLPMActivity *lpActivity);
-typedef int ( *ADL_OVERDRIVE5_THERMALDEVICES_ENUM ) (int iAdapterIndex, int iThermalControllerIndex, ADLThermalControllerInfo *lpThermalControllerInfo);
-typedef int ( *ADL_OVERDRIVE5_TEMPERATURE_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLTemperature *lpTemperature);
-typedef int ( *ADL_OVERDRIVE5_FANSPEEDINFO_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedInfo *lpFanSpeedInfo);
-typedef int ( *ADL_OVERDRIVE5_FANSPEED_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue);
-typedef int ( *ADL_OVERDRIVE5_FANSPEED_SET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue);
-typedef int ( *ADL_OVERDRIVE5_FANSPEEDTODEFAULT_SET ) (int iAdapterIndex, int iThermalControllerIndex);
-typedef int ( *ADL_OVERDRIVE5_ODPARAMETERS_GET ) (int iAdapterIndex, ADLODParameters *lpOdParameters);
-typedef int ( *ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET ) (int iAdapterIndex, int iDefault, ADLODPerformanceLevels *lpOdPerformanceLevels);
-typedef int ( *ADL_OVERDRIVE5_ODPERFORMANCELEVELS_SET ) (int iAdapterIndex, ADLODPerformanceLevels *lpOdPerformanceLevels);
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL I2C
-typedef int ( *ADL_DISPLAY_WRITEANDREADI2CREV_GET ) (int iAdapterIndex, int *lpMajor, int *lpMinor);
-typedef int ( *ADL_DISPLAY_WRITEANDREADI2C ) (int iAdapterIndex, ADLI2C *plI2C);
-typedef int ( *ADL_DISPLAY_DDCBLOCKACCESS_GET ) (int iAdapterIndex, int iDisplayIndex, int iOption, int iCommandIndex, int iSendMsgLen, char *lpucSendMsgBuf, int *lpulRecvMsgLen, char *lpucRecvMsgBuf);
-typedef int ( *ADL_DISPLAY_DDCINFO_GET ) (int iAdapterIndex, int iDisplayIndex, ADLDDCInfo *lpInfo);
-typedef int ( *ADL_DISPLAY_EDIDDATA_GET ) (int iAdapterIndex, int iDisplayIndex, ADLDisplayEDIDData *lpEDIDData);
-
-// ------------------------------------------------------------------------------------------------------------
-
-// ADL Workstation
-typedef int ( *ADL_WORKSTATION_CAPS ) (int iAdapterIndex, int *lpValidBits, int *lpCaps);
-typedef int ( *ADL_WORKSTATION_STEREO_GET ) (int iAdapterIndex, int *lpDefState, int *lpCurState);
-typedef int ( *ADL_WORKSTATION_STEREO_SET ) (int iAdapterIndex, int iCurState);
-typedef int ( *ADL_WORKSTATION_ADAPTERNUMOFGLSYNCCONNECTORS_GET ) (int iAdapterIndex, int *lpNumOfGLSyncConnectors);
-typedef int ( *ADL_WORKSTATION_DISPLAYGENLOCKCAPABLE_GET ) (int iAdapterIndex, int iDisplayIndex, int *lpCanGenlock);
-typedef int ( *ADL_WORKSTATION_GLSYNCMODULEDETECT_GET ) (int iAdapterIndex, int iGlSyncConnector, ADLGLSyncModuleID *lpGlSyncModuleID);
-typedef int ( *ADL_WORKSTATION_GLSYNCMODULEINFO_GET ) (int iAdapterIndex, int iGlSyncConnector, int *lpNumGLSyncGPUPorts, int *lpNumGlSyncPorts, int *lpMaxSyncDelay, int *lpMaxSampleRate, ADLGLSyncPortCaps **ppGlSyncPorts);
-typedef int ( *ADL_WORKSTATION_GLSYNCGENLOCKCONFIGURATION_GET ) (int iAdapterIndex, int iGlSyncConnector, int iGlValidMask, ADLGLSyncGenlockConfig *lpGlSyncGenlockConfig);
-typedef int ( *ADL_WORKSTATION_GLSYNCGENLOCKCONFIGURATION_SET ) (int iAdapterIndex, int iGlSyncConnector, ADLGLSyncGenlockConfig glSyncGenlockConfig);
-typedef int ( *ADL_WORKSTATION_GLSYNCPORTSTATE_GET ) (int iAdapterIndex, int iGlSyncConnector, int iGlSyncPortType, int iNumLEDs, ADLGlSyncPortInfo *lpGlSyncPortInfo, int **ppGlSyncLEDs);
-typedef int ( *ADL_WORKSTATION_GLSYNCPORTSTATE_SET ) (int iAdapterIndex, int iGlSyncConnector, ADLGlSyncPortControl glSyncPortControl);
-typedef int ( *ADL_WORKSTATION_DISPLAYGLSYNCMODE_GET ) (int iAdapterIndex, int iDisplayIndex, ADLGlSyncMode *lpGlSyncMode);
-typedef int ( *ADL_WORKSTATION_DISPLAYGLSYNCMODE_SET ) (int iAdapterIndex, int iDisplayIndex, ADLGlSyncMode glSyncMode);
-typedef int ( *ADL_WORKSTATION_GLSYNCSUPPORTEDTOPOLOGY_GET ) (int iAdapterIndex, int iNumSyncModes, ADLGlSyncMode2 *glSyncModes, int *iNumSugSyncModes, ADLGlSyncMode2 **glSugSyncModes);
-typedef int ( *ADL_WORKSTATION_LOADBALANCING_GET ) (int *lpResultMask, int *lpCurResultValue, int *lpDefResultValue);
-typedef int ( *ADL_WORKSTATION_LOADBALANCING_SET ) (int iCurState);
-typedef int ( *ADL_WORKSTATION_LOADBALANCING_CAPS ) (int iAdapterIndex, int *lpResultMask, int *lpResultValue);
-
-// ------------------------------------------------------------------------------------------------------------
-
-#ifdef LINUX
-// ADL Linux
-typedef int ( *ADL_ADAPTER_MEMORYINFO_GET ) (int iAdapterIndex, ADLMemoryInfo *lpMemoryInfo);
-typedef int ( *ADL_CONTROLLER_COLOR_SET ) (int iAdapterIndex, int iControllerIndex, ADLGamma adlGamma);
-typedef int ( *ADL_CONTROLLER_COLOR_GET ) (int iAdapterIndex, int iControllerIndex, ADLGamma *lpGammaCurrent, ADLGamma *lpGammaDefault, ADLGamma *lpGammaMin, ADLGamma *lpGammaMax);
-typedef int ( *ADL_DESKTOPCONFIG_GET ) (int iAdapterIndex, int *lpDesktopConfig);
-typedef int ( *ADL_DESKTOPCONFIG_SET ) (int iAdapterIndex, int iDesktopConfig);
-typedef int ( *ADL_NUMBEROFDISPLAYENABLE_GET ) (int iAdapterIndex, int *lpNumberOfDisplays);
-typedef int ( *ADL_DISPLAYENABLE_SET ) (int iAdapterIndex, int *lpDisplayIndexList, int iDisplayListSize, int bPersistOnly);
-typedef int ( *ADL_DISPLAY_IDENTIFYDISPLAY ) (int iAdapterIndex, int iDisplayIndex, int iDisplayControllerIndex, int iShow, int iDisplayNum, int iPosX, int iPosY);
-typedef int ( *ADL_DISPLAY_LUTCOLOR_SET ) (int iAdapterIndex, int iDisplayIndex, ADLGamma adlGamma);
-typedef int ( *ADL_DISPLAY_LUTCOLOR_GET ) (int iAdapterIndex, int iDisplayIndex, ADLGamma *lpGammaCurrent, ADLGamma *lpGammaDefault, ADLGamma *lpGammaMin, ADLGamma *lpGammaMax);
-typedef int ( *ADL_ADAPTER_XSCREENINFO_GET ) (LPXScreenInfo lpXScreenInfo, int iInputSize);
-typedef int ( *ADL_DISPLAY_XRANDRDISPLAYNAME_GET ) (int iAdapterIndex, int iDisplayIndex, char *lpXrandrDisplayName, int iBuffSize);
-#endif
-// ------------------------------------------------------------------------------------------------------------
-
-
-// experimental undocumented
-typedef int ( *ADL_OVERDRIVE5_POWERCONTROL_GET ) (int iAdapterIndex, int* iPercentage, int* whatever);
-typedef int ( *ADL_OVERDRIVE5_POWERCONTROL_SET ) (int iAdapterIndex, int iPercentage);
-//typedef int ( *ADL_OVERDRIVE5_POWERCONTROL_CAPS ) (int iAdapterIndex, int* lpCaps, int* lpValid);
-//typedef int ( *ADL_OVERDRIVE5_POWERCONTROLINFO_GET) (int iAdapterIndex, ...)

+ 38 - 344
cgminer.c

@@ -53,11 +53,7 @@ char *curly = ":D";
 
 
 #include "compat.h"
 #include "compat.h"
 #include "miner.h"
 #include "miner.h"
-#include "findnonce.h"
-#include "adl.h"
-#include "driver-opencl.h"
 #include "bench_block.h"
 #include "bench_block.h"
-#include "scrypt.h"
 #ifdef USE_USBUTILS
 #ifdef USE_USBUTILS
 #include "usbutils.h"
 #include "usbutils.h"
 #endif
 #endif
@@ -113,17 +109,9 @@ static const bool opt_time = true;
 unsigned long long global_hashrate;
 unsigned long long global_hashrate;
 unsigned long global_quota_gcd = 1;
 unsigned long global_quota_gcd = 1;
 
 
-#if defined(HAVE_OPENCL) || defined(USE_USBUTILS)
+#if defined(USE_USBUTILS)
 int nDevs;
 int nDevs;
 #endif
 #endif
-#ifdef HAVE_OPENCL
-int opt_dynamic_interval = 7;
-int opt_g_threads = -1;
-int gpu_threads;
-#ifdef USE_SCRYPT
-bool opt_scrypt;
-#endif
-#endif
 bool opt_restart = true;
 bool opt_restart = true;
 bool opt_nogpu;
 bool opt_nogpu;
 
 
@@ -136,7 +124,6 @@ int total_devices;
 int zombie_devs;
 int zombie_devs;
 static int most_devices;
 static int most_devices;
 struct cgpu_info **devices;
 struct cgpu_info **devices;
-bool have_opencl;
 int mining_threads;
 int mining_threads;
 int num_processors;
 int num_processors;
 #ifdef HAVE_CURSES
 #ifdef HAVE_CURSES
@@ -1146,14 +1133,6 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITH_ARG("--device|-d",
 	OPT_WITH_ARG("--device|-d",
 		     set_devices, NULL, NULL,
 		     set_devices, NULL, NULL,
 	             "Select device to use, one value, range and/or comma separated (e.g. 0-2,4) default: all"),
 	             "Select device to use, one value, range and/or comma separated (e.g. 0-2,4) default: all"),
-	OPT_WITHOUT_ARG("--disable-gpu|-G",
-			opt_set_bool, &opt_nogpu,
-#ifdef HAVE_OPENCL
-			"Disable GPU mining even if suitable devices exist"
-#else
-			opt_hidden
-#endif
-	),
 	OPT_WITHOUT_ARG("--disable-rejecting",
 	OPT_WITHOUT_ARG("--disable-rejecting",
 			opt_set_bool, &opt_disable_pool,
 			opt_set_bool, &opt_disable_pool,
 			"Automatically disable pools that continually reject shares"),
 			"Automatically disable pools that continually reject shares"),
@@ -1166,59 +1145,6 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--fix-protocol",
 	OPT_WITHOUT_ARG("--fix-protocol",
 			opt_set_bool, &opt_fix_protocol,
 			opt_set_bool, &opt_fix_protocol,
 			"Do not redirect to a different getwork protocol (eg. stratum)"),
 			"Do not redirect to a different getwork protocol (eg. stratum)"),
-#ifdef HAVE_OPENCL
-	OPT_WITH_ARG("--gpu-dyninterval",
-		     set_int_1_to_65535, opt_show_intval, &opt_dynamic_interval,
-		     "Set the refresh interval in ms for GPUs using dynamic intensity"),
-	OPT_WITH_ARG("--gpu-platform",
-		     set_int_0_to_9999, opt_show_intval, &opt_platform_id,
-		     "Select OpenCL platform ID to use for GPU mining"),
-	OPT_WITH_ARG("--gpu-threads|-g",
-		     set_int_1_to_10, opt_show_intval, &opt_g_threads,
-		     "Number of threads per GPU (1 - 10)"),
-#ifdef HAVE_ADL
-	OPT_WITH_ARG("--gpu-engine",
-		     set_gpu_engine, NULL, NULL,
-		     "GPU engine (over)clock range in Mhz - one value, range and/or comma separated list (e.g. 850-900,900,750-850)"),
-	OPT_WITH_ARG("--gpu-fan",
-		     set_gpu_fan, NULL, NULL,
-		     "GPU fan percentage range - one value, range and/or comma separated list (e.g. 0-85,85,65)"),
-	OPT_WITH_ARG("--gpu-map",
-		     set_gpu_map, NULL, NULL,
-		     "Map OpenCL to ADL device order manually, paired CSV (e.g. 1:0,2:1 maps OpenCL 1 to ADL 0, 2 to 1)"),
-	OPT_WITH_ARG("--gpu-memclock",
-		     set_gpu_memclock, NULL, NULL,
-		     "Set the GPU memory (over)clock in Mhz - one value for all or separate by commas for per card"),
-	OPT_WITH_ARG("--gpu-memdiff",
-		     set_gpu_memdiff, NULL, NULL,
-		     "Set a fixed difference in clock speed between the GPU and memory in auto-gpu mode"),
-	OPT_WITH_ARG("--gpu-powertune",
-		     set_gpu_powertune, NULL, NULL,
-		     "Set the GPU powertune percentage - one value for all or separate by commas for per card"),
-	OPT_WITHOUT_ARG("--gpu-reorder",
-			opt_set_bool, &opt_reorder,
-			"Attempt to reorder GPU devices according to PCI Bus ID"),
-	OPT_WITH_ARG("--gpu-vddc",
-		     set_gpu_vddc, NULL, NULL,
-		     "Set the GPU voltage in Volts - one value for all or separate by commas for per card"),
-#endif
-#ifdef USE_SCRYPT
-	OPT_WITH_ARG("--lookup-gap",
-		     set_lookup_gap, NULL, NULL,
-		     "Set GPU lookup gap for scrypt mining, comma separated"),
-	OPT_WITH_ARG("--intensity|-I",
-		     set_intensity, NULL, NULL,
-		     "Intensity of GPU scanning (d or " MIN_SHA_INTENSITY_STR
-		     " -> " MAX_SCRYPT_INTENSITY_STR
-		     ",default: d to maintain desktop interactivity)"),
-#else
-	OPT_WITH_ARG("--intensity|-I",
-		     set_intensity, NULL, NULL,
-		     "Intensity of GPU scanning (d or " MIN_SHA_INTENSITY_STR
-		     " -> " MAX_SHA_INTENSITY_STR
-		     ",default: d to maintain desktop interactivity)"),
-#endif
-#endif
 	OPT_WITH_ARG("--hotplug",
 	OPT_WITH_ARG("--hotplug",
 		     set_int_0_to_9999, NULL, &hotplug_time,
 		     set_int_0_to_9999, NULL, &hotplug_time,
 #ifdef USE_USBUTILS
 #ifdef USE_USBUTILS
@@ -1227,15 +1153,10 @@ static struct opt_table opt_config_table[] = {
 		     opt_hidden
 		     opt_hidden
 #endif
 #endif
 		    ),
 		    ),
-#if defined(HAVE_OPENCL) || defined(HAVE_MODMINER)
+#if defined(HAVE_MODMINER)
 	OPT_WITH_ARG("--kernel-path|-K",
 	OPT_WITH_ARG("--kernel-path|-K",
 		     opt_set_charp, opt_show_charp, &opt_kernel_path,
 		     opt_set_charp, opt_show_charp, &opt_kernel_path,
-	             "Specify a path to where bitstream and kernel files are"),
-#endif
-#ifdef HAVE_OPENCL
-	OPT_WITH_ARG("--kernel|-k",
-		     set_kernel, NULL, NULL,
-		     "Override sha256 kernel to use (diablo, poclbm, phatk or diakgcn) - one value or comma separated"),
+	             "Specify a path to where bitstream files are"),
 #endif
 #endif
 #ifdef USE_ICARUS
 #ifdef USE_ICARUS
 	OPT_WITH_ARG("--icarus-options",
 	OPT_WITH_ARG("--icarus-options",
@@ -1307,14 +1228,6 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--no-pool-disable",
 	OPT_WITHOUT_ARG("--no-pool-disable",
 			opt_set_invbool, &opt_disable_pool,
 			opt_set_invbool, &opt_disable_pool,
 			opt_hidden),
 			opt_hidden),
-	OPT_WITHOUT_ARG("--no-restart",
-			opt_set_invbool, &opt_restart,
-#ifdef HAVE_OPENCL
-			"Do not attempt to restart GPUs that hang"
-#else
-			opt_hidden
-#endif
-	),
 	OPT_WITHOUT_ARG("--no-submit-stale",
 	OPT_WITHOUT_ARG("--no-submit-stale",
 			opt_set_invbool, &opt_submit_stale,
 			opt_set_invbool, &opt_submit_stale,
 		        "Don't submit shares if they are detected as stale"),
 		        "Don't submit shares if they are detected as stale"),
@@ -1368,14 +1281,6 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITH_ARG("--sched-stop",
 	OPT_WITH_ARG("--sched-stop",
 		     set_schedtime, NULL, &schedstop,
 		     set_schedtime, NULL, &schedstop,
 		     "Set a time of day in HH:MM to stop mining (will quit without a start time)"),
 		     "Set a time of day in HH:MM to stop mining (will quit without a start time)"),
-#ifdef USE_SCRYPT
-	OPT_WITHOUT_ARG("--scrypt",
-			opt_set_bool, &opt_scrypt,
-			"Use the scrypt algorithm for mining (litecoin only)"),
-	OPT_WITH_ARG("--shaders",
-		     set_shaders, NULL, NULL,
-		     "GPU shaders per card for tuning scrypt, comma separated"),
-#endif
 	OPT_WITH_ARG("--sharelog",
 	OPT_WITH_ARG("--sharelog",
 		     set_sharelog, NULL, NULL,
 		     set_sharelog, NULL, NULL,
 		     "Append share log to file"),
 		     "Append share log to file"),
@@ -1435,20 +1340,10 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--usb-list-all",
 	OPT_WITHOUT_ARG("--usb-list-all",
 			opt_set_bool, &opt_usb_list_all,
 			opt_set_bool, &opt_usb_list_all,
 			opt_hidden),
 			opt_hidden),
-#endif
-#ifdef HAVE_OPENCL
-	OPT_WITH_ARG("--vectors|-v",
-		     set_vector, NULL, NULL,
-		     "Override detected optimal vector (1, 2 or 4) - one value or comma separated list"),
 #endif
 #endif
 	OPT_WITHOUT_ARG("--verbose",
 	OPT_WITHOUT_ARG("--verbose",
 			opt_set_bool, &opt_log_output,
 			opt_set_bool, &opt_log_output,
 			"Log verbose output to stderr as well as status output"),
 			"Log verbose output to stderr as well as status output"),
-#ifdef HAVE_OPENCL
-	OPT_WITH_ARG("--worksize|-w",
-		     set_worksize, NULL, NULL,
-		     "Override detected optimal worksize - one value or comma separated list"),
-#endif
 	OPT_WITH_ARG("--userpass|-O",
 	OPT_WITH_ARG("--userpass|-O",
 		     set_userpass, NULL, NULL,
 		     set_userpass, NULL, NULL,
 		     "Username:Password pair for bitcoin JSON-RPC server"),
 		     "Username:Password pair for bitcoin JSON-RPC server"),
@@ -1613,9 +1508,6 @@ static char *opt_verusage_and_exit(const char *extra)
 #ifdef USE_BITFURY
 #ifdef USE_BITFURY
 		"bitfury "
 		"bitfury "
 #endif
 #endif
-#ifdef HAVE_OPENCL
-		"GPU "
-#endif
 #ifdef USE_HASHFAST
 #ifdef USE_HASHFAST
 		"hashfast "
 		"hashfast "
 #endif
 #endif
@@ -1630,9 +1522,6 @@ static char *opt_verusage_and_exit(const char *extra)
 #endif
 #endif
 #ifdef USE_MODMINER
 #ifdef USE_MODMINER
 		"modminer "
 		"modminer "
-#endif
-#ifdef USE_SCRYPT
-		"scrypt "
 #endif
 #endif
 		"mining support.\n"
 		"mining support.\n"
 		, packagename);
 		, packagename);
@@ -1641,16 +1530,11 @@ static char *opt_verusage_and_exit(const char *extra)
 	exit(0);
 	exit(0);
 }
 }
 
 
-#if defined(HAVE_OPENCL) || defined(USE_USBUTILS)
+#if defined(USE_USBUTILS)
 char *display_devs(int *ndevs)
 char *display_devs(int *ndevs)
 {
 {
 	*ndevs = 0;
 	*ndevs = 0;
-#ifdef HAVE_OPENCL
-	print_ndevs(ndevs);
-#endif
-#ifdef USE_USBUTILS
 	usb_all(0);
 	usb_all(0);
-#endif
 	exit(*ndevs);
 	exit(*ndevs);
 }
 }
 #endif
 #endif
@@ -1668,17 +1552,10 @@ static struct opt_table opt_cmdline_table[] = {
 	OPT_WITHOUT_ARG("--help|-h",
 	OPT_WITHOUT_ARG("--help|-h",
 			opt_verusage_and_exit, NULL,
 			opt_verusage_and_exit, NULL,
 			"Print this message"),
 			"Print this message"),
-#if defined(HAVE_OPENCL) || defined(USE_USBUTILS)
+#if defined(USE_USBUTILS)
 	OPT_WITHOUT_ARG("--ndevs|-n",
 	OPT_WITHOUT_ARG("--ndevs|-n",
 			display_devs, &nDevs,
 			display_devs, &nDevs,
-			"Display "
-#ifdef HAVE_OPENCL
-			"number of detected GPUs, OpenCL platform information, "
-#endif
-#ifdef USE_USBUTILS
-			"all USB devices, "
-#endif
-			"and exit"),
+			"Display all USB devices and exit"),
 #endif
 #endif
 	OPT_WITHOUT_ARG("--version|-V",
 	OPT_WITHOUT_ARG("--version|-V",
 			opt_version_and_exit, packagename,
 			opt_version_and_exit, packagename,
@@ -2143,9 +2020,6 @@ static int devcursor, logstart, logcursor;
 /* statusy is where the status window goes up to in cases where it won't fit at startup */
 /* statusy is where the status window goes up to in cases where it won't fit at startup */
 static int statusy;
 static int statusy;
 #endif
 #endif
-#ifdef HAVE_OPENCL
-struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */
-#endif
 
 
 #ifdef HAVE_CURSES
 #ifdef HAVE_CURSES
 static inline void unlock_curses(void)
 static inline void unlock_curses(void)
@@ -2313,8 +2187,7 @@ static void curses_print_status(void)
 		     prev_block, block_diff, blocktime, best_share);
 		     prev_block, block_diff, blocktime, best_share);
 	mvwhline(statuswin, 6, 0, '-', 80);
 	mvwhline(statuswin, 6, 0, '-', 80);
 	mvwhline(statuswin, statusy - 1, 0, '-', 80);
 	mvwhline(statuswin, statusy - 1, 0, '-', 80);
-	cg_mvwprintw(statuswin, devcursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit",
-		have_opencl ? "[G]PU management " : "");
+	cg_mvwprintw(statuswin, devcursor - 1, 1, "[P]ool management [S]ettings [D]isplay options [Q]uit");
 }
 }
 
 
 static void adj_width(int var, int *length)
 static void adj_width(int var, int *length)
@@ -2868,8 +2741,8 @@ static bool submit_upstream_work(struct work *work, CURL *curl, bool resubmit)
 
 
 			snprintf(worktime, sizeof(worktime),
 			snprintf(worktime, sizeof(worktime),
 				" <-%08lx.%08lx M:%c D:%1.*f G:%02d:%02d:%02d:%1.3f %s (%1.3f) W:%1.3f (%1.3f) S:%1.3f R:%02d:%02d:%02d",
 				" <-%08lx.%08lx M:%c D:%1.*f G:%02d:%02d:%02d:%1.3f %s (%1.3f) W:%1.3f (%1.3f) S:%1.3f R:%02d:%02d:%02d",
-				(unsigned long)be32toh(*(uint32_t *)&(work->data[opt_scrypt ? 32 : 28])),
-				(unsigned long)be32toh(*(uint32_t *)&(work->data[opt_scrypt ? 28 : 24])),
+				(unsigned long)be32toh(*(uint32_t *)&(work->data[28])),
+				(unsigned long)be32toh(*(uint32_t *)&(work->data[24])),
 				work->getwork_mode, diffplaces, work->work_difficulty,
 				work->getwork_mode, diffplaces, work->work_difficulty,
 				tm_getwork.tm_hour, tm_getwork.tm_min,
 				tm_getwork.tm_hour, tm_getwork.tm_min,
 				tm_getwork.tm_sec, getwork_time, workclone,
 				tm_getwork.tm_sec, getwork_time, workclone,
@@ -3127,8 +3000,6 @@ static void calc_diff(struct work *work, double known)
 		double d64, dcut64;
 		double d64, dcut64;
 
 
 		d64 = truediffone;
 		d64 = truediffone;
-		if (opt_scrypt)
-			d64 *= (double)65536;
 		dcut64 = le256todouble(work->target);
 		dcut64 = le256todouble(work->target);
 		if (unlikely(!dcut64))
 		if (unlikely(!dcut64))
 			dcut64 = 1;
 			dcut64 = 1;
@@ -3265,11 +3136,9 @@ static void __kill_work(void)
 #ifdef USE_USBUTILS
 #ifdef USE_USBUTILS
 	/* Best to get rid of it first so it doesn't
 	/* Best to get rid of it first so it doesn't
 	 * try to create any new devices */
 	 * try to create any new devices */
-	if (!opt_scrypt) {
-		forcelog(LOG_DEBUG, "Killing off HotPlug thread");
-		thr = &control_thr[hotplug_thr_id];
-		kill_timeout(thr);
-	}
+	forcelog(LOG_DEBUG, "Killing off HotPlug thread");
+	thr = &control_thr[hotplug_thr_id];
+	kill_timeout(thr);
 #endif
 #endif
 
 
 	forcelog(LOG_DEBUG, "Killing off watchpool thread");
 	forcelog(LOG_DEBUG, "Killing off watchpool thread");
@@ -3308,14 +3177,12 @@ static void __kill_work(void)
 #ifdef USE_USBUTILS
 #ifdef USE_USBUTILS
 	/* Release USB resources in case it's a restart
 	/* Release USB resources in case it's a restart
 	 * and not a QUIT */
 	 * and not a QUIT */
-	if (!opt_scrypt) {
-		forcelog(LOG_DEBUG, "Releasing all USB devices");
-		cg_completion_timeout(&usb_cleanup, NULL, 1000);
+	forcelog(LOG_DEBUG, "Releasing all USB devices");
+	cg_completion_timeout(&usb_cleanup, NULL, 1000);
 
 
-		forcelog(LOG_DEBUG, "Killing off usbres thread");
-		thr = &control_thr[usbres_thr_id];
-		kill_timeout(thr);
-	}
+	forcelog(LOG_DEBUG, "Killing off usbres thread");
+	thr = &control_thr[usbres_thr_id];
+	kill_timeout(thr);
 #endif
 #endif
 
 
 }
 }
@@ -3467,7 +3334,7 @@ static void roll_work(struct work *work)
 	*work_ntime = htobe32(ntime);
 	*work_ntime = htobe32(ntime);
 	local_work++;
 	local_work++;
 	work->rolls++;
 	work->rolls++;
-	work->blk.nonce = 0;
+	work->nonce = 0;
 	applog(LOG_DEBUG, "Successfully rolled work");
 	applog(LOG_DEBUG, "Successfully rolled work");
 
 
 	/* This is now a different work item so it needs a different ID for the
 	/* This is now a different work item so it needs a different ID for the
@@ -3759,8 +3626,6 @@ static uint64_t share_diff(const struct work *work)
 	uint64_t ret;
 	uint64_t ret;
 
 
 	d64 = truediffone;
 	d64 = truediffone;
-	if (opt_scrypt)
-		d64 *= (double)65536;
 	s64 = le256todouble(work->hash);
 	s64 = le256todouble(work->hash);
 	if (unlikely(!s64))
 	if (unlikely(!s64))
 		s64 = 0;
 		s64 = 0;
@@ -3795,14 +3660,6 @@ static void regen_hash(struct work *work)
 	sha256(hash1, 32, (unsigned char *)(work->hash));
 	sha256(hash1, 32, (unsigned char *)(work->hash));
 }
 }
 
 
-static void rebuild_hash(struct work *work)
-{
-	if (opt_scrypt)
-		scrypt_regenhash(work);
-	else
-		regen_hash(work);
-}
-
 static bool cnx_needed(struct pool *pool);
 static bool cnx_needed(struct pool *pool);
 
 
 /* Find the pool that currently has the highest priority */
 /* Find the pool that currently has the highest priority */
@@ -4404,94 +4261,6 @@ void write_config(FILE *fcfg)
 		}
 		}
 	fputs("\n]\n", fcfg);
 	fputs("\n]\n", fcfg);
 
 
-#ifdef HAVE_OPENCL
-	if (nDevs) {
-		/* Write GPU device values */
-		fputs(",\n\"intensity\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, gpus[i].dynamic ? "%sd" : "%s%d", i > 0 ? "," : "", gpus[i].intensity);
-		fputs("\",\n\"vectors\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				gpus[i].vwidth);
-		fputs("\",\n\"worksize\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].work_size);
-		fputs("\",\n\"kernel\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++) {
-			fprintf(fcfg, "%s", i > 0 ? "," : "");
-			switch (gpus[i].kernel) {
-				case KL_NONE: // Shouldn't happen
-					break;
-				case KL_POCLBM:
-					fprintf(fcfg, "poclbm");
-					break;
-				case KL_PHATK:
-					fprintf(fcfg, "phatk");
-					break;
-				case KL_DIAKGCN:
-					fprintf(fcfg, "diakgcn");
-					break;
-				case KL_DIABLO:
-					fprintf(fcfg, "diablo");
-					break;
-				case KL_SCRYPT:
-					fprintf(fcfg, "scrypt");
-					break;
-			}
-		}
-#ifdef USE_SCRYPT
-		fputs("\",\n\"lookup-gap\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].opt_lg);
-		fputs("\",\n\"thread-concurrency\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].opt_tc);
-		fputs("\",\n\"shaders\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "",
-				(int)gpus[i].shaders);
-#endif
-#ifdef HAVE_ADL
-		fputs("\",\n\"gpu-engine\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", gpus[i].min_engine, gpus[i].gpu_engine);
-		fputs("\",\n\"gpu-fan\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d-%d", i > 0 ? "," : "", gpus[i].min_fan, gpus[i].gpu_fan);
-		fputs("\",\n\"gpu-memclock\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_memclock);
-		fputs("\",\n\"gpu-memdiff\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_memdiff);
-		fputs("\",\n\"gpu-powertune\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].gpu_powertune);
-		fputs("\",\n\"gpu-vddc\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%1.3f", i > 0 ? "," : "", gpus[i].gpu_vddc);
-		fputs("\",\n\"temp-cutoff\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].cutofftemp);
-		fputs("\",\n\"temp-overheat\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.overtemp);
-		fputs("\",\n\"temp-target\" : \"", fcfg);
-		for(i = 0; i < nDevs; i++)
-			fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.targettemp);
-#endif
-		fputs("\"", fcfg);
-	}
-#endif
-#ifdef HAVE_ADL
-	if (opt_reorder)
-		fprintf(fcfg, ",\n\"gpu-reorder\" : true");
-#endif
-
 	/* Simple bool and int options */
 	/* Simple bool and int options */
 	struct opt_table *opt;
 	struct opt_table *opt;
 	for (opt = opt_config_table; opt->type != OPT_END; opt++) {
 	for (opt = opt_config_table; opt->type != OPT_END; opt++) {
@@ -5050,10 +4819,6 @@ static void *input_thread(void __maybe_unused *userdata)
 			display_pools();
 			display_pools();
 		else if (!strncasecmp(&input, "s", 1))
 		else if (!strncasecmp(&input, "s", 1))
 			set_options();
 			set_options();
-#if HAVE_OPENCL
-		else if (have_opencl && !strncasecmp(&input, "g", 1))
-			manage_gpu();
-#endif
 		if (opt_realquiet) {
 		if (opt_realquiet) {
 			disable_curses();
 			disable_curses();
 			break;
 			break;
@@ -5961,8 +5726,6 @@ void set_target(unsigned char *dest_target, double diff)
 	}
 	}
 
 
 	d64 = truediffone;
 	d64 = truediffone;
-	if (opt_scrypt)
-		d64 *= (double)65536;
 	d64 /= diff;
 	d64 /= diff;
 
 
 	dcut64 = d64 / bits192;
 	dcut64 = d64 / bits192;
@@ -6065,7 +5828,7 @@ static void gen_stratum_work(struct pool *pool, struct work *work)
 	local_work++;
 	local_work++;
 	work->pool = pool;
 	work->pool = pool;
 	work->stratum = true;
 	work->stratum = true;
-	work->blk.nonce = 0;
+	work->nonce = 0;
 	work->id = total_work++;
 	work->id = total_work++;
 	work->longpoll = false;
 	work->longpoll = false;
 	work->getwork_mode = GETWORK_MODE_STRATUM;
 	work->getwork_mode = GETWORK_MODE_STRATUM;
@@ -6163,19 +5926,16 @@ static void rebuild_nonce(struct work *work, uint32_t nonce)
 
 
 	*work_nonce = htole32(nonce);
 	*work_nonce = htole32(nonce);
 
 
-	rebuild_hash(work);
+	regen_hash(work);
 }
 }
 
 
 /* For testing a nonce against diff 1 */
 /* For testing a nonce against diff 1 */
 bool test_nonce(struct work *work, uint32_t nonce)
 bool test_nonce(struct work *work, uint32_t nonce)
 {
 {
 	uint32_t *hash_32 = (uint32_t *)(work->hash + 28);
 	uint32_t *hash_32 = (uint32_t *)(work->hash + 28);
-	uint32_t diff1targ;
 
 
 	rebuild_nonce(work, nonce);
 	rebuild_nonce(work, nonce);
-	diff1targ = opt_scrypt ? 0x0000ffffUL : 0;
-
-	return (le32toh(*hash_32) <= diff1targ);
+	return (*hash_32 == 0);
 }
 }
 
 
 /* For testing a nonce against an arbitrary diff */
 /* For testing a nonce against an arbitrary diff */
@@ -6184,7 +5944,7 @@ bool test_nonce_diff(struct work *work, uint32_t nonce, double diff)
 	uint64_t *hash64 = (uint64_t *)(work->hash + 24), diff64;
 	uint64_t *hash64 = (uint64_t *)(work->hash + 24), diff64;
 
 
 	rebuild_nonce(work, nonce);
 	rebuild_nonce(work, nonce);
-	diff64 = opt_scrypt ? 0x0000ffff00000000ULL : 0x00000000ffff0000ULL;
+	diff64 = 0x00000000ffff0000ULL;
 	diff64 /= diff;
 	diff64 /= diff;
 
 
 	return (le64toh(*hash64) <= diff64);
 	return (le64toh(*hash64) <= diff64);
@@ -6196,9 +5956,6 @@ static void update_work_stats(struct thr_info *thr, struct work *work)
 
 
 	work->share_diff = share_diff(work);
 	work->share_diff = share_diff(work);
 
 
-	if (opt_scrypt)
-		test_diff *= 65536;
-
 	if (unlikely(work->share_diff >= test_diff)) {
 	if (unlikely(work->share_diff >= test_diff)) {
 		work->block = true;
 		work->block = true;
 		work->pool->solved++;
 		work->pool->solved++;
@@ -6274,9 +6031,7 @@ out:
 
 
 static inline bool abandon_work(struct work *work, struct timeval *wdiff, uint64_t hashes)
 static inline bool abandon_work(struct work *work, struct timeval *wdiff, uint64_t hashes)
 {
 {
-	if (wdiff->tv_sec > opt_scantime ||
-	    work->blk.nonce >= MAXTHREADS - hashes ||
-	    hashes >= 0xfffffffe ||
+	if (wdiff->tv_sec > opt_scantime || hashes >= 0xfffffffe ||
 	    stale_work(work, false))
 	    stale_work(work, false))
 		return true;
 		return true;
 	return false;
 	return false;
@@ -6324,7 +6079,7 @@ static void hash_sole_work(struct thr_info *mythr)
 		cgpu->new_work = true;
 		cgpu->new_work = true;
 
 
 		cgtime(&tv_workstart);
 		cgtime(&tv_workstart);
-		work->blk.nonce = 0;
+		work->nonce = 0;
 		cgpu->max_hashes = 0;
 		cgpu->max_hashes = 0;
 		if (!drv->prepare_work(mythr, work)) {
 		if (!drv->prepare_work(mythr, work)) {
 			applog(LOG_ERR, "work prepare failed, exiting "
 			applog(LOG_ERR, "work prepare failed, exiting "
@@ -6332,25 +6087,6 @@ static void hash_sole_work(struct thr_info *mythr)
 			break;
 			break;
 		}
 		}
 		work->device_diff = MIN(drv->working_diff, work->work_difficulty);
 		work->device_diff = MIN(drv->working_diff, work->work_difficulty);
-#ifdef USE_SCRYPT
-		/* Dynamically adjust the working diff even if the target
-		 * diff is very high to ensure we can still validate scrypt is
-		 * returning shares. */
-		if (opt_scrypt) {
-			double wu;
-
-			wu = total_diff1 / total_secs * 60;
-			if (wu > 30 && drv->working_diff < drv->max_diff &&
-			    drv->working_diff < work->work_difficulty) {
-				drv->working_diff++;
-				applog(LOG_DEBUG, "Driver %s working diff changed to %.0f",
-					drv->dname, drv->working_diff);
-				work->device_diff = MIN(drv->working_diff, work->work_difficulty);
-			} else if (drv->working_diff > work->work_difficulty)
-				drv->working_diff = work->work_difficulty;
-			set_target(work->device_target, work->device_diff);
-		}
-#endif
 
 
 		do {
 		do {
 			cgtime(&tv_start);
 			cgtime(&tv_start);
@@ -6380,7 +6116,7 @@ static void hash_sole_work(struct thr_info *mythr)
 			pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
 			pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
 
 
 			thread_reportin(mythr);
 			thread_reportin(mythr);
-			hashes = drv->scanhash(mythr, work, work->blk.nonce + max_nonce);
+			hashes = drv->scanhash(mythr, work, work->nonce + max_nonce);
 			thread_reportout(mythr);
 			thread_reportout(mythr);
 
 
 			pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
 			pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
@@ -7362,9 +7098,6 @@ void print_summary(void)
 
 
 static void clean_up(bool restarting)
 static void clean_up(bool restarting)
 {
 {
-#ifdef HAVE_OPENCL
-	clear_adl(nDevs);
-#endif
 #ifdef USE_USBUTILS
 #ifdef USE_USBUTILS
 	usb_polling = false;
 	usb_polling = false;
 	pthread_join(usb_poll_thread, NULL);
 	pthread_join(usb_poll_thread, NULL);
@@ -7753,11 +7486,6 @@ void enable_device(struct cgpu_info *cgpu)
 		adj_width(mining_threads, &dev_width);
 		adj_width(mining_threads, &dev_width);
 #endif
 #endif
 	}
 	}
-#ifdef HAVE_OPENCL
-	if (cgpu->drv->drv_id == DRIVER_opencl) {
-		gpu_threads += cgpu->threads;
-	}
-#endif
 	rwlock_init(&cgpu->qlock);
 	rwlock_init(&cgpu->qlock);
 	cgpu->queued_work = NULL;
 	cgpu->queued_work = NULL;
 }
 }
@@ -8062,12 +7790,6 @@ int main(int argc, char *argv[])
 
 
 	INIT_LIST_HEAD(&scan_devices);
 	INIT_LIST_HEAD(&scan_devices);
 
 
-#ifdef HAVE_OPENCL
-	memset(gpus, 0, sizeof(gpus));
-	for (i = 0; i < MAX_GPUDEVICES; i++)
-		gpus[i].dynamic = true;
-#endif
-
 	/* parse command line */
 	/* parse command line */
 	opt_register_table(opt_config_table,
 	opt_register_table(opt_config_table,
 			   "Options for both config file and command line");
 			   "Options for both config file and command line");
@@ -8084,8 +7806,6 @@ int main(int argc, char *argv[])
 	if (opt_benchmark) {
 	if (opt_benchmark) {
 		struct pool *pool;
 		struct pool *pool;
 
 
-		if (opt_scrypt)
-			quit(1, "Cannot use benchmark mode with scrypt");
 		pool = add_pool();
 		pool = add_pool();
 		pool->rpc_url = malloc(255);
 		pool->rpc_url = malloc(255);
 		strcpy(pool->rpc_url, "Benchmark");
 		strcpy(pool->rpc_url, "Benchmark");
@@ -8129,9 +7849,8 @@ int main(int argc, char *argv[])
 	if (want_per_device_stats)
 	if (want_per_device_stats)
 		opt_log_output = true;
 		opt_log_output = true;
 
 
-	/* Use a shorter scantime for scrypt */
 	if (opt_scantime < 0)
 	if (opt_scantime < 0)
-		opt_scantime = opt_scrypt ? 30 : 60;
+		opt_scantime = 60;
 
 
 	total_control_threads = 8;
 	total_control_threads = 8;
 	control_thr = calloc(total_control_threads, sizeof(*thr));
 	control_thr = calloc(total_control_threads, sizeof(*thr));
@@ -8144,25 +7863,19 @@ int main(int argc, char *argv[])
 	usb_initialise();
 	usb_initialise();
 
 
 	// before device detection
 	// before device detection
-	if (!opt_scrypt) {
-		cgsem_init(&usb_resource_sem);
-		usbres_thr_id = 1;
-		thr = &control_thr[usbres_thr_id];
-		if (thr_info_create(thr, NULL, usb_resource_thread, thr))
-			quit(1, "usb resource thread create failed");
-		pthread_detach(thr->pth);
-	}
+	cgsem_init(&usb_resource_sem);
+	usbres_thr_id = 1;
+	thr = &control_thr[usbres_thr_id];
+	if (thr_info_create(thr, NULL, usb_resource_thread, thr))
+		quit(1, "usb resource thread create failed");
+	pthread_detach(thr->pth);
 #endif
 #endif
 
 
 	/* Use the DRIVER_PARSE_COMMANDS macro to fill all the device_drvs */
 	/* Use the DRIVER_PARSE_COMMANDS macro to fill all the device_drvs */
 	DRIVER_PARSE_COMMANDS(DRIVER_FILL_DEVICE_DRV)
 	DRIVER_PARSE_COMMANDS(DRIVER_FILL_DEVICE_DRV)
 
 
-	if (opt_scrypt)
-		opencl_drv.drv_detect(false);
-	else {
 	/* Use the DRIVER_PARSE_COMMANDS macro to detect all devices */
 	/* Use the DRIVER_PARSE_COMMANDS macro to detect all devices */
-		DRIVER_PARSE_COMMANDS(DRIVER_DRV_DETECT_ALL)
-	}
+	DRIVER_PARSE_COMMANDS(DRIVER_DRV_DETECT_ALL)
 
 
 	if (opt_display_devs) {
 	if (opt_display_devs) {
 		applog(LOG_ERR, "Devices detected:");
 		applog(LOG_ERR, "Devices detected:");
@@ -8362,12 +8075,6 @@ begin_bench:
 		}
 		}
 	}
 	}
 
 
-#ifdef HAVE_OPENCL
-	applog(LOG_INFO, "%d gpu miner threads started", gpu_threads);
-	for (i = 0; i < nDevs; i++)
-		pause_dynamic_threads(i);
-#endif
-
 	cgtime(&total_tv_start);
 	cgtime(&total_tv_start);
 	cgtime(&total_tv_end);
 	cgtime(&total_tv_end);
 
 
@@ -8385,17 +8092,6 @@ begin_bench:
 		quit(1, "watchdog thread create failed");
 		quit(1, "watchdog thread create failed");
 	pthread_detach(thr->pth);
 	pthread_detach(thr->pth);
 
 
-#ifdef HAVE_OPENCL
-	/* Create reinit gpu thread */
-	gpur_thr_id = 4;
-	thr = &control_thr[gpur_thr_id];
-	thr->q = tq_new();
-	if (!thr->q)
-		quit(1, "tq_new failed for gpur_thr_id");
-	if (thr_info_create(thr, NULL, reinit_gpu, thr))
-		quit(1, "reinit_gpu thread create failed");
-#endif	
-
 	/* Create API socket thread */
 	/* Create API socket thread */
 	api_thr_id = 5;
 	api_thr_id = 5;
 	thr = &control_thr[api_thr_id];
 	thr = &control_thr[api_thr_id];
@@ -8403,13 +8099,11 @@ begin_bench:
 		quit(1, "API thread create failed");
 		quit(1, "API thread create failed");
 
 
 #ifdef USE_USBUTILS
 #ifdef USE_USBUTILS
-	if (!opt_scrypt) {
-		hotplug_thr_id = 6;
-		thr = &control_thr[hotplug_thr_id];
-		if (thr_info_create(thr, NULL, hotplug_thread, thr))
-			quit(1, "hotplug thread create failed");
-		pthread_detach(thr->pth);
-	}
+	hotplug_thr_id = 6;
+	thr = &control_thr[hotplug_thr_id];
+	if (thr_info_create(thr, NULL, hotplug_thread, thr))
+		quit(1, "hotplug thread create failed");
+	pthread_detach(thr->pth);
 #endif
 #endif
 
 
 #ifdef HAVE_CURSES
 #ifdef HAVE_CURSES

+ 1 - 132
configure.ac

@@ -63,7 +63,6 @@ AC_FUNC_ALLOCA
 have_win32=false
 have_win32=false
 PTHREAD_FLAGS="-lpthread"
 PTHREAD_FLAGS="-lpthread"
 DLOPEN_FLAGS="-ldl"
 DLOPEN_FLAGS="-ldl"
-OPENCL_LIBS="-lOpenCL"
 WS2_LIBS=""
 WS2_LIBS=""
 MM_LIBS=""
 MM_LIBS=""
 MATH_LIBS="-lm"
 MATH_LIBS="-lm"
@@ -97,37 +96,16 @@ case $target in
   powerpc-*-darwin*)
   powerpc-*-darwin*)
     have_darwin=true
     have_darwin=true
     CFLAGS="$CFLAGS -faltivec"
     CFLAGS="$CFLAGS -faltivec"
-    OPENCL_LIBS=""
     PTHREAD_FLAGS=""
     PTHREAD_FLAGS=""
     RT_LIBS=""
     RT_LIBS=""
     ;;
     ;;
   *-*-darwin*)
   *-*-darwin*)
     have_darwin=true
     have_darwin=true
-    OPENCL_LIBS="-framework OpenCL"
     PTHREAD_FLAGS=""
     PTHREAD_FLAGS=""
     RT_LIBS=""
     RT_LIBS=""
 	;;
 	;;
 esac
 esac
 
 
-
-if test "x$have_win32" != xtrue; then
-	if test "x$have_x86_64" = xtrue; then
-		ARCH_DIR=x86_64
-	else
-		ARCH_DIR=x86
-	fi
-
-	if test "x$ATISTREAMSDKROOT" != x; then
-		OPENCL_FLAGS="-I$ATISTREAMSDKROOT/include $OPENCL_FLAGS"
-		OPENCL_LIBS="-L$ATISTREAMSDKROOT/lib/$ARCH_DIR $OPENCL_LIBS"
-	fi
-
-	if test "x$AMDAPPSDKROOT" != x; then
-		OPENCL_FLAGS="-I$AMDAPPSDKROOT/include $OPENCL_FLAGS"
-		OPENCL_LIBS="-L$AMDAPPSDKROOT/lib/$ARCH_DIR $OPENCL_LIBS"
-	fi
-fi
-
 have_cgminer_sdk=false
 have_cgminer_sdk=false
 if test -n "$CGMINER_SDK"; then
 if test -n "$CGMINER_SDK"; then
 	have_cgminer_sdk=true
 	have_cgminer_sdk=true
@@ -135,44 +113,6 @@ if test -n "$CGMINER_SDK"; then
 	LDFLAGS="-L$CGMINER_SDK/lib/$target $LDFLAGS"
 	LDFLAGS="-L$CGMINER_SDK/lib/$target $LDFLAGS"
 fi
 fi
 
 
-opencl="no"
-
-AC_ARG_ENABLE([opencl],
-	[AC_HELP_STRING([--enable-opencl],[Enable support for GPU mining with opencl])],
-	[opencl=$enableval]
-	)
-if test "x$opencl" != xno; then
-	# Check for OpenCL (the long way needed on mingw32 due to calling conventions)
-	AC_MSG_CHECKING([for OpenCL])
-	SAVED_LIBS=$LIBS
-	SAVED_CFLAGS=$CFLAGS
-	LIBS="$LIBS $OPENCL_LIBS"
-	CFLAGS="$CFLAGS $OPENCL_FLAGS"
-	AC_LINK_IFELSE(
-	[AC_LANG_PROGRAM([[
-		#ifdef __APPLE_CC__
-		#include <OpenCL/opencl.h>
-		#else
-		#include <CL/cl.h>
-		#endif
-	]],
-	[[return clSetKernelArg(0, 0, 0, 0); ]])],
-	[AC_MSG_RESULT(yes)
-	AC_DEFINE([HAVE_OPENCL], [1], [Defined to 1 if OpenCL is present on the system.])
-	found_opencl=1
-	],
-	[AC_MSG_RESULT(no)
-	OPENCL_FLAGS=
-	OPENCL_LIBS=
-	found_opencl=0])
-	LIBS=$SAVED_LIBS
-	CFLAGS=$SAVED_CFLAGS
-else
-	OPENCL_FLAGS=""
-	OPENCL_LIBS=""
-fi
-AM_CONDITIONAL([HAS_OPENCL], [test x$opencl = xyes])
-
 has_winpthread=false
 has_winpthread=false
 if test "x$have_win32" = xtrue; then
 if test "x$have_win32" = xtrue; then
         has_winpthread=true
         has_winpthread=true
@@ -186,41 +126,6 @@ if test "x$has_winpthread" != xtrue; then
         PTHREAD_LIBS=-lpthread
         PTHREAD_LIBS=-lpthread
 fi
 fi
 
 
-AC_ARG_ENABLE([adl],
-	[AC_HELP_STRING([--disable-adl],[Override detection and disable building with adl])],
-	[adl=$enableval]
-	)
-
-scrypt="no"
-
-if test "$found_opencl" = 1; then
-	if test "x$adl" != xno; then
-		ADL_CPPFLAGS=
-		AC_CHECK_FILE([$srcdir/ADL_SDK/adl_sdk.h], [have_adl=true; ADL_CPPFLAGS=-I$srcdir], have_adl=false,)
-		if test x$have_adl+$have_cgminer_sdk = xfalse+true; then
-			AC_CHECK_FILE([$CGMINER_SDK/include/ADL_SDK/adl_sdk.h], [have_adl=true; ADL_CPPFLAGS=-I$CGMINER_SDK/include], have_adl=false,)
-		fi
-		if test x$have_adl = xtrue
-		then
-			AC_DEFINE([HAVE_ADL], [1], [Defined if ADL headers were found])
-		else
-			DLOPEN_FLAGS=""
-		fi
-	fi
-
-	AC_ARG_ENABLE([scrypt],
-		[AC_HELP_STRING([--enable-scrypt],[Compile support for scrypt litecoin mining (default disabled)])],
-		[scrypt=$enableval]
-		)
-	if test "x$scrypt" = xyes; then
-		AC_DEFINE([USE_SCRYPT], [1], [Defined to 1 if scrypt support is wanted])
-	fi
-else
-	DLOPEN_FLAGS=""
-fi
-
-AM_CONDITIONAL([HAS_SCRYPT], [test x$scrypt = xyes])
-
 avalon="no"
 avalon="no"
 
 
 AC_ARG_ENABLE([avalon],
 AC_ARG_ENABLE([avalon],
@@ -483,8 +388,6 @@ AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo130302"], [Filename for diablo ker
 AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt130511"], [Filename for scrypt kernel])
 AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt130511"], [Filename for scrypt kernel])
 
 
 
 
-AC_SUBST(OPENCL_LIBS)
-AC_SUBST(OPENCL_FLAGS)
 AC_SUBST(JANSSON_LIBS)
 AC_SUBST(JANSSON_LIBS)
 AC_SUBST(PTHREAD_FLAGS)
 AC_SUBST(PTHREAD_FLAGS)
 AC_SUBST(DLOPEN_FLAGS)
 AC_SUBST(DLOPEN_FLAGS)
@@ -526,40 +429,6 @@ fi
 echo "  curses.TUI...........: $cursesmsg"
 echo "  curses.TUI...........: $cursesmsg"
 
 
 
 
-if test "x$opencl" != xno; then
-	if test $found_opencl = 1; then
-		echo "  OpenCL...............: FOUND. GPU mining support enabled"
-	if test "x$scrypt" != xno; then
-		echo "  scrypt...............: Enabled"
-	else
-		echo "  scrypt...............: Disabled"
-	fi
-
-	else
-		echo "  OpenCL...............: NOT FOUND. GPU mining support DISABLED"
-		if test "x$avalon$bitforce$bitfury$icarus$modminer$bflsc$hashfast$klondike$knc" = xnonononononononono; then
-			AC_MSG_ERROR([No mining configured in])
-		fi
-		echo "  scrypt...............: Disabled (needs OpenCL)"
-	fi
-else
-	echo "  OpenCL...............: Detection overrided. GPU mining support DISABLED"
-	if test "x$avalon$bitforce$bitfury$icarus$modminer$bflsc$hashfast$klondike$knc" = xnonononononononono; then
-		AC_MSG_ERROR([No mining configured in])
-	fi
-	echo "  scrypt...............: Disabled (needs OpenCL)"
-fi
-
-if test "x$adl" != xno; then
-	if test x$have_adl = xtrue; then
-		echo "  ADL..................: SDK found, GPU monitoring support enabled"
-	else
-		echo "  ADL..................: SDK NOT found, GPU monitoring support DISABLED"
-	fi
-else
-	echo "  ADL..................: Detection overrided. GPU monitoring support DISABLED"
-fi
-
 echo
 echo
 if test "x$avalon" = xyes; then
 if test "x$avalon" = xyes; then
 	echo "  Avalon.ASICs.........: Enabled"
 	echo "  Avalon.ASICs.........: Enabled"
@@ -620,7 +489,7 @@ echo "Compilation............: make (or gmake)"
 echo "  CPPFLAGS.............: $CPPFLAGS"
 echo "  CPPFLAGS.............: $CPPFLAGS"
 echo "  CFLAGS...............: $CFLAGS"
 echo "  CFLAGS...............: $CFLAGS"
 echo "  LDFLAGS..............: $LDFLAGS $PTHREAD_FLAGS"
 echo "  LDFLAGS..............: $LDFLAGS $PTHREAD_FLAGS"
-echo "  LDADD................: $DLOPEN_FLAGS $LIBCURL_LIBS $JANSSON_LIBS $PTHREAD_LIBS $OPENCL_LIBS $NCURSES_LIBS $PDCURSES_LIBS $WS2_LIBS $MATH_LIBS $LIBUSB_LIBS $RT_LIBS"
+echo "  LDADD................: $DLOPEN_FLAGS $LIBCURL_LIBS $JANSSON_LIBS $PTHREAD_LIBS $NCURSES_LIBS $PDCURSES_LIBS $WS2_LIBS $MATH_LIBS $LIBUSB_LIBS $RT_LIBS"
 echo
 echo
 echo "Installation...........: make install (as root if needed, with 'su' or 'sudo')"
 echo "Installation...........: make install (as root if needed, with 'su' or 'sudo')"
 echo "  prefix...............: $prefix"
 echo "  prefix...............: $prefix"

+ 0 - 1361
diablo130302.cl

@@ -1,1361 +0,0 @@
-/*
- *  DiabloMiner - OpenCL miner for BitCoin
- *  Copyright (C) 2012, 2013 Con Kolivas <kernel@kolivas.org>
- *  Copyright (C) 2010, 2011, 2012 Patrick McFarland <diablod3@gmail.com>
- *
- *  This program is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/* Working type for the kernel: a 4-wide or 2-wide uint vector when the
- * VECTORS4 or VECTORS2 macro is defined, otherwise a scalar uint.
- */
-#ifdef VECTORS4
-	typedef uint4 z;
-#elif defined(VECTORS2)
-	typedef uint2 z;
-#else
-	typedef uint z;
-#endif
-
-/* Zrotr(a, b): rotate a by b bits. Despite the name, both variants rotate
- * LEFT by b: OpenCL rotate() is a left rotate, and amd_bitalign(a, a, 32 - b)
- * shifts the 64-bit pair a:a right by (32 - b), which is the same thing.
- * BITALIGN selects the AMD media-ops built-in and enables its extension.
- * The arguments are pasted directly after a cast, so callers should pass
- * simple or parenthesized expressions.
- */
-#ifdef BITALIGN
-#pragma OPENCL EXTENSION cl_amd_media_ops : enable
-#define Zrotr(a, b) amd_bitalign((z)a, (z)a, (z)(32 - b))
-#else
-#define Zrotr(a, b) rotate((z)a, (z)b)
-#endif
-
-/* ZCh / ZMa: the "choose" and "majority" bit functions used by the kernel.
- * Without BFI_INT they map onto bitselect(): ZCh yields the bits of b where a
- * is set and the bits of c elsewhere; ZMa yields the bits of b where a and c
- * differ and the bits of a where they agree.
- * NOTE(review): the BFI_INT variants call amd_bytealign, which is not a
- * bit-select by itself -- presumably the host rewrites the compiled binary to
- * use the BFI_INT instruction instead; confirm against the host code.
- */
-#ifdef BFI_INT
-#define ZCh(a, b, c) amd_bytealign(a, b, c)
-#define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a))
-#else
-#define ZCh(a, b, c) bitselect((z)c, (z)b, (z)a)
-#define ZMa(a, b, c) bitselect((z)a, (z)b, (z)c ^ (z)a)
-#endif
-
-/* Constant table for this kernel. This is not the classic SHA256 constant
- * table: the values are listed in the order in which the kernel uses them.
- * The table has 80 entries (indices 0-79); the trailing "// N" markers give
- * the index of every tenth entry.
- */
-__constant uint K[] = {
-	0xd807aa98U,
-	0x12835b01U,
-	0x243185beU,
-	0x550c7dc3U,
-	0x72be5d74U,
-	0x80deb1feU,
-	0x9bdc06a7U,
-	0xc19bf3f4U,
-	0x0fc19dc6U,
-	0x240ca1ccU,
-	0x80000000U, // 10
-	0x2de92c6fU,
-	0x4a7484aaU,
-	0x00000280U,
-	0x5cb0a9dcU,
-	0x76f988daU,
-	0x983e5152U,
-	0xa831c66dU,
-	0xb00327c8U,
-	0xbf597fc7U,
-	0xc6e00bf3U, // 20
-	0x00A00055U,
-	0xd5a79147U,
-	0x06ca6351U,
-	0x14292967U,
-	0x27b70a85U,
-	0x2e1b2138U,
-	0x4d2c6dfcU,
-	0x53380d13U,
-	0x650a7354U,
-	0x766a0abbU, // 30
-	0x81c2c92eU,
-	0x92722c85U,
-	0xa2bfe8a1U,
-	0xa81a664bU,
-	0xc24b8b70U,
-	0xc76c51a3U,
-	0xd192e819U,
-	0xd6990624U,
-	0xf40e3585U,
-	0x106aa070U, // 40
-	0x19a4c116U,
-	0x1e376c08U,
-	0x2748774cU,
-	0x34b0bcb5U,
-	0x391c0cb3U,
-	0x4ed8aa4aU,
-	0x5b9cca4fU,
-	0x682e6ff3U,
-	0x748f82eeU,
-	0x78a5636fU, // 50
-	0x84c87814U,
-	0x8cc70208U,
-	0x90befffaU,
-	0xa4506cebU,
-	0xbef9a3f7U,
-	0xc67178f2U,
-	0x98c7e2a2U,
-	0x90bb1e3cU,
-	0x510e527fU,
-	0x9b05688cU, // 60
-	0xfc08884dU,
-	0x3c6ef372U,
-	0x50c6645bU,
-	0x6a09e667U,
-	0xbb67ae85U,
-	0x3ac42e24U,
-	0xd21ea4fdU,
-	0x59f111f1U,
-	0x923f82a4U,
-	0xab1c5ed5U, // 70
-	0x5807aa98U,
-	0xc19bf274U,
-	0xe49b69c1U,
-	0x00a00000U,
-	0xefbe4786U,
-	0x00000100U,
-	0x11002000U,
-	0x00400022U,
-	0x136032EDU
-};
-
-/* SHA-256 sigma functions, each named after its first rotation amount.
- * Zrotr rotates LEFT (OpenCL rotate() is a left rotate), so a rotation by k
- * here equals a right rotation by 32 - k:
- *   ZR25 = rotr7  ^ rotr18 ^ shr3    (small sigma0)
- *   ZR15 = rotr17 ^ rotr19 ^ shr10   (small sigma1)
- *   ZR26 = rotr6  ^ rotr11 ^ rotr25  (big Sigma1)
- *   ZR30 = rotr2  ^ rotr13 ^ rotr22  (big Sigma0)
- */
-#define ZR25(n) ((Zrotr((n), 25) ^ Zrotr((n), 14) ^ ((n) >> 3U)))
-#define ZR15(n) ((Zrotr((n), 15) ^ Zrotr((n), 13) ^ ((n) >> 10U)))
-#define ZR26(n) ((Zrotr((n), 26) ^ Zrotr((n), 21) ^ Zrotr((n), 7)))
-#define ZR30(n) ((Zrotr((n), 30) ^ Zrotr((n), 19) ^ Zrotr((n), 10)))
-
-__kernel
-__attribute__((vec_type_hint(z)))
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-void search(
-#ifndef GOFFSET
-    const z base,
-#endif
-    const uint PreVal4_state0, const uint PreVal4_state0_k7,
-    const uint PreVal4_T1,
-    const uint W18, const uint W19,
-    const uint W16, const uint W17,
-    const uint W16_plus_K16, const uint W17_plus_K17,
-    const uint W31, const uint W32,
-    const uint d1, const uint b1, const uint c1,
-    const uint h1, const uint f1, const uint g1,
-    const uint c1_plus_k5, const uint b1_plus_k6,
-    const uint state0, const uint state1, const uint state2, const uint state3,
-    const uint state4, const uint state5, const uint state6, const uint state7,
-    volatile __global uint * output)
-{
-
-  z ZA[930];
-
-#ifdef GOFFSET
-	const z Znonce = (uint)(get_global_id(0));
-#else
-	const z Znonce = base + (uint)(get_global_id(0));
-#endif
-
-    ZA[15] = Znonce + PreVal4_state0;
-    
-    ZA[16] = (ZCh(ZA[15], b1, c1) + d1) + ZR26(ZA[15]);
-    ZA[26] = Znonce + PreVal4_T1;
-    
-    ZA[27] = ZMa(f1, g1, ZA[26]) + ZR30(ZA[26]);
-    ZA[17] = ZA[16] + h1;
-    
-    ZA[19] = (ZCh(ZA[17], ZA[15], b1) + c1_plus_k5) + ZR26(ZA[17]);
-    ZA[28] = ZA[27] + ZA[16];
-    
-    ZA[548] = ZMa(ZA[26], f1, ZA[28]) + ZR30(ZA[28]);
-    ZA[20] = ZA[19] + g1;
-    
-    ZA[22] = (ZCh(ZA[20], ZA[17], ZA[15]) + b1_plus_k6) + ZR26(ZA[20]);
-    ZA[29] = ZA[548] + ZA[19];
-    
-    ZA[549] = ZMa(ZA[28], ZA[26], ZA[29]) + ZR30(ZA[29]);
-    ZA[23] = ZA[22] + f1;
-    
-    ZA[24] = ZCh(ZA[23], ZA[20], ZA[17]) + ZR26(ZA[23]);
-    ZA[180] = Znonce + PreVal4_state0_k7;
-    ZA[30] = ZA[549] + ZA[22];
-    
-    ZA[31] = ZMa(ZA[29], ZA[28], ZA[30]) + ZR30(ZA[30]);
-    ZA[181] = ZA[180] + ZA[24];
-    
-    ZA[182] = ZA[181] + ZA[26];
-    ZA[183] = ZA[181] + ZA[31];
-    ZA[18] = ZA[17] + K[0];
-    
-    ZA[186] = (ZCh(ZA[182], ZA[23], ZA[20]) + ZA[18]) + ZR26(ZA[182]);
-    ZA[184] = ZMa(ZA[30], ZA[29], ZA[183]) + ZR30(ZA[183]);
-    
-    ZA[187] = ZA[186] + ZA[28];
-    ZA[188] = ZA[186] + ZA[184];
-    ZA[21] = ZA[20] + K[1];
-    
-    ZA[191] = (ZCh(ZA[187], ZA[182], ZA[23]) + ZA[21]) + ZR26(ZA[187]);
-    ZA[189] = ZMa(ZA[183], ZA[30], ZA[188]) + ZR30(ZA[188]);
-    
-    ZA[192] = ZA[191] + ZA[29];
-    ZA[193] = ZA[191] + ZA[189];
-    ZA[25] = ZA[23] + K[2];
-    
-    ZA[196] = (ZCh(ZA[192], ZA[187], ZA[182]) + ZA[25]) + ZR26(ZA[192]);
-    ZA[194] = ZMa(ZA[188], ZA[183], ZA[193]) + ZR30(ZA[193]);
-    
-    ZA[197] = ZA[196] + ZA[30];
-    ZA[198] = ZA[196] + ZA[194];
-    ZA[185] = ZA[182] + K[3];
-    
-    ZA[201] = (ZCh(ZA[197], ZA[192], ZA[187]) + ZA[185]) + ZR26(ZA[197]);
-    ZA[199] = ZMa(ZA[193], ZA[188], ZA[198]) + ZR30(ZA[198]);
-    
-    ZA[202] = ZA[201] + ZA[183];
-    ZA[203] = ZA[201] + ZA[199];
-    ZA[190] = ZA[187] + K[4];
-    
-    ZA[206] = (ZCh(ZA[202], ZA[197], ZA[192]) + ZA[190]) + ZR26(ZA[202]);
-    ZA[204] = ZMa(ZA[198], ZA[193], ZA[203]) + ZR30(ZA[203]);
-    
-    ZA[207] = ZA[206] + ZA[188];
-    ZA[208] = ZA[206] + ZA[204];
-    ZA[195] = ZA[192] + K[5];
-    
-    ZA[211] = (ZCh(ZA[207], ZA[202], ZA[197]) + ZA[195]) + ZR26(ZA[207]);
-    ZA[209] = ZMa(ZA[203], ZA[198], ZA[208]) + ZR30(ZA[208]);
-    
-    ZA[212] = ZA[193] + ZA[211];
-    ZA[213] = ZA[211] + ZA[209];
-    ZA[200] = ZA[197] + K[6];
-    
-    ZA[216] = (ZCh(ZA[212], ZA[207], ZA[202]) + ZA[200]) + ZR26(ZA[212]);
-    ZA[214] = ZMa(ZA[208], ZA[203], ZA[213]) + ZR30(ZA[213]);
-    
-    ZA[217] = ZA[198] + ZA[216];
-    ZA[218] = ZA[216] + ZA[214];
-    ZA[205] = ZA[202] + K[7];
-    
-    ZA[220] = (ZCh(ZA[217], ZA[212], ZA[207]) + ZA[205]) + ZR26(ZA[217]);
-    ZA[219] = ZMa(ZA[213], ZA[208], ZA[218]) + ZR30(ZA[218]);
-    
-    ZA[222] = ZA[203] + ZA[220];
-    ZA[223] = ZA[220] + ZA[219];
-    ZA[210] = ZA[207] + W16_plus_K16;
-    
-    ZA[226] = (ZCh(ZA[222], ZA[217], ZA[212]) + ZA[210]) + ZR26(ZA[222]);
-    ZA[225] = ZMa(ZA[218], ZA[213], ZA[223]) + ZR30(ZA[223]);
-    
-    ZA[0] = ZR25(Znonce) + W18;
-    ZA[228] = ZA[226] + ZA[225];
-    ZA[227] = ZA[208] + ZA[226];
-    ZA[215] = ZA[212] + W17_plus_K17;
-    
-    ZA[231] = (ZCh(ZA[227], ZA[222], ZA[217]) + ZA[215]) + ZR26(ZA[227]);
-    ZA[229] = ZMa(ZA[223], ZA[218], ZA[228]) + ZR30(ZA[228]);
-    ZA[1] = ZA[0] + K[8];
-    
-    ZA[232] = ZA[213] + ZA[231];
-    ZA[233] = ZA[231] + ZA[229];
-    ZA[221] = ZA[217] + ZA[1];
-    ZA[32] = Znonce + W19;
-    
-    ZA[236] = (ZCh(ZA[232], ZA[227], ZA[222]) + ZA[221]) + ZR26(ZA[232]);
-    ZA[234] = ZMa(ZA[228], ZA[223], ZA[233]) + ZR30(ZA[233]);
-    ZA[33] = ZA[32] + K[9];
-    
-    ZA[3] = ZR15(ZA[0]) + K[10];
-    ZA[238] = ZA[236] + ZA[234];
-    ZA[237] = ZA[218] + ZA[236];
-    ZA[224] = ZA[222] + ZA[33];
-    
-    ZA[241] = (ZCh(ZA[237], ZA[232], ZA[227]) + ZA[224]) + ZR26(ZA[237]);
-    ZA[239] = ZMa(ZA[233], ZA[228], ZA[238]) + ZR30(ZA[238]);
-    ZA[4] = ZA[3] + K[11];
-    
-    ZA[35] = ZR15(ZA[32]);
-    ZA[243] = ZA[241] + ZA[239];
-    ZA[242] = ZA[223] + ZA[241];
-    ZA[230] = ZA[227] + ZA[4];
-    
-    ZA[246] = (ZCh(ZA[242], ZA[237], ZA[232]) + ZA[230]) + ZR26(ZA[242]);
-    ZA[244] = ZMa(ZA[238], ZA[233], ZA[243]) + ZR30(ZA[243]);
-    ZA[36] = ZA[35] + K[12];
-    
-    ZA[7] = ZR15(ZA[3]) + K[13];
-    ZA[248] = ZA[246] + ZA[244];
-    ZA[247] = ZA[228] + ZA[246];
-    ZA[235] = ZA[232] + ZA[36];
-    
-    ZA[251] = (ZCh(ZA[247], ZA[242], ZA[237]) + ZA[235]) + ZR26(ZA[247]);
-    ZA[249] = ZMa(ZA[243], ZA[238], ZA[248]) + ZR30(ZA[248]);
-    ZA[8] = ZA[7] + K[14];
-    
-    ZA[38] = ZR15(ZA[35]) + W16;
-    ZA[253] = ZA[251] + ZA[249];
-    ZA[252] = ZA[233] + ZA[251];
-    ZA[240] = ZA[237] + ZA[8];
-    
-    ZA[256] = (ZCh(ZA[252], ZA[247], ZA[242]) + ZA[240]) + ZR26(ZA[252]);
-    ZA[254] = ZMa(ZA[248], ZA[243], ZA[253]) + ZR30(ZA[253]);
-    ZA[40] = ZA[38] + K[15];
-    
-    ZA[10] = ZR15(ZA[7]) + W17;
-    ZA[258] = ZA[256] + ZA[254];
-    ZA[257] = ZA[238] + ZA[256];
-    ZA[245] = ZA[242] + ZA[40];
-    
-    ZA[261] = (ZCh(ZA[257], ZA[252], ZA[247]) + ZA[245]) + ZR26(ZA[257]);
-    ZA[259] = ZMa(ZA[253], ZA[248], ZA[258]) + ZR30(ZA[258]);
-    ZA[13] = ZA[10] + K[16];
-    
-    ZA[43] = ZR15(ZA[38]) + ZA[0];
-    ZA[263] = ZA[261] + ZA[259];
-    ZA[262] = ZA[243] + ZA[261];
-    ZA[250] = ZA[247] + ZA[13];
-    
-    ZA[266] = (ZCh(ZA[262], ZA[257], ZA[252]) + ZA[250]) + ZR26(ZA[262]);
-    ZA[264] = ZMa(ZA[258], ZA[253], ZA[263]) + ZR30(ZA[263]);
-    ZA[11] = ZR15(ZA[10]);
-    ZA[45] = ZA[43] + K[17];
-    
-    ZA[52] = ZA[11] + ZA[32];
-    ZA[267] = ZA[248] + ZA[266];
-    ZA[255] = ZA[252] + ZA[45];
-    ZA[268] = ZA[266] + ZA[264];
-    
-    ZA[271] = (ZCh(ZA[267], ZA[262], ZA[257]) + ZA[255]) + ZR26(ZA[267]);
-    ZA[269] = ZMa(ZA[263], ZA[258], ZA[268]) + ZR30(ZA[268]);
-    ZA[54] = ZA[52] + K[18];
-    
-    ZA[48] = ZR15(ZA[43]) + ZA[3];
-    ZA[273] = ZA[271] + ZA[269];
-    ZA[272] = ZA[253] + ZA[271];
-    ZA[260] = ZA[257] + ZA[54];
-    
-    ZA[276] = (ZCh(ZA[272], ZA[267], ZA[262]) + ZA[260]) + ZR26(ZA[272]);
-    ZA[274] = ZMa(ZA[268], ZA[263], ZA[273]) + ZR30(ZA[273]);
-    ZA[49] = ZA[48] + K[19];
-    
-    ZA[61] = ZR15(ZA[52]) + ZA[35];
-    ZA[278] = ZA[276] + ZA[274];
-    ZA[277] = ZA[258] + ZA[276];
-    ZA[265] = ZA[262] + ZA[49];
-    
-    ZA[281] = (ZCh(ZA[277], ZA[272], ZA[267]) + ZA[265]) + ZR26(ZA[277]);
-    ZA[279] = ZMa(ZA[273], ZA[268], ZA[278]) + ZR30(ZA[278]);
-    ZA[62] = ZA[61] + K[20];
-    
-    ZA[53] = ZR15(ZA[48]) + ZA[7];
-    ZA[283] = ZA[281] + ZA[279];
-    ZA[282] = ZA[263] + ZA[281];
-    ZA[270] = ZA[267] + ZA[62];
-    
-    ZA[286] = (ZCh(ZA[282], ZA[277], ZA[272]) + ZA[270]) + ZR26(ZA[282]);
-    ZA[284] = ZMa(ZA[278], ZA[273], ZA[283]) + ZR30(ZA[283]);
-    ZA[39] = ZA[38] + K[21];
-    ZA[55] = ZA[53] + K[22];
-    
-    ZA[66] = ZR15(ZA[61]) + ZA[39];
-    ZA[288] = ZA[286] + ZA[284];
-    ZA[287] = ZA[268] + ZA[286];
-    ZA[275] = ZA[272] + ZA[55];
-    
-    ZA[291] = (ZCh(ZA[287], ZA[282], ZA[277]) + ZA[275]) + ZR26(ZA[287]);
-    ZA[289] = ZMa(ZA[283], ZA[278], ZA[288]) + ZR30(ZA[288]);
-    ZA[12] = ZA[10] + W31;
-    ZA[68] = ZA[66] + K[23];
-    
-    ZA[67] = ZR15(ZA[53]) + ZA[12];
-    ZA[293] = ZA[291] + ZA[289];
-    ZA[292] = ZA[273] + ZA[291];
-    ZA[280] = ZA[277] + ZA[68];
-    
-    ZA[296] = (ZCh(ZA[292], ZA[287], ZA[282]) + ZA[280]) + ZR26(ZA[292]);
-    ZA[294] = ZMa(ZA[288], ZA[283], ZA[293]) + ZR30(ZA[293]);
-    ZA[2] = ZR25(ZA[0]);
-    ZA[69] = ZA[67] + K[24];
-    ZA[44] = ZA[43] + W32;
-    
-    ZA[75] = ZR15(ZA[66]) + ZA[44];
-    ZA[298] = ZA[296] + ZA[294];
-    ZA[297] = ZA[278] + ZA[296];
-    ZA[285] = ZA[282] + ZA[69];
-    ZA[5] = ZA[2] + W17;
-    
-    ZA[301] = (ZCh(ZA[297], ZA[292], ZA[287]) + ZA[285]) + ZR26(ZA[297]);
-    ZA[299] = ZMa(ZA[293], ZA[288], ZA[298]) + ZR30(ZA[298]);
-    ZA[56] = ZA[52] + ZA[5];
-    ZA[76] = ZA[75] + K[25];
-    
-    ZA[34] = ZR25(ZA[32]) + ZA[0];
-    ZA[70] = ZR15(ZA[67]) + ZA[56];
-    ZA[302] = ZA[283] + ZA[301];
-    ZA[303] = ZA[301] + ZA[299];
-    ZA[290] = ZA[287] + ZA[76];
-    
-    ZA[306] = (ZCh(ZA[302], ZA[297], ZA[292]) + ZA[290]) + ZR26(ZA[302]);
-    ZA[304] = ZMa(ZA[298], ZA[293], ZA[303]) + ZR30(ZA[303]);
-    ZA[6] = ZR25(ZA[3]);
-    ZA[77] = ZA[70] + K[26];
-    ZA[50] = ZA[34] + ZA[48];
-    
-    ZA[78] = ZR15(ZA[75]) + ZA[50];
-    ZA[308] = ZA[306] + ZA[304];
-    ZA[307] = ZA[288] + ZA[306];
-    ZA[295] = ZA[292] + ZA[77];
-    ZA[41] = ZA[32] + ZA[6];
-    
-    ZA[311] = (ZCh(ZA[307], ZA[302], ZA[297]) + ZA[295]) + ZR26(ZA[307]);
-    ZA[309] = ZMa(ZA[303], ZA[298], ZA[308]) + ZR30(ZA[308]);
-    ZA[63] = ZA[41] + ZA[61];
-    ZA[85] = ZA[78] + K[27];
-    
-    ZA[37] = ZR25(ZA[35]) + ZA[3];
-    ZA[79] = ZR15(ZA[70]) + ZA[63];
-    ZA[312] = ZA[293] + ZA[311];
-    ZA[313] = ZA[311] + ZA[309];
-    ZA[300] = ZA[297] + ZA[85];
-    
-    ZA[316] = (ZCh(ZA[312], ZA[307], ZA[302]) + ZA[300]) + ZR26(ZA[312]);
-    ZA[314] = ZMa(ZA[308], ZA[303], ZA[313]) + ZR30(ZA[313]);
-    ZA[9] = ZR25(ZA[7]);
-    ZA[86] = ZA[79] + K[28];
-    ZA[57] = ZA[37] + ZA[53];
-    
-    ZA[87] = ZR15(ZA[78]) + ZA[57];
-    ZA[318] = ZA[316] + ZA[314];
-    ZA[317] = ZA[298] + ZA[316];
-    ZA[305] = ZA[302] + ZA[86];
-    ZA[46] = ZA[35] + ZA[9];
-    
-    ZA[321] = (ZCh(ZA[317], ZA[312], ZA[307]) + ZA[305]) + ZR26(ZA[317]);
-    ZA[319] = ZMa(ZA[313], ZA[308], ZA[318]) + ZR30(ZA[318]);
-    ZA[71] = ZA[46] + ZA[66];
-    ZA[92] = ZA[87] + K[29];
-    
-    ZA[42] = ZR25(ZA[38]) + ZA[7];
-    ZA[88] = ZR15(ZA[79]) + ZA[71];
-    ZA[322] = ZA[303] + ZA[321];
-    ZA[323] = ZA[321] + ZA[319];
-    ZA[310] = ZA[307] + ZA[92];
-    
-    ZA[326] = (ZCh(ZA[322], ZA[317], ZA[312]) + ZA[310]) + ZR26(ZA[322]);
-    ZA[324] = ZMa(ZA[318], ZA[313], ZA[323]) + ZR30(ZA[323]);
-    ZA[14] = ZR25(ZA[10]);
-    ZA[93] = ZA[88] + K[30];
-    ZA[72] = ZA[42] + ZA[67];
-    
-    ZA[94] = ZR15(ZA[87]) + ZA[72];
-    ZA[328] = ZA[326] + ZA[324];
-    ZA[327] = ZA[308] + ZA[326];
-    ZA[315] = ZA[312] + ZA[93];
-    ZA[51] = ZA[38] + ZA[14];
-    
-    ZA[331] = (ZCh(ZA[327], ZA[322], ZA[317]) + ZA[315]) + ZR26(ZA[327]);
-    ZA[329] = ZMa(ZA[323], ZA[318], ZA[328]) + ZR30(ZA[328]);
-    ZA[80] = ZA[51] + ZA[75];
-    ZA[100] = ZA[94] + K[31];
-    
-    ZA[47] = ZR25(ZA[43]) + ZA[10];
-    ZA[95] = ZR15(ZA[88]) + ZA[80];
-    ZA[332] = ZA[313] + ZA[331];
-    ZA[333] = ZA[331] + ZA[329];
-    ZA[320] = ZA[317] + ZA[100];
-    
-    ZA[336] = (ZCh(ZA[332], ZA[327], ZA[322]) + ZA[320]) + ZR26(ZA[332]);
-    ZA[334] = ZMa(ZA[328], ZA[323], ZA[333]) + ZR30(ZA[333]);
-    ZA[81] = ZA[47] + ZA[70];
-    ZA[101] = ZA[95] + K[32];
-    
-    ZA[58] = ZR25(ZA[52]) + ZA[43];
-    ZA[102] = ZR15(ZA[94]) + ZA[81];
-    ZA[337] = ZA[318] + ZA[336];
-    ZA[338] = ZA[336] + ZA[334];
-    ZA[325] = ZA[322] + ZA[101];
-    
-    ZA[341] = (ZCh(ZA[337], ZA[332], ZA[327]) + ZA[325]) + ZR26(ZA[337]);
-    ZA[339] = ZMa(ZA[333], ZA[328], ZA[338]) + ZR30(ZA[338]);
-    ZA[89] = ZA[58] + ZA[78];
-    ZA[108] = ZA[102] + K[33];
-    
-    ZA[59] = ZR25(ZA[48]) + ZA[52];
-    ZA[103] = ZR15(ZA[95]) + ZA[89];
-    ZA[342] = ZA[323] + ZA[341];
-    ZA[343] = ZA[341] + ZA[339];
-    ZA[330] = ZA[327] + ZA[108];
-    
-    ZA[346] = (ZCh(ZA[342], ZA[337], ZA[332]) + ZA[330]) + ZR26(ZA[342]);
-    ZA[344] = ZMa(ZA[338], ZA[333], ZA[343]) + ZR30(ZA[343]);
-    ZA[90] = ZA[59] + ZA[79];
-    ZA[109] = ZA[103] + K[34];
-    
-    ZA[64] = ZR25(ZA[61]) + ZA[48];
-    ZA[110] = ZR15(ZA[102]) + ZA[90];
-    ZA[347] = ZA[328] + ZA[346];
-    ZA[348] = ZA[346] + ZA[344];
-    ZA[335] = ZA[332] + ZA[109];
-    
-    ZA[351] = (ZCh(ZA[347], ZA[342], ZA[337]) + ZA[335]) + ZR26(ZA[347]);
-    ZA[349] = ZMa(ZA[343], ZA[338], ZA[348]) + ZR30(ZA[348]);
-    ZA[60] = ZR25(ZA[53]);
-    ZA[116] = ZA[110] + K[35];
-    ZA[96] = ZA[87] + ZA[64];
-    
-    ZA[111] = ZR15(ZA[103]) + ZA[96];
-    ZA[353] = ZA[351] + ZA[349];
-    ZA[352] = ZA[333] + ZA[351];
-    ZA[340] = ZA[337] + ZA[116];
-    ZA[65] = ZA[60] + ZA[61];
-    
-    ZA[356] = (ZCh(ZA[352], ZA[347], ZA[342]) + ZA[340]) + ZR26(ZA[352]);
-    ZA[354] = ZMa(ZA[348], ZA[343], ZA[353]) + ZR30(ZA[353]);
-    ZA[97] = ZA[88] + ZA[65];
-    ZA[117] = ZA[111] + K[36];
-    
-    ZA[73] = ZR25(ZA[66]) + ZA[53];
-    ZA[118] = ZR15(ZA[110]) + ZA[97];
-    ZA[357] = ZA[338] + ZA[356];
-    ZA[358] = ZA[356] + ZA[354];
-    ZA[345] = ZA[342] + ZA[117];
-    
-    ZA[361] = (ZCh(ZA[357], ZA[352], ZA[347]) + ZA[345]) + ZR26(ZA[357]);
-    ZA[359] = ZMa(ZA[353], ZA[348], ZA[358]) + ZR30(ZA[358]);
-    ZA[104] = ZA[73] + ZA[94];
-    ZA[124] = ZA[118] + K[37];
-    
-    ZA[74] = ZR25(ZA[67]) + ZA[66];
-    ZA[119] = ZR15(ZA[111]) + ZA[104];
-    ZA[362] = ZA[343] + ZA[361];
-    ZA[363] = ZA[361] + ZA[359];
-    ZA[350] = ZA[347] + ZA[124];
-    
-    ZA[366] = (ZCh(ZA[362], ZA[357], ZA[352]) + ZA[350]) + ZR26(ZA[362]);
-    ZA[364] = ZMa(ZA[358], ZA[353], ZA[363]) + ZR30(ZA[363]);
-    ZA[105] = ZA[74] + ZA[95];
-    ZA[125] = ZA[119] + K[38];
-    
-    ZA[82] = ZR25(ZA[75]) + ZA[67];
-    ZA[126] = ZR15(ZA[118]) + ZA[105];
-    ZA[367] = ZA[348] + ZA[366];
-    ZA[368] = ZA[366] + ZA[364];
-    ZA[355] = ZA[352] + ZA[125];
-    
-    ZA[371] = (ZCh(ZA[367], ZA[362], ZA[357]) + ZA[355]) + ZR26(ZA[367]);
-    ZA[369] = ZMa(ZA[363], ZA[358], ZA[368]) + ZR30(ZA[368]);
-    ZA[112] = ZA[102] + ZA[82];
-    ZA[132] = ZA[126] + K[39];
-    
-    ZA[83] = ZR25(ZA[70]) + ZA[75];
-    ZA[127] = ZR15(ZA[119]) + ZA[112];
-    ZA[372] = ZA[353] + ZA[371];
-    ZA[373] = ZA[371] + ZA[369];
-    ZA[360] = ZA[357] + ZA[132];
-    
-    ZA[376] = (ZCh(ZA[372], ZA[367], ZA[362]) + ZA[360]) + ZR26(ZA[372]);
-    ZA[374] = ZMa(ZA[368], ZA[363], ZA[373]) + ZR30(ZA[373]);
-    ZA[113] = ZA[103] + ZA[83];
-    ZA[133] = ZA[127] + K[40];
-    
-    ZA[84] = ZR25(ZA[78]) + ZA[70];
-    ZA[134] = ZR15(ZA[126]) + ZA[113];
-    ZA[377] = ZA[358] + ZA[376];
-    ZA[378] = ZA[376] + ZA[374];
-    ZA[365] = ZA[362] + ZA[133];
-    
-    ZA[381] = (ZCh(ZA[377], ZA[372], ZA[367]) + ZA[365]) + ZR26(ZA[377]);
-    ZA[379] = ZMa(ZA[373], ZA[368], ZA[378]) + ZR30(ZA[378]);
-    ZA[120] = ZA[110] + ZA[84];
-    ZA[140] = ZA[134] + K[41];
-    
-    ZA[91] = ZR25(ZA[79]) + ZA[78];
-    ZA[135] = ZR15(ZA[127]) + ZA[120];
-    ZA[382] = ZA[363] + ZA[381];
-    ZA[383] = ZA[381] + ZA[379];
-    ZA[370] = ZA[367] + ZA[140];
-    
-    ZA[386] = (ZCh(ZA[382], ZA[377], ZA[372]) + ZA[370]) + ZR26(ZA[382]);
-    ZA[384] = ZMa(ZA[378], ZA[373], ZA[383]) + ZR30(ZA[383]);
-    ZA[121] = ZA[111] + ZA[91];
-    ZA[141] = ZA[135] + K[42];
-    
-    ZA[98] = ZR25(ZA[87]) + ZA[79];
-    ZA[142] = ZR15(ZA[134]) + ZA[121];
-    ZA[387] = ZA[368] + ZA[386];
-    ZA[388] = ZA[386] + ZA[384];
-    ZA[375] = ZA[372] + ZA[141];
-    
-    ZA[391] = (ZCh(ZA[387], ZA[382], ZA[377]) + ZA[375]) + ZR26(ZA[387]);
-    ZA[389] = ZMa(ZA[383], ZA[378], ZA[388]) + ZR30(ZA[388]);
-    ZA[128] = ZA[118] + ZA[98];
-    ZA[147] = ZA[142] + K[43];
-    
-    ZA[99] = ZR25(ZA[88]) + ZA[87];
-    ZA[143] = ZR15(ZA[135]) + ZA[128];
-    ZA[392] = ZA[373] + ZA[391];
-    ZA[393] = ZA[391] + ZA[389];
-    ZA[380] = ZA[377] + ZA[147];
-    
-    ZA[396] = (ZCh(ZA[392], ZA[387], ZA[382]) + ZA[380]) + ZR26(ZA[392]);
-    ZA[394] = ZMa(ZA[388], ZA[383], ZA[393]) + ZR30(ZA[393]);
-    ZA[129] = ZA[119] + ZA[99];
-    ZA[148] = ZA[143] + K[44];
-    
-    ZA[106] = ZR25(ZA[94]) + ZA[88];
-    ZA[149] = ZR15(ZA[142]) + ZA[129];
-    ZA[397] = ZA[378] + ZA[396];
-    ZA[398] = ZA[396] + ZA[394];
-    ZA[385] = ZA[382] + ZA[148];
-    
-    ZA[401] = (ZCh(ZA[397], ZA[392], ZA[387]) + ZA[385]) + ZR26(ZA[397]);
-    ZA[399] = ZMa(ZA[393], ZA[388], ZA[398]) + ZR30(ZA[398]);
-    ZA[136] = ZA[126] + ZA[106];
-    ZA[153] = ZA[149] + K[45];
-    
-    ZA[107] = ZR25(ZA[95]) + ZA[94];
-    ZA[150] = ZR15(ZA[143]) + ZA[136];
-    ZA[402] = ZA[383] + ZA[401];
-    ZA[403] = ZA[401] + ZA[399];
-    ZA[390] = ZA[387] + ZA[153];
-    
-    ZA[406] = (ZCh(ZA[402], ZA[397], ZA[392]) + ZA[390]) + ZR26(ZA[402]);
-    ZA[404] = ZMa(ZA[398], ZA[393], ZA[403]) + ZR30(ZA[403]);
-    ZA[137] = ZA[127] + ZA[107];
-    ZA[154] = ZA[150] + K[46];
-    
-    ZA[114] = ZR25(ZA[102]) + ZA[95];
-    ZA[155] = ZR15(ZA[149]) + ZA[137];
-    ZA[407] = ZA[388] + ZA[406];
-    ZA[408] = ZA[406] + ZA[404];
-    ZA[395] = ZA[392] + ZA[154];
-    
-    ZA[411] = (ZCh(ZA[407], ZA[402], ZA[397]) + ZA[395]) + ZR26(ZA[407]);
-    ZA[409] = ZMa(ZA[403], ZA[398], ZA[408]) + ZR30(ZA[408]);
-    ZA[144] = ZA[134] + ZA[114];
-    ZA[159] = ZA[155] + K[47];
-    
-    ZA[115] = ZR25(ZA[103]) + ZA[102];
-    ZA[156] = ZR15(ZA[150]) + ZA[144];
-    ZA[412] = ZA[393] + ZA[411];
-    ZA[413] = ZA[411] + ZA[409];
-    ZA[400] = ZA[397] + ZA[159];
-    
-    ZA[416] = (ZCh(ZA[412], ZA[407], ZA[402]) + ZA[400]) + ZR26(ZA[412]);
-    ZA[414] = ZMa(ZA[408], ZA[403], ZA[413]) + ZR30(ZA[413]);
-    ZA[145] = ZA[135] + ZA[115];
-    ZA[160] = ZA[156] + K[48];
-    
-    ZA[122] = ZR25(ZA[110]) + ZA[103];
-    ZA[161] = ZR15(ZA[155]) + ZA[145];
-    ZA[417] = ZA[398] + ZA[416];
-    ZA[418] = ZA[416] + ZA[414];
-    ZA[405] = ZA[402] + ZA[160];
-    
-    ZA[421] = (ZCh(ZA[417], ZA[412], ZA[407]) + ZA[405]) + ZR26(ZA[417]);
-    ZA[419] = ZMa(ZA[413], ZA[408], ZA[418]) + ZR30(ZA[418]);
-    ZA[151] = ZA[142] + ZA[122];
-    ZA[165] = ZA[161] + K[49];
-    
-    ZA[123] = ZR25(ZA[111]) + ZA[110];
-    ZA[162] = ZR15(ZA[156]) + ZA[151];
-    ZA[422] = ZA[403] + ZA[421];
-    ZA[423] = ZA[421] + ZA[419];
-    ZA[410] = ZA[407] + ZA[165];
-    
-    ZA[426] = (ZCh(ZA[422], ZA[417], ZA[412]) + ZA[410]) + ZR26(ZA[422]);
-    ZA[424] = ZMa(ZA[418], ZA[413], ZA[423]) + ZR30(ZA[423]);
-    ZA[152] = ZA[143] + ZA[123];
-    ZA[166] = ZA[162] + K[50];
-    
-    ZA[130] = ZR25(ZA[118]) + ZA[111];
-    ZA[167] = ZR15(ZA[161]) + ZA[152];
-    ZA[427] = ZA[408] + ZA[426];
-    ZA[428] = ZA[426] + ZA[424];
-    ZA[415] = ZA[412] + ZA[166];
-    
-    ZA[431] = (ZCh(ZA[427], ZA[422], ZA[417]) + ZA[415]) + ZR26(ZA[427]);
-    ZA[429] = ZMa(ZA[423], ZA[418], ZA[428]) + ZR30(ZA[428]);
-    ZA[157] = ZA[149] + ZA[130];
-    ZA[170] = ZA[167] + K[51];
-    
-    ZA[131] = ZR25(ZA[119]) + ZA[118];
-    ZA[168] = ZR15(ZA[162]) + ZA[157];
-    ZA[432] = ZA[413] + ZA[431];
-    ZA[433] = ZA[431] + ZA[429];
-    ZA[420] = ZA[417] + ZA[170];
-    
-    ZA[436] = (ZCh(ZA[432], ZA[427], ZA[422]) + ZA[420]) + ZR26(ZA[432]);
-    ZA[434] = ZMa(ZA[428], ZA[423], ZA[433]) + ZR30(ZA[433]);
-    ZA[158] = ZA[150] + ZA[131];
-    ZA[171] = ZA[168] + K[52];
-    
-    ZA[138] = ZR25(ZA[126]) + ZA[119];
-    ZA[172] = ZR15(ZA[167]) + ZA[158];
-    ZA[437] = ZA[418] + ZA[436];
-    ZA[438] = ZA[436] + ZA[434];
-    ZA[425] = ZA[422] + ZA[171];
-    
-    ZA[441] = (ZCh(ZA[437], ZA[432], ZA[427]) + ZA[425]) + ZR26(ZA[437]);
-    ZA[439] = ZMa(ZA[433], ZA[428], ZA[438]) + ZR30(ZA[438]);
-    ZA[163] = ZA[155] + ZA[138];
-    ZA[174] = ZA[172] + K[53];
-    
-    ZA[139] = ZR25(ZA[127]) + ZA[126];
-    ZA[173] = ZR15(ZA[168]) + ZA[163];
-    ZA[442] = ZA[423] + ZA[441];
-    ZA[443] = ZA[441] + ZA[439];
-    ZA[430] = ZA[427] + ZA[174];
-    
-    ZA[445] = (ZCh(ZA[442], ZA[437], ZA[432]) + ZA[430]) + ZR26(ZA[442]);
-    ZA[444] = ZMa(ZA[438], ZA[433], ZA[443]) + ZR30(ZA[443]);
-    ZA[164] = ZA[156] + ZA[139];
-    ZA[175] = ZA[173] + K[54];
-    
-    ZA[146] = ZR25(ZA[134]) + ZA[127];
-    ZA[176] = ZR15(ZA[172]) + ZA[164];
-    ZA[446] = ZA[428] + ZA[445];
-    ZA[447] = ZA[445] + ZA[444];
-    ZA[435] = ZA[432] + ZA[175];
-    
-    ZA[449] = (ZCh(ZA[446], ZA[442], ZA[437]) + ZA[435]) + ZR26(ZA[446]);
-    ZA[448] = ZMa(ZA[443], ZA[438], ZA[447]) + ZR30(ZA[447]);
-    ZA[169] = ZA[161] + ZA[146];
-    ZA[178] = ZA[176] + K[55];
-    
-    ZA[177] = ZR15(ZA[173]) + ZA[169];
-    ZA[451] = ZA[449] + ZA[448];
-    ZA[450] = ZA[433] + ZA[449];
-    ZA[440] = ZA[437] + ZA[178];
-    
-    ZA[453] = (ZCh(ZA[450], ZA[446], ZA[442]) + ZA[440]) + ZR26(ZA[450]);
-    ZA[452] = ZMa(ZA[447], ZA[443], ZA[451]) + ZR30(ZA[451]);
-    ZA[179] = ZA[177] + K[56];
-    
-    ZA[454] = ZA[438] + ZA[453];
-    ZA[494] = ZA[442] + ZA[179];
-    ZA[455] = ZA[453] + ZA[452];
-    
-    ZA[457] = (ZCh(ZA[454], ZA[450], ZA[446]) + ZA[494]) + ZR26(ZA[454]);
-    ZA[456] = ZMa(ZA[451], ZA[447], ZA[455]) + ZR30(ZA[455]);
-    
-    ZA[459] = ZA[457] + ZA[456];
-    
-    ZA[461] = ZA[455] + state1;
-    ZA[460] = ZA[459] + state0;
-    
-    ZA[495] = ZA[460] + K[57];
-    ZA[469] = ZA[461] + K[58];
-    
-    ZA[498] = (ZCh(ZA[495], K[59], K[60]) + ZA[469]) + ZR26(ZA[495]);
-    ZA[462] = ZA[451] + state2;
-    
-    ZA[496] = ZA[460] + K[61];
-    ZA[506] = ZA[498] + K[62];
-    ZA[470] = ZA[462] + K[63];
-    
-    ZA[507] = (ZCh(ZA[506], ZA[495], K[59]) + ZA[470]) + ZR26(ZA[506]);
-    ZA[500] = ZMa(K[64], K[65], ZA[496]) + ZR30(ZA[496]);
-    ZA[463] = ZA[447] + state3;
-    
-    ZA[458] = ZA[443] + ZA[457];
-    ZA[499] = ZA[498] + ZA[500];
-    ZA[508] = ZA[507] + K[65];
-    ZA[473] = ZA[463] + K[66];
-    
-    ZA[510] = (ZCh(ZA[508], ZA[506], ZA[495]) + ZA[473]) + ZR26(ZA[508]);
-    ZA[928] = ZMa(ZA[496], K[64], ZA[499]) + ZR30(ZA[499]);
-    ZA[464] = ZA[458] + state4;
-    
-    ZA[476] = ZA[464] + ZA[460] + K[67];
-    ZA[511] = ZA[510] + K[64];
-    ZA[509] = ZA[928] + ZA[507];
-    ZA[465] = ZA[454] + state5;
-    
-    ZA[514] = (ZCh(ZA[511], ZA[508], ZA[506]) + ZA[476]) + ZR26(ZA[511]);
-    ZA[512] = ZMa(ZA[499], ZA[496], ZA[509]) + ZR30(ZA[509]);
-    ZA[478] = ZA[465] + K[68];
-    
-    ZA[519] = ZA[506] + ZA[478];
-    ZA[516] = ZA[496] + ZA[514];
-    ZA[513] = ZA[510] + ZA[512];
-    ZA[466] = ZA[450] + state6;
-    
-    ZA[520] = (ZCh(ZA[516], ZA[511], ZA[508]) + ZA[519]) + ZR26(ZA[516]);
-    ZA[515] = ZMa(ZA[509], ZA[499], ZA[513]) + ZR30(ZA[513]);
-    ZA[480] = ZA[466] + K[69];
-    
-    ZA[524] = ZA[508] + ZA[480];
-    ZA[521] = ZA[499] + ZA[520];
-    ZA[517] = ZA[514] + ZA[515];
-    ZA[467] = ZA[446] + state7;
-    
-    ZA[525] = (ZCh(ZA[521], ZA[516], ZA[511]) + ZA[524]) + ZR26(ZA[521]);
-    ZA[522] = ZMa(ZA[513], ZA[509], ZA[517]) + ZR30(ZA[517]);
-    ZA[484] = ZA[467] + K[70];
-    
-    ZA[529] = ZA[511] + ZA[484];
-    ZA[526] = ZA[509] + ZA[525];
-    ZA[523] = ZA[520] + ZA[522];
-    
-    ZA[530] = (ZCh(ZA[526], ZA[521], ZA[516]) + ZA[529]) + ZR26(ZA[526]);
-    ZA[550] = ZMa(ZA[517], ZA[513], ZA[523]) + ZR30(ZA[523]);
-    
-    ZA[531] = ZA[513] + ZA[530];
-    ZA[533] = ZA[516] + K[71];
-    ZA[527] = ZA[550] + ZA[525];
-    
-    ZA[534] = (ZCh(ZA[531], ZA[526], ZA[521]) + ZA[533]) + ZR26(ZA[531]);
-    ZA[551] = ZMa(ZA[523], ZA[517], ZA[527]) + ZR30(ZA[527]);
-    
-    ZA[535] = ZA[517] + ZA[534];
-    ZA[538] = ZA[521] + K[1];
-    ZA[532] = ZA[551] + ZA[530];
-    
-    ZA[539] = (ZCh(ZA[535], ZA[531], ZA[526]) + ZA[538]) + ZR26(ZA[535]);
-    ZA[552] = ZMa(ZA[527], ZA[523], ZA[532]) + ZR30(ZA[532]);
-    
-    ZA[540] = ZA[523] + ZA[539];
-    ZA[542] = ZA[526] + K[2];
-    ZA[536] = ZA[552] + ZA[534];
-    
-    ZA[543] = (ZCh(ZA[540], ZA[535], ZA[531]) + ZA[542]) + ZR26(ZA[540]);
-    ZA[553] = ZMa(ZA[532], ZA[527], ZA[536]) + ZR30(ZA[536]);
-    
-    ZA[544] = ZA[527] + ZA[543];
-    ZA[555] = ZA[531] + K[3];
-    ZA[541] = ZA[553] + ZA[539];
-    
-    ZA[558] = (ZCh(ZA[544], ZA[540], ZA[535]) + ZA[555]) + ZR26(ZA[544]);
-    ZA[547] = ZMa(ZA[536], ZA[532], ZA[541]) + ZR30(ZA[541]);
-    
-    ZA[559] = ZA[532] + ZA[558];
-    ZA[556] = ZA[535] + K[4];
-    ZA[545] = ZA[547] + ZA[543];
-    
-    ZA[562] = (ZCh(ZA[559], ZA[544], ZA[540]) + ZA[556]) + ZR26(ZA[559]);
-    ZA[561] = ZMa(ZA[541], ZA[536], ZA[545]) + ZR30(ZA[545]);
-    
-    ZA[563] = ZA[536] + ZA[562];
-    ZA[560] = ZA[561] + ZA[558];
-    ZA[557] = ZA[540] + K[5];
-    
-    ZA[568] = (ZCh(ZA[563], ZA[559], ZA[544]) + ZA[557]) + ZR26(ZA[563]);
-    ZA[564] = ZMa(ZA[545], ZA[541], ZA[560]) + ZR30(ZA[560]);
-    
-    ZA[569] = ZA[541] + ZA[568];
-    ZA[572] = ZA[544] + K[6];
-    ZA[565] = ZA[562] + ZA[564];
-    
-    ZA[574] = (ZCh(ZA[569], ZA[563], ZA[559]) + ZA[572]) + ZR26(ZA[569]);
-    ZA[570] = ZMa(ZA[560], ZA[545], ZA[565]) + ZR30(ZA[565]);
-    ZA[468] = ZR25(ZA[461]);
-    
-    ZA[497] = ZA[468] + ZA[460];
-    ZA[575] = ZA[545] + ZA[574];
-    ZA[571] = ZA[568] + ZA[570];
-    ZA[573] = ZA[559] + K[72];
-    
-    ZA[578] = (ZCh(ZA[575], ZA[569], ZA[563]) + ZA[573]) + ZR26(ZA[575]);
-    ZA[576] = ZMa(ZA[565], ZA[560], ZA[571]) + ZR30(ZA[571]);
-    ZA[929] = ZR25(ZA[462]);
-    ZA[503] = ZA[497] + 0xe49b69c1U;
-    
-    ZA[471] = ZA[929] + ZA[461] + K[74];
-    ZA[582] = ZA[563] + ZA[503];
-    ZA[579] = ZA[560] + ZA[578];
-    ZA[577] = ZA[574] + ZA[576];
-    
-    ZA[583] = (ZCh(ZA[579], ZA[575], ZA[569]) + ZA[582]) + ZR26(ZA[579]);
-    ZA[580] = ZMa(ZA[571], ZA[565], ZA[577]) + ZR30(ZA[577]);
-    ZA[488] = ZA[471] + K[75];
-    
-    ZA[472] = ZR25(ZA[463]) + ZA[462];
-    ZA[587] = ZA[569] + ZA[488];
-    ZA[584] = ZA[565] + ZA[583];
-    ZA[581] = ZA[578] + ZA[580];
-    
-    ZA[588] = (ZCh(ZA[584], ZA[579], ZA[575]) + ZA[587]) + ZR26(ZA[584]);
-    ZA[586] = ZMa(ZA[577], ZA[571], ZA[581]) + ZR30(ZA[581]);
-    ZA[501] = ZR15(ZA[497]) + ZA[472];
-    ZA[475] = ZR15(ZA[471]);
-    ZA[926] = ZA[575] + K[8];
-    
-    ZA[474] = ZA[475] + ZA[463] + ZR25(ZA[464]);
-    ZA[927] = ZA[926] + ZA[501];
-    ZA[589] = ZA[571] + ZA[588];
-    ZA[585] = ZA[583] + ZA[586];
-    
-    ZA[592] = (ZCh(ZA[589], ZA[584], ZA[579]) + ZA[927]) + ZR26(ZA[589]);
-    ZA[590] = ZMa(ZA[581], ZA[577], ZA[585]) + ZR30(ZA[585]);
-    ZA[477] = ZR25(ZA[465]) + ZA[464];
-    ZA[489] = ZA[474] + K[9];
-    
-    ZA[518] = ZR15(ZA[501]) + ZA[477];
-    ZA[479] = ZR25(ZA[466]);
-    ZA[596] = ZA[579] + ZA[489];
-    ZA[593] = ZA[577] + ZA[592];
-    ZA[591] = ZA[588] + ZA[590];
-    
-    ZA[597] = (ZCh(ZA[593], ZA[589], ZA[584]) + ZA[596]) + ZR26(ZA[593]);
-    ZA[594] = ZMa(ZA[585], ZA[581], ZA[591]) + ZR30(ZA[591]);
-    ZA[481] = ZA[479] + ZA[465];
-    ZA[601] = ZA[518] + K[11];
-    
-    ZA[482] = ZR15(ZA[474]) + ZA[481];
-    ZA[602] = ZA[584] + ZA[601];
-    ZA[598] = ZA[581] + ZA[597];
-    ZA[595] = ZA[592] + ZA[594];
-    
-    ZA[632] = (ZCh(ZA[598], ZA[593], ZA[589]) + ZA[602]) + ZR26(ZA[598]);
-    ZA[599] = ZMa(ZA[591], ZA[585], ZA[595]) + ZR30(ZA[595]);
-    ZA[483] = ZA[466] + K[76] + ZR25(ZA[467]);
-    ZA[490] = ZA[482] + K[12];
-    
-    ZA[528] = ZR15(ZA[518]) + ZA[483];
-    ZA[736] = ZA[585] + ZA[632];
-    ZA[605] = ZA[589] + ZA[490];
-    ZA[600] = ZA[597] + ZA[599];
-    ZA[485] = ZA[467] + K[77];
-    
-    ZA[738] = (ZCh(ZA[736], ZA[598], ZA[593]) + ZA[605]) + ZR26(ZA[736]);
-    ZA[744] = ZMa(ZA[595], ZA[591], ZA[600]) + ZR30(ZA[600]);
-    ZA[487] = ZR15(ZA[482]) + ZA[485];
-    ZA[603] = ZA[528] + K[14];
-    
-    ZA[502] = ZA[497] + ZA[487];
-    ZA[739] = ZA[591] + ZA[738];
-    ZA[604] = ZA[593] + ZA[603];
-    ZA[737] = ZA[744] + ZA[632];
-    
-    ZA[741] = (ZCh(ZA[739], ZA[736], ZA[598]) + ZA[604]) + ZR26(ZA[739]);
-    ZA[745] = ZMa(ZA[600], ZA[595], ZA[737]) + ZR30(ZA[737]);
-    ZA[486] = ZA[471] + K[10];
-    ZA[606] = ZA[502] + K[15];
-    
-    ZA[537] = ZR15(ZA[528]) + ZA[486];
-    ZA[742] = ZA[595] + ZA[741];
-    ZA[613] = ZA[598] + ZA[606];
-    ZA[740] = ZA[745] + ZA[738];
-    
-    ZA[747] = (ZCh(ZA[742], ZA[739], ZA[736]) + ZA[613]) + ZR26(ZA[742]);
-    ZA[746] = ZMa(ZA[737], ZA[600], ZA[740]) + ZR30(ZA[740]);
-    ZA[607] = ZA[537] + K[16];
-    
-    ZA[546] = ZR15(ZA[502]) + ZA[501];
-    ZA[751] = ZA[736] + ZA[607];
-    ZA[748] = ZA[600] + ZA[747];
-    ZA[743] = ZA[746] + ZA[741];
-    
-    ZA[752] = (ZCh(ZA[748], ZA[742], ZA[739]) + ZA[751]) + ZR26(ZA[748]);
-    ZA[749] = ZMa(ZA[740], ZA[737], ZA[743]) + ZR30(ZA[743]);
-    ZA[608] = ZA[546] + K[17];
-    
-    ZA[554] = ZR15(ZA[537]) + ZA[474];
-    ZA[756] = ZA[739] + ZA[608];
-    ZA[753] = ZA[737] + ZA[752];
-    ZA[750] = ZA[747] + ZA[749];
-    
-    ZA[757] = (ZCh(ZA[753], ZA[748], ZA[742]) + ZA[756]) + ZR26(ZA[753]);
-    ZA[754] = ZMa(ZA[743], ZA[740], ZA[750]) + ZR30(ZA[750]);
-    ZA[609] = ZA[554] + K[18];
-    
-    ZA[566] = ZR15(ZA[546]) + ZA[518];
-    ZA[761] = ZA[742] + ZA[609];
-    ZA[758] = ZA[740] + ZA[757];
-    ZA[755] = ZA[752] + ZA[754];
-    
-    ZA[762] = (ZCh(ZA[758], ZA[753], ZA[748]) + ZA[761]) + ZR26(ZA[758]);
-    ZA[759] = ZMa(ZA[750], ZA[743], ZA[755]) + ZR30(ZA[755]);
-    ZA[610] = ZA[566] + K[19];
-    
-    ZA[567] = ZR15(ZA[554]) + ZA[482];
-    ZA[766] = ZA[748] + ZA[610];
-    ZA[763] = ZA[743] + ZA[762];
-    ZA[760] = ZA[757] + ZA[759];
-    
-    ZA[767] = (ZCh(ZA[763], ZA[758], ZA[753]) + ZA[766]) + ZR26(ZA[763]);
-    ZA[764] = ZMa(ZA[755], ZA[750], ZA[760]) + ZR30(ZA[760]);
-    ZA[611] = ZA[567] + K[20];
-    
-    ZA[614] = ZR15(ZA[566]) + ZA[528];
-    ZA[771] = ZA[753] + ZA[611];
-    ZA[768] = ZA[750] + ZA[767];
-    ZA[765] = ZA[762] + ZA[764];
-    
-    ZA[772] = (ZCh(ZA[768], ZA[763], ZA[758]) + ZA[771]) + ZR26(ZA[768]);
-    ZA[769] = ZMa(ZA[760], ZA[755], ZA[765]) + ZR30(ZA[765]);
-    ZA[612] = ZA[502] + K[78];
-    ZA[615] = ZA[614] + K[22];
-    
-    ZA[616] = ZR15(ZA[567]) + ZA[612];
-    ZA[504] = ZR25(ZA[497]) + K[76];
-    ZA[776] = ZA[758] + ZA[615];
-    ZA[773] = ZA[755] + ZA[772];
-    ZA[770] = ZA[767] + ZA[769];
-    
-    ZA[777] = (ZCh(ZA[773], ZA[768], ZA[763]) + ZA[776]) + ZR26(ZA[773]);
-    ZA[774] = ZMa(ZA[765], ZA[760], ZA[770]) + ZR30(ZA[770]);
-    ZA[492] = ZR25(ZA[471]);
-    ZA[618] = ZA[537] + ZA[504];
-    ZA[617] = ZA[616] + K[23];
-    
-    ZA[619] = ZR15(ZA[614]) + ZA[618];
-    ZA[781] = ZA[763] + ZA[617];
-    ZA[778] = ZA[760] + ZA[777];
-    ZA[775] = ZA[772] + ZA[774];
-    ZA[505] = ZA[492] + ZA[497];
-    
-    ZA[782] = (ZCh(ZA[778], ZA[773], ZA[768]) + ZA[781]) + ZR26(ZA[778]);
-    ZA[779] = ZMa(ZA[770], ZA[765], ZA[775]) + ZR30(ZA[775]);
-    ZA[621] = ZA[505] + ZA[546];
-    ZA[620] = ZA[619] + K[24];
-    
-    ZA[622] = ZR15(ZA[616]) + ZA[621];
-    ZA[625] = ZR25(ZA[501]);
-    ZA[786] = ZA[768] + ZA[620];
-    ZA[783] = ZA[765] + ZA[782];
-    ZA[624] = ZA[554] + ZA[471];
-    ZA[780] = ZA[777] + ZA[779];
-    
-    ZA[787] = (ZCh(ZA[783], ZA[778], ZA[773]) + ZA[786]) + ZR26(ZA[783]);
-    ZA[784] = ZMa(ZA[775], ZA[770], ZA[780]) + ZR30(ZA[780]);
-    ZA[493] = ZR25(ZA[474]);
-    ZA[626] = ZA[625] + ZA[624];
-    ZA[623] = ZA[622] + K[25];
-    
-    ZA[627] = ZR15(ZA[619]) + ZA[626];
-    ZA[791] = ZA[773] + ZA[623];
-    ZA[788] = ZA[770] + ZA[787];
-    ZA[785] = ZA[782] + ZA[784];
-    ZA[629] = ZA[493] + ZA[501];
-    
-    ZA[792] = (ZCh(ZA[788], ZA[783], ZA[778]) + ZA[791]) + ZR26(ZA[788]);
-    ZA[789] = ZMa(ZA[780], ZA[775], ZA[785]) + ZR30(ZA[785]);
-    ZA[630] = ZA[566] + ZA[629];
-    ZA[628] = ZA[627] + K[26];
-    
-    ZA[634] = ZR25(ZA[518]) + ZA[474];
-    ZA[631] = ZR15(ZA[622]) + ZA[630];
-    ZA[796] = ZA[778] + ZA[628];
-    ZA[793] = ZA[775] + ZA[792];
-    ZA[790] = ZA[787] + ZA[789];
-    
-    ZA[797] = (ZCh(ZA[793], ZA[788], ZA[783]) + ZA[796]) + ZR26(ZA[793]);
-    ZA[794] = ZMa(ZA[785], ZA[780], ZA[790]) + ZR30(ZA[790]);
-    ZA[491] = ZR25(ZA[482]);
-    ZA[635] = ZA[567] + ZA[634];
-    ZA[633] = ZA[631] + K[27];
-    
-    ZA[636] = ZR15(ZA[627]) + ZA[635];
-    ZA[801] = ZA[783] + ZA[633];
-    ZA[798] = ZA[780] + ZA[797];
-    ZA[795] = ZA[792] + ZA[794];
-    ZA[638] = ZA[491] + ZA[518];
-    
-    ZA[802] = (ZCh(ZA[798], ZA[793], ZA[788]) + ZA[801]) + ZR26(ZA[798]);
-    ZA[799] = ZMa(ZA[790], ZA[785], ZA[795]) + ZR30(ZA[795]);
-    ZA[639] = ZA[638] + ZA[614];
-    ZA[637] = ZA[636] + K[28];
-    
-    ZA[642] = ZR25(ZA[528]) + ZA[482];
-    ZA[640] = ZR15(ZA[631]) + ZA[639];
-    ZA[806] = ZA[788] + ZA[637];
-    ZA[803] = ZA[785] + ZA[802];
-    ZA[800] = ZA[797] + ZA[799];
-    
-    ZA[807] = (ZCh(ZA[803], ZA[798], ZA[793]) + ZA[806]) + ZR26(ZA[803]);
-    ZA[804] = ZMa(ZA[795], ZA[790], ZA[800]) + ZR30(ZA[800]);
-    ZA[643] = ZA[616] + ZA[642];
-    ZA[641] = ZA[640] + K[29];
-    
-    ZA[646] = ZR25(ZA[502]) + ZA[528];
-    ZA[644] = ZR15(ZA[636]) + ZA[643];
-    ZA[811] = ZA[793] + ZA[641];
-    ZA[808] = ZA[790] + ZA[807];
-    ZA[805] = ZA[802] + ZA[804];
-    
-    ZA[812] = (ZCh(ZA[808], ZA[803], ZA[798]) + ZA[811]) + ZR26(ZA[808]);
-    ZA[809] = ZMa(ZA[800], ZA[795], ZA[805]) + ZR30(ZA[805]);
-    ZA[647] = ZA[619] + ZA[646];
-    ZA[645] = ZA[644] + K[30];
-    
-    ZA[650] = ZR25(ZA[537]) + ZA[502];
-    ZA[648] = ZR15(ZA[640]) + ZA[647];
-    ZA[816] = ZA[798] + ZA[645];
-    ZA[813] = ZA[795] + ZA[812];
-    ZA[810] = ZA[807] + ZA[809];
-    
-    ZA[817] = (ZCh(ZA[813], ZA[808], ZA[803]) + ZA[816]) + ZR26(ZA[813]);
-    ZA[814] = ZMa(ZA[805], ZA[800], ZA[810]) + ZR30(ZA[810]);
-    ZA[925] = ZA[622] + ZA[650];
-    ZA[649] = ZA[648] + K[31];
-    
-    ZA[653] = ZR25(ZA[546]) + ZA[537];
-    ZA[651] = ZR15(ZA[644]) + ZA[925];
-    ZA[821] = ZA[803] + ZA[649];
-    ZA[818] = ZA[800] + ZA[817];
-    ZA[815] = ZA[812] + ZA[814];
-    
-    ZA[822] = (ZCh(ZA[818], ZA[813], ZA[808]) + ZA[821]) + ZR26(ZA[818]);
-    ZA[819] = ZMa(ZA[810], ZA[805], ZA[815]) + ZR30(ZA[815]);
-    ZA[654] = ZA[627] + ZA[653];
-    ZA[652] = ZA[651] + K[32];
-    
-    ZA[657] = ZR25(ZA[554]) + ZA[546];
-    ZA[655] = ZR15(ZA[648]) + ZA[654];
-    ZA[826] = ZA[808] + ZA[652];
-    ZA[823] = ZA[805] + ZA[822];
-    ZA[820] = ZA[817] + ZA[819];
-    
-    ZA[827] = (ZCh(ZA[823], ZA[818], ZA[813]) + ZA[826]) + ZR26(ZA[823]);
-    ZA[824] = ZMa(ZA[815], ZA[810], ZA[820]) + ZR30(ZA[820]);
-    ZA[658] = ZA[631] + ZA[657];
-    ZA[656] = ZA[655] + K[33];
-    
-    ZA[661] = ZR25(ZA[566]) + ZA[554];
-    ZA[659] = ZR15(ZA[651]) + ZA[658];
-    ZA[831] = ZA[813] + ZA[656];
-    ZA[828] = ZA[810] + ZA[827];
-    ZA[825] = ZA[822] + ZA[824];
-    
-    ZA[832] = (ZCh(ZA[828], ZA[823], ZA[818]) + ZA[831]) + ZR26(ZA[828]);
-    ZA[829] = ZMa(ZA[820], ZA[815], ZA[825]) + ZR30(ZA[825]);
-    ZA[662] = ZA[636] + ZA[661];
-    ZA[660] = ZA[659] + K[34];
-    
-    ZA[665] = ZR25(ZA[567]) + ZA[566];
-    ZA[663] = ZR15(ZA[655]) + ZA[662];
-    ZA[836] = ZA[818] + ZA[660];
-    ZA[833] = ZA[815] + ZA[832];
-    ZA[830] = ZA[827] + ZA[829];
-    
-    ZA[837] = (ZCh(ZA[833], ZA[828], ZA[823]) + ZA[836]) + ZR26(ZA[833]);
-    ZA[834] = ZMa(ZA[825], ZA[820], ZA[830]) + ZR30(ZA[830]);
-    ZA[666] = ZA[640] + ZA[665];
-    ZA[664] = ZA[663] + K[35];
-    
-    ZA[669] = ZR25(ZA[614]) + ZA[567];
-    ZA[667] = ZR15(ZA[659]) + ZA[666];
-    ZA[841] = ZA[823] + ZA[664];
-    ZA[838] = ZA[820] + ZA[837];
-    ZA[835] = ZA[832] + ZA[834];
-    
-    ZA[842] = (ZCh(ZA[838], ZA[833], ZA[828]) + ZA[841]) + ZR26(ZA[838]);
-    ZA[839] = ZMa(ZA[830], ZA[825], ZA[835]) + ZR30(ZA[835]);
-    ZA[670] = ZA[644] + ZA[669];
-    ZA[668] = ZA[667] + K[36];
-    
-    ZA[677] = ZR25(ZA[616]) + ZA[614];
-    ZA[671] = ZR15(ZA[663]) + ZA[670];
-    ZA[846] = ZA[828] + ZA[668];
-    ZA[843] = ZA[825] + ZA[842];
-    ZA[840] = ZA[837] + ZA[839];
-    
-    ZA[847] = (ZCh(ZA[843], ZA[838], ZA[833]) + ZA[846]) + ZR26(ZA[843]);
-    ZA[844] = ZMa(ZA[835], ZA[830], ZA[840]) + ZR30(ZA[840]);
-    ZA[678] = ZA[648] + ZA[677];
-    ZA[676] = ZA[671] + K[37];
-    
-    ZA[682] = ZR25(ZA[619]) + ZA[616];
-    ZA[679] = ZR15(ZA[667]) + ZA[678];
-    ZA[851] = ZA[833] + ZA[676];
-    ZA[848] = ZA[830] + ZA[847];
-    ZA[845] = ZA[842] + ZA[844];
-    
-    ZA[852] = (ZCh(ZA[848], ZA[843], ZA[838]) + ZA[851]) + ZR26(ZA[848]);
-    ZA[849] = ZMa(ZA[840], ZA[835], ZA[845]) + ZR30(ZA[845]);
-    ZA[683] = ZA[651] + ZA[682];
-    ZA[680] = ZA[679] + K[38];
-    
-    ZA[686] = ZR25(ZA[622]) + ZA[619];
-    ZA[684] = ZR15(ZA[671]) + ZA[683];
-    ZA[856] = ZA[838] + ZA[680];
-    ZA[853] = ZA[835] + ZA[852];
-    ZA[850] = ZA[847] + ZA[849];
-    
-    ZA[857] = (ZCh(ZA[853], ZA[848], ZA[843]) + ZA[856]) + ZR26(ZA[853]);
-    ZA[854] = ZMa(ZA[845], ZA[840], ZA[850]) + ZR30(ZA[850]);
-    ZA[687] = ZA[655] + ZA[686];
-    ZA[685] = ZA[684] + K[39];
-    
-    ZA[690] = ZR25(ZA[627]) + ZA[622];
-    ZA[688] = ZR15(ZA[679]) + ZA[687];
-    ZA[861] = ZA[843] + ZA[685];
-    ZA[858] = ZA[840] + ZA[857];
-    ZA[855] = ZA[852] + ZA[854];
-    
-    ZA[862] = (ZCh(ZA[858], ZA[853], ZA[848]) + ZA[861]) + ZR26(ZA[858]);
-    ZA[859] = ZMa(ZA[850], ZA[845], ZA[855]) + ZR30(ZA[855]);
-    ZA[691] = ZA[659] + ZA[690];
-    ZA[689] = ZA[688] + K[40];
-    
-    ZA[694] = ZR25(ZA[631]) + ZA[627];
-    ZA[692] = ZR15(ZA[684]) + ZA[691];
-    ZA[866] = ZA[848] + ZA[689];
-    ZA[863] = ZA[845] + ZA[862];
-    ZA[860] = ZA[857] + ZA[859];
-    
-    ZA[867] = (ZCh(ZA[863], ZA[858], ZA[853]) + ZA[866]) + ZR26(ZA[863]);
-    ZA[864] = ZMa(ZA[855], ZA[850], ZA[860]) + ZR30(ZA[860]);
-    ZA[695] = ZA[663] + ZA[694];
-    ZA[693] = ZA[692] + K[41];
-    
-    ZA[698] = ZR25(ZA[636]) + ZA[631];
-    ZA[696] = ZR15(ZA[688]) + ZA[695];
-    ZA[871] = ZA[853] + ZA[693];
-    ZA[868] = ZA[850] + ZA[867];
-    ZA[865] = ZA[862] + ZA[864];
-    
-    ZA[873] = (ZCh(ZA[868], ZA[863], ZA[858]) + ZA[871]) + ZR26(ZA[868]);
-    ZA[869] = ZMa(ZA[860], ZA[855], ZA[865]) + ZR30(ZA[865]);
-    ZA[699] = ZA[667] + ZA[698];
-    ZA[697] = ZA[696] + K[42];
-    
-    ZA[702] = ZR25(ZA[640]) + ZA[636];
-    ZA[700] = ZR15(ZA[692]) + ZA[699];
-    ZA[877] = ZA[858] + ZA[697];
-    ZA[874] = ZA[855] + ZA[873];
-    ZA[870] = ZA[867] + ZA[869];
-    
-    ZA[878] = (ZCh(ZA[874], ZA[868], ZA[863]) + ZA[877]) + ZR26(ZA[874]);
-    ZA[875] = ZMa(ZA[865], ZA[860], ZA[870]) + ZR30(ZA[870]);
-    ZA[703] = ZA[671] + ZA[702];
-    ZA[701] = ZA[700] + K[43];
-    
-    ZA[706] = ZR25(ZA[644]) + ZA[640];
-    ZA[704] = ZR15(ZA[696]) + ZA[703];
-    ZA[882] = ZA[863] + ZA[701];
-    ZA[879] = ZA[860] + ZA[878];
-    ZA[876] = ZA[873] + ZA[875];
-    
-    ZA[883] = (ZCh(ZA[879], ZA[874], ZA[868]) + ZA[882]) + ZR26(ZA[879]);
-    ZA[880] = ZMa(ZA[870], ZA[865], ZA[876]) + ZR30(ZA[876]);
-    ZA[707] = ZA[679] + ZA[706];
-    ZA[705] = ZA[704] + K[44];
-    
-    ZA[710] = ZR25(ZA[648]) + ZA[644];
-    ZA[708] = ZR15(ZA[700]) + ZA[707];
-    ZA[887] = ZA[868] + ZA[705];
-    ZA[884] = ZA[865] + ZA[883];
-    ZA[881] = ZA[878] + ZA[880];
-    
-    ZA[888] = (ZCh(ZA[884], ZA[879], ZA[874]) + ZA[887]) + ZR26(ZA[884]);
-    ZA[885] = ZMa(ZA[876], ZA[870], ZA[881]) + ZR30(ZA[881]);
-    ZA[711] = ZA[684] + ZA[710];
-    ZA[709] = ZA[708] + K[45];
-    
-    ZA[714] = ZR25(ZA[651]) + ZA[648];
-    ZA[712] = ZR15(ZA[704]) + ZA[711];
-    ZA[892] = ZA[874] + ZA[709];
-    ZA[889] = ZA[870] + ZA[888];
-    ZA[886] = ZA[883] + ZA[885];
-    
-    ZA[893] = (ZCh(ZA[889], ZA[884], ZA[879]) + ZA[892]) + ZR26(ZA[889]);
-    ZA[890] = ZMa(ZA[881], ZA[876], ZA[886]) + ZR30(ZA[886]);
-    ZA[715] = ZA[688] + ZA[714];
-    ZA[713] = ZA[712] + K[46];
-    
-    ZA[718] = ZR25(ZA[655]) + ZA[651];
-    ZA[716] = ZR15(ZA[708]) + ZA[715];
-    ZA[897] = ZA[879] + ZA[713];
-    ZA[894] = ZA[876] + ZA[893];
-    ZA[891] = ZA[888] + ZA[890];
-    
-    ZA[898] = (ZCh(ZA[894], ZA[889], ZA[884]) + ZA[897]) + ZR26(ZA[894]);
-    ZA[895] = ZMa(ZA[886], ZA[881], ZA[891]) + ZR30(ZA[891]);
-    ZA[719] = ZA[692] + ZA[718];
-    ZA[717] = ZA[716] + K[47];
-    
-    ZA[722] = ZR25(ZA[659]) + ZA[655];
-    ZA[720] = ZR15(ZA[712]) + ZA[719];
-    ZA[902] = ZA[884] + ZA[717];
-    ZA[899] = ZA[881] + ZA[898];
-    ZA[896] = ZA[893] + ZA[895];
-    
-    ZA[903] = (ZCh(ZA[899], ZA[894], ZA[889]) + ZA[902]) + ZR26(ZA[899]);
-    ZA[900] = ZMa(ZA[891], ZA[886], ZA[896]) + ZR30(ZA[896]);
-    ZA[723] = ZA[696] + ZA[722];
-    ZA[721] = ZA[720] + K[48];
-    
-    ZA[672] = ZR25(ZA[663]) + ZA[659];
-    ZA[724] = ZR15(ZA[716]) + ZA[723];
-    ZA[907] = ZA[889] + ZA[721];
-    ZA[904] = ZA[886] + ZA[903];
-    ZA[901] = ZA[898] + ZA[900];
-    
-    ZA[908] = (ZCh(ZA[904], ZA[899], ZA[894]) + ZA[907]) + ZR26(ZA[904]);
-    ZA[905] = ZMa(ZA[896], ZA[891], ZA[901]) + ZR30(ZA[901]);
-    ZA[673] = ZR25(ZA[667]) + ZA[663];
-    ZA[726] = ZA[700] + ZA[672];
-    ZA[725] = ZA[724] + K[49];
-    
-    ZA[727] = ZR15(ZA[720]) + ZA[726];
-    ZA[912] = ZA[894] + ZA[725];
-    ZA[909] = ZA[891] + ZA[908];
-    ZA[906] = ZA[903] + ZA[905];
-    ZA[675] = ZA[667] + K[52];
-    ZA[729] = ZA[704] + ZA[673];
-    
-    ZA[913] = (ZCh(ZA[909], ZA[904], ZA[899]) + ZA[912]) + ZR26(ZA[909]);
-    ZA[910] = ZMa(ZA[901], ZA[896], ZA[906]) + ZR30(ZA[906]);
-    ZA[674] = ZR25(ZA[671]) + ZA[675];
-    ZA[730] = ZR15(ZA[724]) + ZA[729];
-    ZA[728] = ZA[727] + K[50];
-    
-    ZA[681] = ZR25(ZA[679]) + ZA[671];
-    ZA[917] = ZA[899] + ZA[901] + ZA[728];
-    ZA[914] = ZA[896] + ZA[913];
-    ZA[911] = ZA[908] + ZA[910];
-    ZA[732] = ZA[708] + ZA[674];
-    ZA[731] = ZA[730] + K[51];
-    
-    ZA[918] = (ZCh(ZA[914], ZA[909], ZA[904]) + ZA[917]) + ZR26(ZA[914]);
-    ZA[915] = ZMa(ZA[906], ZA[901], ZA[911]) + ZR30(ZA[911]);
-    ZA[733] = ZR15(ZA[727]) + ZA[732];
-    ZA[919] = ZA[906] + ZA[904] + ZA[731];
-    ZA[734] = ZA[712] + ZA[681];
-    
-    ZA[920] = (ZCh(ZA[918], ZA[914], ZA[909]) + ZA[919]) + ZR26(ZA[918]);
-    ZA[735] = ZR15(ZA[730]) + ZA[734];
-    ZA[921] = ZA[911] + ZA[909] + ZA[733];
-    ZA[916] = ZA[913] + ZA[915];
-    
-    ZA[922] = (ZCh(ZA[920], ZA[918], ZA[914]) + ZA[921]) + ZR26(ZA[920]);
-    ZA[923] = ZA[916] + ZA[914] + ZA[735];
-    
-    ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
-    
-#define FOUND (0x0F)
-#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-
-#if defined(VECTORS4)
-	bool result = any(ZA[924] == K[79]);
-
-	if (result) {
-		if (ZA[924].x == K[79])
-			SETFOUND(Znonce.x);
-		if (ZA[924].y == K[79])
-			SETFOUND(Znonce.y);
-		if (ZA[924].z == K[79])
-			SETFOUND(Znonce.z);
-		if (ZA[924].w == K[79])
-			SETFOUND(Znonce.w);
-	}
-#elif defined(VECTORS2)
-	bool result = any(ZA[924] == K[79]);
-
-	if (result) {
-		if (ZA[924].x == K[79])
-			SETFOUND(Znonce.x);
-		if (ZA[924].y == K[79])
-			SETFOUND(Znonce.y);
-	}
-#else
-	if (ZA[924] == K[79])
-		SETFOUND(Znonce);
-#endif
-}

+ 0 - 599
diakgcn121016.cl

@@ -1,599 +0,0 @@
-// DiaKGCN 27-04-2012 - OpenCL kernel by Diapolo
-//
-// Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
-// The kernel was rewritten by me (Diapolo) and is still public-domain!
-/* `u` is the word type of the kernel's working arrays and nonce: a 4-wide
- * vector (uint4) when VECTORS4 is defined, a 2-wide vector (uint2) when
- * VECTORS2 is defined, and a plain scalar uint otherwise. */
-#ifdef VECTORS4
-	typedef uint4 u;
-#elif defined VECTORS2
-	typedef uint2 u;
-#else
-	typedef uint u;
-#endif
-/* ch() and ma() are the two three-input bitwise functions used by the round
- * computations in search(). BITALIGN / BFI_INT select one of three variants:
- *  - neither defined: ch takes the bits of y where x is set and the bits of z
- *    where x is clear; ma is the bitwise majority of its three arguments.
- *  - BITALIGN only: the same two functions written with bitselect(). This ma
- *    names its parameters (z, x, y); majority is symmetric, so call sites are
- *    unaffected by the different order.
- *  - BITALIGN and BFI_INT: both expand to amd_bytealign().
- *    NOTE(review): amd_bytealign() is not itself a bit-select, so presumably
- *    the host rewrites these instructions in the compiled binary - confirm
- *    against the host-side code before relying on this path.
- * Macro arguments are not parenthesised; the visible call sites only pass
- * plain V[i] operands. */
-#ifdef BITALIGN
-	#pragma OPENCL EXTENSION cl_amd_media_ops : enable
-	#ifdef BFI_INT
-		#define ch(x, y, z) amd_bytealign(x, y, z)
-		#define ma(x, y, z) amd_bytealign(z ^ x, y, x)
-	#else
-		#define ch(x, y, z) bitselect(z, y, x)
-		#define ma(z, x, y) bitselect(z, y, z ^ x)
-	#endif
-#else
-	#define ch(x, y, z) (z ^ (x & (y ^ z)))
-	#define ma(x, y, z) ((x & z) | (y & (x | z)))
-#endif
-/* Rotate/shift mixers. Despite the rotr* names, OpenCL rotate() rotates LEFT,
- * so the number in each name is the first left-rotate count, not a
- * right-rotate amount. rotr15 and rotr25 XOR two rotates with a right shift;
- * rotr26 and rotr30 XOR three rotates. On 32-bit words these equal SHA-256's
- * sigma1, sigma0, Sigma1 and Sigma0 respectively.
- * `n` is not parenthesised in `n >> ...`: call sites such as
- * rotr25(state1 + V[1]) group as intended only because `+` binds tighter than
- * `>>`. Do not pass an operand built from lower-precedence operators. */
-#define rotr15(n) (rotate(n, 15U) ^ rotate(n, 13U) ^ (n >> 10U))
-#define rotr25(n) (rotate(n, 25U) ^ rotate(n, 14U) ^ (n >> 3U))
-#define rotr26(n) (rotate(n, 26U) ^ rotate(n, 21U) ^ rotate(n, 7U))
-#define rotr30(n) (rotate(n, 30U) ^ rotate(n, 19U) ^ rotate(n, 10U))
-
-__kernel
-	__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-	void search(	
-#ifndef GOFFSET
-			const u base,
-#endif
-			const uint PreVal0, const uint PreVal4,
-			const uint H1, const uint D1A, const uint B1, const uint C1,
-			const uint F1, const uint G1, const uint C1addK5, const uint B1addK6, const uint PreVal0addK7,
-			const uint W16addK16, const uint W17addK17,
-			const uint PreW18, const uint PreW19,
-			const uint W16, const uint W17,
-			const uint PreW31, const uint PreW32,
-			const uint state0, const uint state1, const uint state2, const uint state3,
-			const uint state4, const uint state5, const uint state6, const uint state7,
-			const uint state0A, const uint state0B,
-			const uint state1A, const uint state2A, const uint state3A, const uint state4A,
-			const uint state5A, const uint state6A, const uint state7A,
-			volatile __global uint * output)
-{
-	u V[8];
-	u W[16];
-
-#ifdef VECTORS4
-	const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base;
-#elif defined VECTORS2
-	const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base;
-#else
-	#ifdef GOFFSET
-		const u nonce = (uint)(get_global_id(0));
-	#else
-		const u nonce = (uint)(get_local_id(0)) + (uint)(get_group_id(0)) * (uint)(WORKSIZE) + base;
-	#endif
-#endif
-
-	V[0] = PreVal0 + nonce;
-	V[1] = B1;
-	V[2] = C1;
-	V[3] = D1A;
-	V[4] = PreVal4 + nonce;
-	V[5] = F1;
-	V[6] = G1;
-	V[7] = H1;
-
-	V[7] += V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  C1addK5 + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  B1addK6 + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  W17addK17 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-//----------------------------------------------------------------------------------
-
-#ifdef VECTORS4
-	 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U);
-#elif defined VECTORS2
-	 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U);
-#else
-	 W[0] = PreW18 + rotr25(nonce);
-#endif
-	 W[1] = PreW19 + nonce;
-	 W[2] = 0x80000000U + rotr15(W[0]);
-	 W[3] = rotr15(W[1]);
-	 W[4] = 0x00000280U + rotr15(W[2]);
-	 W[5] = W16 + rotr15(W[3]);
-	 W[6] = W17 + rotr15(W[4]);
-	 W[7] = W[0] + rotr15(W[5]);
-	 W[8] = W[1] + rotr15(W[6]);
-	 W[9] = W[2] + rotr15(W[7]);
-	W[10] = W[3] + rotr15(W[8]);
-	W[11] = W[4] + rotr15(W[9]);
-	W[12] = W[5] + 0x00a00055U + rotr15(W[10]);
-	W[13] = W[6] + PreW31 + rotr15(W[11]);
-	W[14] = W[7] + PreW32 + rotr15(W[12]);
-	W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]);
-
-	V[1] += 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0];
-	V[5] =  0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0] + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-//----------------------------------------------------------------------------------
-
-	 W[0] =  W[0] +  W[9] + rotr15(W[14]) + rotr25( W[1]);
-	 W[1] =  W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]);
-	 W[2] =  W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]);
-	 W[3] =  W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]);
-	 W[4] =  W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]);
-	 W[5] =  W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]);
-	 W[6] =  W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]);
-	 W[7] =  W[7] +  W[0] + rotr15( W[5]) + rotr25( W[8]);
-	 W[8] =  W[8] +  W[1] + rotr15( W[6]) + rotr25( W[9]);
-	 W[9] =  W[9] +  W[2] + rotr15( W[7]) + rotr25(W[10]);
-	W[10] = W[10] +  W[3] + rotr15( W[8]) + rotr25(W[11]);
-	W[11] = W[11] +  W[4] + rotr15( W[9]) + rotr25(W[12]);
-	W[12] = W[12] +  W[5] + rotr15(W[10]) + rotr25(W[13]);
-	W[13] = W[13] +  W[6] + rotr15(W[11]) + rotr25(W[14]);
-	W[14] = W[14] +  W[7] + rotr15(W[12]) + rotr25(W[15]);
-	W[15] = W[15] +  W[8] + rotr15(W[13]) + rotr25( W[0]);
-
-	V[1] += 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-//----------------------------------------------------------------------------------
-
-	 W[0] =  W[0] +  W[9] + rotr15(W[14]) + rotr25( W[1]);
-	 W[1] =  W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]);
-	 W[2] =  W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]);
-	 W[3] =  W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]);
-	 W[4] =  W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]);
-	 W[5] =  W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]);
-	 W[6] =  W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]);
-	 W[7] =  W[7] +  W[0] + rotr15( W[5]) + rotr25( W[8]);
-	 W[8] =  W[8] +  W[1] + rotr15( W[6]) + rotr25( W[9]);
-	 W[9] =  W[9] +  W[2] + rotr15( W[7]) + rotr25(W[10]);
-	W[10] = W[10] +  W[3] + rotr15( W[8]) + rotr25(W[11]);
-	W[11] = W[11] +  W[4] + rotr15( W[9]) + rotr25(W[12]);
-	W[12] = W[12] +  W[5] + rotr15(W[10]) + rotr25(W[13]);
-	W[13] = W[13] +  W[6] + rotr15(W[11]) + rotr25(W[14]);
-
-	V[1] += 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-//----------------------------------------------------------------------------------
-
-	 W[0] = state0 + V[0] + rotr25(state1 + V[1]);
-	 W[1] = state1 + V[1] + 0x00a00000U + rotr25(state2 + V[2]);
-	 W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]);
-	 W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]);
-	 W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]);
-	 W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]);
-	 W[6] = state6 + V[6] + 0x00000100U + rotr15(W[4]) + rotr25(state7 + V[7]);	
-	 W[7] = state7 + V[7] + W[0] + 0x11002000U + rotr15(W[5]);
-	 W[8] = W[1] + 0x80000000U + rotr15(W[6]);	
-	 W[9] = W[2] + rotr15(W[7]);
-	W[10] = W[3] + rotr15(W[8]);
-	W[11] = W[4] + rotr15(W[9]);
-	W[12] = W[5] + rotr15(W[10]);
-	W[13] = W[6] + rotr15(W[11]);
-	W[14] = W[7] + 0x00400022U + rotr15(W[12]);
-	W[15] = W[8] + 0x00000100U + rotr15(W[13]) + rotr25(W[0]);
-
-	// 0x71374491U + 0x1f83d9abU + state1
-	const u state1AaddV1 = state1A + V[1];
-	// 0xb5c0fbcfU + 0x9b05688cU + state2
-	const u state2AaddV2 = state2A + V[2];
-	// 0x510e527fU + 0xe9b5dba5U + state3
-	const u state3AaddV3 = state3A + V[3];
-	// 0x3956c25bU + state4
-	const u state4AaddV4 = state4A + V[4];
-	// 0x59f111f1U + state5
-	const u state5AaddV5 = state5A + V[5];
-	// 0x923f82a4U + state6
-	const u state6AaddV6 = state6A + V[6];
-	// 0xab1c5ed5U + state7
-	const u state7AaddV7 = state7A + V[7];
-
-	// 0x98c7e2a2U + state0	
-	V[3] = state0A + V[0];
-	// 0xfc08884dU + state0
-	V[7] = state0B + V[0];
-	V[0] = 0x6a09e667U;
-	V[1] = 0xbb67ae85U;
-	V[2] = 0x3c6ef372U;
-	V[4] = 0x510e527fU;
-	V[5] = 0x9b05688cU;
-	V[6] = 0x1f83d9abU;
-
-	V[2] += state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  state2AaddV2 + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  state3AaddV3 + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  state4AaddV4 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  state5AaddV5 + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  state6AaddV6 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-//----------------------------------------------------------------------------------
-
-	 W[0] =  W[0] +  W[9] + rotr15(W[14]) + rotr25( W[1]);
-	 W[1] =  W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]);
-	 W[2] =  W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]);
-	 W[3] =  W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]);
-	 W[4] =  W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]);
-	 W[5] =  W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]);
-	 W[6] =  W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]);
-	 W[7] =  W[7] +  W[0] + rotr15( W[5]) + rotr25( W[8]);
-	 W[8] =  W[8] +  W[1] + rotr15( W[6]) + rotr25( W[9]);
-	 W[9] =  W[9] +  W[2] + rotr15( W[7]) + rotr25(W[10]);
-	W[10] = W[10] +  W[3] + rotr15( W[8]) + rotr25(W[11]);
-	W[11] = W[11] +  W[4] + rotr15( W[9]) + rotr25(W[12]);
-	W[12] = W[12] +  W[5] + rotr15(W[10]) + rotr25(W[13]);
-	W[13] = W[13] +  W[6] + rotr15(W[11]) + rotr25(W[14]);
-	W[14] = W[14] +  W[7] + rotr15(W[12]) + rotr25(W[15]);
-	W[15] = W[15] +  W[8] + rotr15(W[13]) + rotr25( W[0]);
-
-	V[3] += 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-//----------------------------------------------------------------------------------
-
-	 W[0] =  W[0] +  W[9] + rotr15(W[14]) + rotr25( W[1]);
-	 W[1] =  W[1] + W[10] + rotr15(W[15]) + rotr25( W[2]);
-	 W[2] =  W[2] + W[11] + rotr15( W[0]) + rotr25( W[3]);
-	 W[3] =  W[3] + W[12] + rotr15( W[1]) + rotr25( W[4]);
-	 W[4] =  W[4] + W[13] + rotr15( W[2]) + rotr25( W[5]);
-	 W[5] =  W[5] + W[14] + rotr15( W[3]) + rotr25( W[6]);
-	 W[6] =  W[6] + W[15] + rotr15( W[4]) + rotr25( W[7]);
-	 W[7] =  W[7] +  W[0] + rotr15( W[5]) + rotr25( W[8]);
-	 W[8] =  W[8] +  W[1] + rotr15( W[6]) + rotr25( W[9]);
-	 W[9] =  W[9] +  W[2] + rotr15( W[7]) + rotr25(W[10]);
-	W[10] = W[10] +  W[3] + rotr15( W[8]) + rotr25(W[11]);
-	W[11] = W[11] +  W[4] + rotr15( W[9]) + rotr25(W[12]);
-	W[12] = W[12] +  W[5] + rotr15(W[10]) + rotr25(W[13]);
-
-	V[3] += 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
-
-	V[1] += 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
-
-	V[0] += 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
-
-	V[7] += 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
-
-	V[6] += 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
-
-	V[5] += 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
-
-	V[4] += 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-
-	V[3] += 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
-
-	V[2] += 0x78a5636fU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-
-	V[1] += 0x84c87814U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-
-	V[0] += 0x8cc70208U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-
-	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-
-#define FOUND (0x0F)
-#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-
-#ifdef VECTORS4
-	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
-		if (V[7].x == 0x136032edU)
-			SETFOUND(nonce.x);
-		if (V[7].y == 0x136032edU)
-			SETFOUND(nonce.y);
-		if (V[7].z == 0x136032edU)
-			SETFOUND(nonce.z);
-		if (V[7].w == 0x136032edU)
-			SETFOUND(nonce.w);
-	}
-#elif defined VECTORS2
-	if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) {
-		if (V[7].x == 0x136032edU)
-			SETFOUND(nonce.x);
-		if (V[7].y == 0x136032edU)
-			SETFOUND(nonce.y);
-	}
-#else
-	if (V[7] == 0x136032edU)
-		SETFOUND(nonce);
-#endif
-}

+ 7 - 7
driver-bitforce.c

@@ -483,18 +483,18 @@ re_send:
 	memcpy(ob + 8 + 32, work->data + 64, 12);
 	if (!bitforce->nonce_range) {
 		sprintf((char *)ob + 8 + 32 + 12, ">>>>>>>>");
-		work->blk.nonce = bitforce->nonces = 0xffffffff;
+		work->nonce = bitforce->nonces = 0xffffffff;
 		len = 60;
 	} else {
 		uint32_t *nonce;
 
 		nonce = (uint32_t *)(ob + 8 + 32 + 12);
-		*nonce = htobe32(work->blk.nonce);
+		*nonce = htobe32(work->nonce);
 		nonce = (uint32_t *)(ob + 8 + 32 + 12 + 4);
 		/* Split work up into 1/5th nonce ranges */
 		bitforce->nonces = 0x33333332;
-		*nonce = htobe32(work->blk.nonce + bitforce->nonces);
-		work->blk.nonce += bitforce->nonces + 1;
+		*nonce = htobe32(work->nonce + bitforce->nonces);
+		work->nonce += bitforce->nonces + 1;
 		sprintf((char *)ob + 8 + 32 + 12 + 8, ">>>>>>>>");
 		len = 68;
 	}
@@ -634,12 +634,12 @@ static int64_t bitforce_get_result(struct thr_info *thr, struct work *work)
 #ifndef __BIG_ENDIAN__
 		nonce = swab32(nonce);
 #endif
-		if (unlikely(bitforce->nonce_range && (nonce >= work->blk.nonce ||
-			(work->blk.nonce > 0 && nonce < work->blk.nonce - bitforce->nonces - 1)))) {
+		if (unlikely(bitforce->nonce_range && (nonce >= work->nonce ||
+			(work->nonce > 0 && nonce < work->nonce - bitforce->nonces - 1)))) {
 				applog(LOG_WARNING, "%s%i: Disabling broken nonce range support",
 					bitforce->drv->name, bitforce->device_id);
 				bitforce->nonce_range = false;
-				work->blk.nonce = 0xffffffff;
+				work->nonce = 0xffffffff;
 				bitforce->sleep_ms *= 5;
 				bitforce->kname = KNAME_WORK;
 		}

+ 0 - 2
driver-icarus.c

@@ -1143,8 +1143,6 @@ static int64_t icarus_scanwork(struct thr_info *thr)
 	if (ret == ICA_NONCE_ERROR)
 		goto out;
 
-	work->blk.nonce = 0xffffffff;
-
 	// aborted before becoming idle, get new work
 	if (ret == ICA_NONCE_TIMEOUT || ret == ICA_NONCE_RESTART) {
 		timersub(&tv_finish, &tv_start, &elapsed);

+ 1 - 1
driver-modminer.c

@@ -1029,7 +1029,7 @@ tryagain:
 	if (hashes > 0xffffffff)
 		hashes = 0xffffffff;
 
-	work->blk.nonce = 0xffffffff;
+	work->nonce = 0xffffffff;
 
 	return hashes;
 }

+ 0 - 1591
driver-opencl.c

@@ -1,1591 +0,0 @@
-/*
- * Copyright 2011-2012 Con Kolivas
- * Copyright 2011-2012 Luke Dashjr
- * Copyright 2010 Jeff Garzik
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.  See COPYING for more details.
- */
-
-#include "config.h"
-
-#ifdef HAVE_CURSES
-#include <curses.h>
-#endif
-
-#include <string.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-#include <sys/types.h>
-
-#ifndef WIN32
-#include <sys/resource.h>
-#endif
-#include <ccan/opt/opt.h>
-
-#include "compat.h"
-#include "miner.h"
-#include "driver-opencl.h"
-#include "findnonce.h"
-#include "ocl.h"
-#include "adl.h"
-#include "util.h"
-
-/* TODO: cleanup externals ********************/
-
-#ifdef HAVE_CURSES
-extern WINDOW *mainwin, *statuswin, *logwin;
-extern void enable_curses(void);
-#endif
-
-extern int mining_threads;
-extern double total_secs;
-extern int opt_g_threads;
-extern bool opt_loginput;
-extern char *opt_kernel_path;
-extern int gpur_thr_id;
-extern bool opt_noadl;
-extern bool have_opencl;
-
-extern void *miner_thread(void *userdata);
-extern int dev_from_id(int thr_id);
-extern void decay_time(double *f, double fadd);
-
-/**********************************************/
-
-#ifdef HAVE_ADL
-extern float gpu_temp(int gpu);
-extern int gpu_fanspeed(int gpu);
-extern int gpu_fanpercent(int gpu);
-#endif
-
-#ifdef HAVE_OPENCL
-char *set_vector(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set vector";
-	val = atoi(nextptr);
-	if (val != 1 && val != 2 && val != 4)
-		return "Invalid value passed to set_vector";
-
-	gpus[device++].vwidth = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val != 1 && val != 2 && val != 4)
-			return "Invalid value passed to set_vector";
-
-		gpus[device++].vwidth = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].vwidth = gpus[0].vwidth;
-	}
-
-	return NULL;
-}
-
-char *set_worksize(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set work size";
-	val = atoi(nextptr);
-	if (val < 1 || val > 9999)
-		return "Invalid value passed to set_worksize";
-
-	gpus[device++].work_size = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < 1 || val > 9999)
-			return "Invalid value passed to set_worksize";
-
-		gpus[device++].work_size = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].work_size = gpus[0].work_size;
-	}
-
-	return NULL;
-}
-
-#ifdef USE_SCRYPT
-char *set_shaders(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set lookup gap";
-	val = atoi(nextptr);
-
-	gpus[device++].shaders = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-
-		gpus[device++].shaders = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].shaders = gpus[0].shaders;
-	}
-
-	return NULL;
-}
-
-char *set_lookup_gap(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set lookup gap";
-	val = atoi(nextptr);
-
-	gpus[device++].opt_lg = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-
-		gpus[device++].opt_lg = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].opt_lg = gpus[0].opt_lg;
-	}
-
-	return NULL;
-}
-
-char *set_thread_concurrency(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set thread concurrency";
-	val = atoi(nextptr);
-
-	gpus[device++].opt_tc = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-
-		gpus[device++].opt_tc = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].opt_tc = gpus[0].opt_tc;
-	}
-
-	return NULL;
-}
-#endif
-
-static enum cl_kernels select_kernel(char *arg)
-{
-	if (!strcmp(arg, "diablo"))
-		return KL_DIABLO;
-	if (!strcmp(arg, "diakgcn"))
-		return KL_DIAKGCN;
-	if (!strcmp(arg, "poclbm"))
-		return KL_POCLBM;
-	if (!strcmp(arg, "phatk"))
-		return KL_PHATK;
-#ifdef USE_SCRYPT
-	if (!strcmp(arg, "scrypt"))
-		return KL_SCRYPT;
-#endif
-	return KL_NONE;
-}
-
-char *set_kernel(char *arg)
-{
-	enum cl_kernels kern;
-	int i, device = 0;
-	char *nextptr;
-
-	if (opt_scrypt)
-		return "Cannot specify a kernel with scrypt";
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set kernel";
-	kern = select_kernel(nextptr);
-	if (kern == KL_NONE)
-		return "Invalid parameter to set_kernel";
-	gpus[device++].kernel = kern;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		kern = select_kernel(nextptr);
-		if (kern == KL_NONE)
-			return "Invalid parameter to set_kernel";
-
-		gpus[device++].kernel = kern;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].kernel = gpus[0].kernel;
-	}
-
-	return NULL;
-}
-#endif
-
-#ifdef HAVE_ADL
-/* This function allows us to map an adl device to an opencl device for when
- * simple enumeration has failed to match them. */
-char *set_gpu_map(char *arg)
-{
-	int val1 = 0, val2 = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu map";
-	if (sscanf(arg, "%d:%d", &val1, &val2) != 2)
-		return "Invalid description for map pair";
-	if (val1 < 0 || val1 > MAX_GPUDEVICES || val2 < 0 || val2 > MAX_GPUDEVICES)
-		return "Invalid value passed to set_gpu_map";
-
-	gpus[val1].virtual_adl = val2;
-	gpus[val1].mapped = true;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		if (sscanf(nextptr, "%d:%d", &val1, &val2) != 2)
-			return "Invalid description for map pair";
-		if (val1 < 0 || val1 > MAX_GPUDEVICES || val2 < 0 || val2 > MAX_GPUDEVICES)
-			return "Invalid value passed to set_gpu_map";
-		gpus[val1].virtual_adl = val2;
-		gpus[val1].mapped = true;
-	}
-
-	return NULL;
-}
-
-char *set_gpu_engine(char *arg)
-{
-	int i, val1 = 0, val2 = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu engine";
-	get_intrange(nextptr, &val1, &val2);
-	if (val1 < 0 || val1 > 9999 || val2 < 0 || val2 > 9999)
-		return "Invalid value passed to set_gpu_engine";
-
-	gpus[device].min_engine = val1;
-	gpus[device].gpu_engine = val2;
-	device++;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		get_intrange(nextptr, &val1, &val2);
-		if (val1 < 0 || val1 > 9999 || val2 < 0 || val2 > 9999)
-			return "Invalid value passed to set_gpu_engine";
-		gpus[device].min_engine = val1;
-		gpus[device].gpu_engine = val2;
-		device++;
-	}
-
-	if (device == 1) {
-		for (i = 1; i < MAX_GPUDEVICES; i++) {
-			gpus[i].min_engine = gpus[0].min_engine;
-			gpus[i].gpu_engine = gpus[0].gpu_engine;
-		}
-	}
-
-	return NULL;
-}
-
-char *set_gpu_fan(char *arg)
-{
-	int i, val1 = 0, val2 = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu fan";
-	get_intrange(nextptr, &val1, &val2);
-	if (val1 < 0 || val1 > 100 || val2 < 0 || val2 > 100)
-		return "Invalid value passed to set_gpu_fan";
-
-	gpus[device].min_fan = val1;
-	gpus[device].gpu_fan = val2;
-	device++;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		get_intrange(nextptr, &val1, &val2);
-		if (val1 < 0 || val1 > 100 || val2 < 0 || val2 > 100)
-			return "Invalid value passed to set_gpu_fan";
-
-		gpus[device].min_fan = val1;
-		gpus[device].gpu_fan = val2;
-		device++;
-	}
-
-	if (device == 1) {
-		for (i = 1; i < MAX_GPUDEVICES; i++) {
-			gpus[i].min_fan = gpus[0].min_fan;
-			gpus[i].gpu_fan = gpus[0].gpu_fan;
-		}
-	}
-
-	return NULL;
-}
-
-char *set_gpu_memclock(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu memclock";
-	val = atoi(nextptr);
-	if (val < 0 || val >= 9999)
-		return "Invalid value passed to set_gpu_memclock";
-
-	gpus[device++].gpu_memclock = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < 0 || val >= 9999)
-			return "Invalid value passed to set_gpu_memclock";
-
-		gpus[device++].gpu_memclock = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].gpu_memclock = gpus[0].gpu_memclock;
-	}
-
-	return NULL;
-}
-
-char *set_gpu_memdiff(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu memdiff";
-	val = atoi(nextptr);
-	if (val < -9999 || val > 9999)
-		return "Invalid value passed to set_gpu_memdiff";
-
-	gpus[device++].gpu_memdiff = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < -9999 || val > 9999)
-			return "Invalid value passed to set_gpu_memdiff";
-
-		gpus[device++].gpu_memdiff = val;
-	}
-		if (device == 1) {
-			for (i = device; i < MAX_GPUDEVICES; i++)
-				gpus[i].gpu_memdiff = gpus[0].gpu_memdiff;
-		}
-
-			return NULL;
-}
-
-char *set_gpu_powertune(char *arg)
-{
-	int i, val = 0, device = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu powertune";
-	val = atoi(nextptr);
-	if (val < -99 || val > 99)
-		return "Invalid value passed to set_gpu_powertune";
-
-	gpus[device++].gpu_powertune = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < -99 || val > 99)
-			return "Invalid value passed to set_gpu_powertune";
-
-		gpus[device++].gpu_powertune = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].gpu_powertune = gpus[0].gpu_powertune;
-	}
-
-	return NULL;
-}
-
-char *set_gpu_vddc(char *arg)
-{
-	int i, device = 0;
-	float val = 0;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set gpu vddc";
-	val = atof(nextptr);
-	if (val < 0 || val >= 9999)
-		return "Invalid value passed to set_gpu_vddc";
-
-	gpus[device++].gpu_vddc = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atof(nextptr);
-		if (val < 0 || val >= 9999)
-			return "Invalid value passed to set_gpu_vddc";
-
-		gpus[device++].gpu_vddc = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++)
-			gpus[i].gpu_vddc = gpus[0].gpu_vddc;
-	}
-
-	return NULL;
-}
-
-char *set_temp_overheat(char *arg)
-{
-	int i, val = 0, device = 0, *to;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set temp overheat";
-	val = atoi(nextptr);
-	if (val < 0 || val > 200)
-		return "Invalid value passed to set temp overheat";
-
-	to = &gpus[device++].adl.overtemp;
-	*to = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < 0 || val > 200)
-			return "Invalid value passed to set temp overheat";
-
-		to = &gpus[device++].adl.overtemp;
-		*to = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++) {
-			to = &gpus[i].adl.overtemp;
-			*to = val;
-		}
-	}
-
-	return NULL;
-}
-
-char *set_temp_target(char *arg)
-{
-	int i, val = 0, device = 0, *tt;
-	char *nextptr;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set temp target";
-	val = atoi(nextptr);
-	if (val < 0 || val > 200)
-		return "Invalid value passed to set temp target";
-
-	tt = &gpus[device++].adl.targettemp;
-	*tt = val;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		val = atoi(nextptr);
-		if (val < 0 || val > 200)
-			return "Invalid value passed to set temp target";
-
-		tt = &gpus[device++].adl.targettemp;
-		*tt = val;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++) {
-			tt = &gpus[i].adl.targettemp;
-			*tt = val;
-		}
-	}
-
-	return NULL;
-}
-#endif
-#ifdef HAVE_OPENCL
-char *set_intensity(char *arg)
-{
-	int i, device = 0, *tt;
-	char *nextptr, val = 0;
-
-	nextptr = strtok(arg, ",");
-	if (nextptr == NULL)
-		return "Invalid parameters for set intensity";
-	if (!strncasecmp(nextptr, "d", 1))
-		gpus[device].dynamic = true;
-	else {
-		gpus[device].dynamic = false;
-		val = atoi(nextptr);
-		if (val < MIN_INTENSITY || val > MAX_GPU_INTENSITY)
-			return "Invalid value passed to set intensity";
-		tt = &gpus[device].intensity;
-		*tt = val;
-	}
-
-	device++;
-
-	while ((nextptr = strtok(NULL, ",")) != NULL) {
-		if (!strncasecmp(nextptr, "d", 1))
-			gpus[device].dynamic = true;
-		else {
-			gpus[device].dynamic = false;
-			val = atoi(nextptr);
-			if (val < MIN_INTENSITY || val > MAX_GPU_INTENSITY)
-				return "Invalid value passed to set intensity";
-
-			tt = &gpus[device].intensity;
-			*tt = val;
-		}
-		device++;
-	}
-	if (device == 1) {
-		for (i = device; i < MAX_GPUDEVICES; i++) {
-			gpus[i].dynamic = gpus[0].dynamic;
-			gpus[i].intensity = gpus[0].intensity;
-		}
-	}
-
-	return NULL;
-}
-
-void print_ndevs(int *ndevs)
-{
-	opt_log_output = true;
-	opencl_drv.drv_detect(false);
-	clear_adl(*ndevs);
-	applog(LOG_INFO, "%i GPU devices max detected", *ndevs);
-}
-#endif
-
-struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */
-struct cgpu_info *cpus;
-
-#ifdef HAVE_OPENCL
-
-/* In dynamic mode, only the first thread of each device will be in use.
- * This potentially could start a thread that was stopped with the start-stop
- * options if one were to disable dynamic from the menu on a paused GPU */
-void pause_dynamic_threads(int gpu)
-{
-	struct cgpu_info *cgpu = &gpus[gpu];
-	int i;
-
-	for (i = 1; i < cgpu->threads; i++) {
-		struct thr_info *thr;
-
-		thr = get_thread(i);
-		if (!thr->pause && cgpu->dynamic) {
-			applog(LOG_WARNING, "Disabling extra threads due to dynamic mode.");
-			applog(LOG_WARNING, "Tune dynamic intensity with --gpu-dyninterval");
-		}
-
-		thr->pause = cgpu->dynamic;
-		if (!cgpu->dynamic && cgpu->deven != DEV_DISABLED)
-			cgsem_post(&thr->sem);
-	}
-}
-
-#endif /* HAVE_OPENCL */
-
-#if defined(HAVE_OPENCL) && defined(HAVE_CURSES)
-void manage_gpu(void)
-{
-	struct thr_info *thr;
-	int selected, gpu, i;
-	char checkin[40];
-	char input;
-
-	if (!opt_g_threads)
-		return;
-
-	opt_loginput = true;
-	immedok(logwin, true);
-	clear_logwin();
-retry:
-
-	for (gpu = 0; gpu < nDevs; gpu++) {
-		struct cgpu_info *cgpu = &gpus[gpu];
-		double displayed_rolling, displayed_total;
-		bool mhash_base = true;
-
-		displayed_rolling = cgpu->rolling;
-		displayed_total = cgpu->total_mhashes / total_secs;
-		if (displayed_rolling < 1) {
-			displayed_rolling *= 1000;
-			displayed_total *= 1000;
-			mhash_base = false;
-		}
-
-		wlog("GPU %d: %.1f / %.1f %sh/s | A:%d  R:%d  HW:%d  U:%.2f/m  I:%d\n",
-			gpu, displayed_rolling, displayed_total, mhash_base ? "M" : "K",
-			cgpu->accepted, cgpu->rejected, cgpu->hw_errors,
-			cgpu->utility, cgpu->intensity);
-#ifdef HAVE_ADL
-		if (gpus[gpu].has_adl) {
-			int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
-			float temp = 0, vddc = 0;
-
-			if (gpu_stats(gpu, &temp, &engineclock, &memclock, &vddc, &activity, &fanspeed, &fanpercent, &powertune)) {
-				char logline[255];
-
-				strcpy(logline, ""); // In case it has no data
-				if (temp != -1)
-					sprintf(logline, "%.1f C  ", temp);
-				if (fanspeed != -1 || fanpercent != -1) {
-					tailsprintf(logline, sizeof(logline), "F: ");
-					if (fanpercent != -1)
-						tailsprintf(logline, sizeof(logline), "%d%% ", fanpercent);
-					if (fanspeed != -1)
-						tailsprintf(logline, sizeof(logline), "(%d RPM) ", fanspeed);
-					tailsprintf(logline, sizeof(logline), " ");
-				}
-				if (engineclock != -1)
-					tailsprintf(logline, sizeof(logline), "E: %d MHz  ", engineclock);
-				if (memclock != -1)
-					tailsprintf(logline, sizeof(logline), "M: %d Mhz  ", memclock);
-				if (vddc != -1)
-					tailsprintf(logline, sizeof(logline), "V: %.3fV  ", vddc);
-				if (activity != -1)
-					tailsprintf(logline, sizeof(logline), "A: %d%%  ", activity);
-				if (powertune != -1)
-					tailsprintf(logline, sizeof(logline), "P: %d%%", powertune);
-				tailsprintf(logline, sizeof(logline), "\n");
-				_wlog(logline);
-			}
-		}
-#endif
-		wlog("Last initialised: %s\n", cgpu->init);
-		wlog("Intensity: ");
-		if (gpus[gpu].dynamic)
-			wlog("Dynamic (only one thread in use)\n");
-		else
-			wlog("%d\n", gpus[gpu].intensity);
-		for (i = 0; i < mining_threads; i++) {
-			thr = get_thread(i);
-			if (thr->cgpu != cgpu)
-				continue;
-			get_datestamp(checkin, sizeof(checkin), &thr->last);
-			displayed_rolling = thr->rolling;
-			if (!mhash_base)
-				displayed_rolling *= 1000;
-			wlog("Thread %d: %.1f %sh/s %s ", i, displayed_rolling, mhash_base ? "M" : "K" , cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
-			switch (cgpu->status) {
-				default:
-				case LIFE_WELL:
-					wlog("ALIVE");
-					break;
-				case LIFE_SICK:
-					wlog("SICK reported in %s", checkin);
-					break;
-				case LIFE_DEAD:
-					wlog("DEAD reported in %s", checkin);
-					break;
-				case LIFE_INIT:
-				case LIFE_NOSTART:
-					wlog("Never started");
-					break;
-			}
-			if (thr->pause)
-				wlog(" paused");
-			wlog("\n");
-		}
-		wlog("\n");
-	}
-
-	wlogprint("[E]nable [D]isable [I]ntensity [R]estart GPU %s\n",adl_active ? "[C]hange settings" : "");
-
-	wlogprint("Or press any other key to continue\n");
-	logwin_update();
-	input = getch();
-
-	if (nDevs == 1)
-		selected = 0;
-	else
-		selected = -1;
-	if (!strncasecmp(&input, "e", 1)) {
-		struct cgpu_info *cgpu;
-
-		if (selected)
-			selected = curses_int("Select GPU to enable");
-		if (selected < 0 || selected >= nDevs) {
-			wlogprint("Invalid selection\n");
-			goto retry;
-		}
-		if (gpus[selected].deven != DEV_DISABLED) {
-			wlogprint("Device already enabled\n");
-			goto retry;
-		}
-		gpus[selected].deven = DEV_ENABLED;
-		for (i = 0; i < mining_threads; ++i) {
-			thr = get_thread(i);
-			cgpu = thr->cgpu;
-			if (cgpu->drv->drv_id != DRIVER_opencl)
-				continue;
-			if (dev_from_id(i) != selected)
-				continue;
-			if (cgpu->status != LIFE_WELL) {
-				wlogprint("Must restart device before enabling it");
-				goto retry;
-			}
-			applog(LOG_DEBUG, "Pushing sem post to thread %d", thr->id);
-
-			cgsem_post(&thr->sem);
-		}
-		goto retry;
-	} if (!strncasecmp(&input, "d", 1)) {
-		if (selected)
-			selected = curses_int("Select GPU to disable");
-		if (selected < 0 || selected >= nDevs) {
-			wlogprint("Invalid selection\n");
-			goto retry;
-		}
-		if (gpus[selected].deven == DEV_DISABLED) {
-			wlogprint("Device already disabled\n");
-			goto retry;
-		}
-		gpus[selected].deven = DEV_DISABLED;
-		goto retry;
-	} else if (!strncasecmp(&input, "i", 1)) {
-		int intensity;
-		char *intvar;
-
-		if (selected)
-			selected = curses_int("Select GPU to change intensity on");
-		if (selected < 0 || selected >= nDevs) {
-			wlogprint("Invalid selection\n");
-			goto retry;
-		}
-		if (opt_scrypt) {
-			intvar = curses_input("Set GPU scan intensity (d or "
-					      MIN_SCRYPT_INTENSITY_STR " -> "
-					      MAX_SCRYPT_INTENSITY_STR ")");
-		} else {
-			intvar = curses_input("Set GPU scan intensity (d or "
-					      MIN_SHA_INTENSITY_STR " -> "
-					      MAX_SHA_INTENSITY_STR ")");
-		}
-		if (!intvar) {
-			wlogprint("Invalid input\n");
-			goto retry;
-		}
-		if (!strncasecmp(intvar, "d", 1)) {
-			wlogprint("Dynamic mode enabled on gpu %d\n", selected);
-			gpus[selected].dynamic = true;
-			pause_dynamic_threads(selected);
-			free(intvar);
-			goto retry;
-		}
-		intensity = atoi(intvar);
-		free(intvar);
-		if (intensity < MIN_INTENSITY || intensity > MAX_INTENSITY) {
-			wlogprint("Invalid selection\n");
-			goto retry;
-		}
-		gpus[selected].dynamic = false;
-		gpus[selected].intensity = intensity;
-		wlogprint("Intensity on gpu %d set to %d\n", selected, intensity);
-		pause_dynamic_threads(selected);
-		goto retry;
-	} else if (!strncasecmp(&input, "r", 1)) {
-		if (selected)
-			selected = curses_int("Select GPU to attempt to restart");
-		if (selected < 0 || selected >= nDevs) {
-			wlogprint("Invalid selection\n");
-			goto retry;
-		}
-		wlogprint("Attempting to restart threads of GPU %d\n", selected);
-		reinit_device(&gpus[selected]);
-		goto retry;
-	} else if (adl_active && (!strncasecmp(&input, "c", 1))) {
-		if (selected)
-			selected = curses_int("Select GPU to change settings on");
-		if (selected < 0 || selected >= nDevs) {
-			wlogprint("Invalid selection\n");
-			goto retry;
-		}
-		change_gpusettings(selected);
-		goto retry;
-	} else
-		clear_logwin();
-
-	immedok(logwin, false);
-	opt_loginput = false;
-}
-#else
-void manage_gpu(void)
-{
-}
-#endif
-
-
-#ifdef HAVE_OPENCL
-static _clState *clStates[MAX_GPUDEVICES];
-
-#define CL_SET_BLKARG(blkvar) status |= clSetKernelArg(*kernel, num++, sizeof(uint), (void *)&blk->blkvar)
-#define CL_SET_ARG(var) status |= clSetKernelArg(*kernel, num++, sizeof(var), (void *)&var)
-#define CL_SET_VARG(args, var) status |= clSetKernelArg(*kernel, num++, args * sizeof(uint), (void *)var)
-
-static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
-{
-	cl_kernel *kernel = &clState->kernel;
-	unsigned int num = 0;
-	cl_int status = 0;
-
-	CL_SET_BLKARG(ctx_a);
-	CL_SET_BLKARG(ctx_b);
-	CL_SET_BLKARG(ctx_c);
-	CL_SET_BLKARG(ctx_d);
-	CL_SET_BLKARG(ctx_e);
-	CL_SET_BLKARG(ctx_f);
-	CL_SET_BLKARG(ctx_g);
-	CL_SET_BLKARG(ctx_h);
-
-	CL_SET_BLKARG(cty_b);
-	CL_SET_BLKARG(cty_c);
-
-	
-	CL_SET_BLKARG(cty_f);
-	CL_SET_BLKARG(cty_g);
-	CL_SET_BLKARG(cty_h);
-
-	if (!clState->goffset) {
-		cl_uint vwidth = clState->vwidth;
-		uint *nonces = alloca(sizeof(uint) * vwidth);
-		unsigned int i;
-
-		for (i = 0; i < vwidth; i++)
-			nonces[i] = blk->nonce + (i * threads);
-		CL_SET_VARG(vwidth, nonces);
-	}
-
-	CL_SET_BLKARG(fW0);
-	CL_SET_BLKARG(fW1);
-	CL_SET_BLKARG(fW2);
-	CL_SET_BLKARG(fW3);
-	CL_SET_BLKARG(fW15);
-	CL_SET_BLKARG(fW01r);
-
-	CL_SET_BLKARG(D1A);
-	CL_SET_BLKARG(C1addK5);
-	CL_SET_BLKARG(B1addK6);
-	CL_SET_BLKARG(W16addK16);
-	CL_SET_BLKARG(W17addK17);
-	CL_SET_BLKARG(PreVal4addT1);
-	CL_SET_BLKARG(PreVal0);
-
-	CL_SET_ARG(clState->outputBuffer);
-
-	return status;
-}
-
-static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
-				 __maybe_unused cl_uint threads)
-{
-	cl_kernel *kernel = &clState->kernel;
-	cl_uint vwidth = clState->vwidth;
-	unsigned int i, num = 0;
-	cl_int status = 0;
-	uint *nonces;
-
-	CL_SET_BLKARG(ctx_a);
-	CL_SET_BLKARG(ctx_b);
-	CL_SET_BLKARG(ctx_c);
-	CL_SET_BLKARG(ctx_d);
-	CL_SET_BLKARG(ctx_e);
-	CL_SET_BLKARG(ctx_f);
-	CL_SET_BLKARG(ctx_g);
-	CL_SET_BLKARG(ctx_h);
-
-	CL_SET_BLKARG(cty_b);
-	CL_SET_BLKARG(cty_c);
-	CL_SET_BLKARG(cty_d);
-	CL_SET_BLKARG(cty_f);
-	CL_SET_BLKARG(cty_g);
-	CL_SET_BLKARG(cty_h);
-
-	nonces = alloca(sizeof(uint) * vwidth);
-	for (i = 0; i < vwidth; i++)
-		nonces[i] = blk->nonce + i;
-	CL_SET_VARG(vwidth, nonces);
-
-	CL_SET_BLKARG(W16);
-	CL_SET_BLKARG(W17);
-	CL_SET_BLKARG(PreVal4_2);
-	CL_SET_BLKARG(PreVal0);
-	CL_SET_BLKARG(PreW18);
-	CL_SET_BLKARG(PreW19);
-	CL_SET_BLKARG(PreW31);
-	CL_SET_BLKARG(PreW32);
-
-	CL_SET_ARG(clState->outputBuffer);
-
-	return status;
-}
-
-static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
-				   __maybe_unused cl_uint threads)
-{
-	cl_kernel *kernel = &clState->kernel;
-	unsigned int num = 0;
-	cl_int status = 0;
-
-	if (!clState->goffset) {
-		cl_uint vwidth = clState->vwidth;
-		uint *nonces = alloca(sizeof(uint) * vwidth);
-		unsigned int i;
-		for (i = 0; i < vwidth; i++)
-			nonces[i] = blk->nonce + i;
-		CL_SET_VARG(vwidth, nonces);
-	}
-
-	CL_SET_BLKARG(PreVal0);
-	CL_SET_BLKARG(PreVal4_2);
-	CL_SET_BLKARG(cty_h);
-	CL_SET_BLKARG(D1A);
-	CL_SET_BLKARG(cty_b);
-	CL_SET_BLKARG(cty_c);
-	CL_SET_BLKARG(cty_f);
-	CL_SET_BLKARG(cty_g);
-	CL_SET_BLKARG(C1addK5);
-	CL_SET_BLKARG(B1addK6);
-	CL_SET_BLKARG(PreVal0addK7);
-	CL_SET_BLKARG(W16addK16);
-	CL_SET_BLKARG(W17addK17);
-	CL_SET_BLKARG(PreW18);
-	CL_SET_BLKARG(PreW19);
-	CL_SET_BLKARG(W16);
-	CL_SET_BLKARG(W17);
-	CL_SET_BLKARG(PreW31);
-	CL_SET_BLKARG(PreW32);
-
-	CL_SET_BLKARG(ctx_a);
-	CL_SET_BLKARG(ctx_b);
-	CL_SET_BLKARG(ctx_c);
-	CL_SET_BLKARG(ctx_d);
-	CL_SET_BLKARG(ctx_e);
-	CL_SET_BLKARG(ctx_f);
-	CL_SET_BLKARG(ctx_g);
-	CL_SET_BLKARG(ctx_h);
-
-	CL_SET_BLKARG(zeroA);
-	CL_SET_BLKARG(zeroB);
-
-	CL_SET_BLKARG(oneA);
-	CL_SET_BLKARG(twoA);
-	CL_SET_BLKARG(threeA);
-	CL_SET_BLKARG(fourA);
-	CL_SET_BLKARG(fiveA);
-	CL_SET_BLKARG(sixA);
-	CL_SET_BLKARG(sevenA);
-
-	CL_SET_ARG(clState->outputBuffer);
-
-	return status;
-}
-
-static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
-{
-	cl_kernel *kernel = &clState->kernel;
-	unsigned int num = 0;
-	cl_int status = 0;
-
-	if (!clState->goffset) {
-		cl_uint vwidth = clState->vwidth;
-		uint *nonces = alloca(sizeof(uint) * vwidth);
-		unsigned int i;
-
-		for (i = 0; i < vwidth; i++)
-			nonces[i] = blk->nonce + (i * threads);
-		CL_SET_VARG(vwidth, nonces);
-	}
-
-
-	CL_SET_BLKARG(PreVal0);
-	CL_SET_BLKARG(PreVal0addK7);
-	CL_SET_BLKARG(PreVal4addT1);
-	CL_SET_BLKARG(PreW18);
-	CL_SET_BLKARG(PreW19);
-	CL_SET_BLKARG(W16);
-	CL_SET_BLKARG(W17);
-	CL_SET_BLKARG(W16addK16);
-	CL_SET_BLKARG(W17addK17);
-	CL_SET_BLKARG(PreW31);
-	CL_SET_BLKARG(PreW32);
-
-	CL_SET_BLKARG(D1A);
-	CL_SET_BLKARG(cty_b);
-	CL_SET_BLKARG(cty_c);
-	CL_SET_BLKARG(cty_h);
-	CL_SET_BLKARG(cty_f);
-	CL_SET_BLKARG(cty_g);
-
-	CL_SET_BLKARG(C1addK5);
-	CL_SET_BLKARG(B1addK6);
-
-	CL_SET_BLKARG(ctx_a);
-	CL_SET_BLKARG(ctx_b);
-	CL_SET_BLKARG(ctx_c);
-	CL_SET_BLKARG(ctx_d);
-	CL_SET_BLKARG(ctx_e);
-	CL_SET_BLKARG(ctx_f);
-	CL_SET_BLKARG(ctx_g);
-	CL_SET_BLKARG(ctx_h);
-
-	CL_SET_ARG(clState->outputBuffer);
-
-	return status;
-}
-
-#ifdef USE_SCRYPT
-static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
-{
-	unsigned char *midstate = blk->work->midstate;
-	cl_kernel *kernel = &clState->kernel;
-	unsigned int num = 0;
-	cl_uint le_target;
-	cl_int status = 0;
-
-	le_target = *(cl_uint *)(blk->work->device_target + 28);
-	clState->cldata = blk->work->data;
-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
-
-	CL_SET_ARG(clState->CLbuffer0);
-	CL_SET_ARG(clState->outputBuffer);
-	CL_SET_ARG(clState->padbuffer8);
-	CL_SET_VARG(4, &midstate[0]);
-	CL_SET_VARG(4, &midstate[16]);
-	CL_SET_ARG(le_target);
-
-	return status;
-}
-#endif
-
-static void set_threads_hashes(unsigned int vectors,int64_t *hashes, size_t *globalThreads,
-			       unsigned int minthreads, __maybe_unused int *intensity)
-{
-	unsigned int threads = 0;
-
-	while (threads < minthreads) {
-		threads = 1 << ((opt_scrypt ? 0 : 15) + *intensity);
-		if (threads < minthreads) {
-			if (likely(*intensity < MAX_INTENSITY))
-				(*intensity)++;
-			else
-				threads = minthreads;
-		}
-	}
-
-	*globalThreads = threads;
-	*hashes = threads * vectors;
-}
-#endif /* HAVE_OPENCL */
-
-
-#ifdef HAVE_OPENCL
-/* We have only one thread that ever re-initialises GPUs, thus if any GPU
- * init command fails due to a completely wedged GPU, the thread will never
- * return, unable to harm other GPUs. If it does return, it means we only had
- * a soft failure and then the reinit_gpu thread is ready to tackle another
- * GPU */
-void *reinit_gpu(void *userdata)
-{
-	struct thr_info *mythr = userdata;
-	struct cgpu_info *cgpu;
-	struct thr_info *thr;
-	struct timeval now;
-	char name[256];
-	int thr_id;
-	int gpu;
-
-	pthread_detach(pthread_self());
-
-select_cgpu:
-	cgpu = tq_pop(mythr->q, NULL);
-	if (!cgpu)
-		goto out;
-
-	if (clDevicesNum() != nDevs) {
-		applog(LOG_WARNING, "Hardware not reporting same number of active devices, will not attempt to restart GPU");
-		goto out;
-	}
-
-	gpu = cgpu->device_id;
-
-	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
-		thr = get_thread(thr_id);
-		cgpu = thr->cgpu;
-		if (cgpu->drv->drv_id != DRIVER_opencl)
-			continue;
-		if (dev_from_id(thr_id) != gpu)
-			continue;
-
-		thr = get_thread(thr_id);
-		if (!thr) {
-			applog(LOG_WARNING, "No reference to thread %d exists", thr_id);
-			continue;
-		}
-
-		thr->rolling = thr->cgpu->rolling = 0;
-		/* Reports the last time we tried to revive a sick GPU */
-		cgtime(&thr->sick);
-		if (!pthread_cancel(thr->pth)) {
-			applog(LOG_WARNING, "Thread %d still exists, killing it off", thr_id);
-		} else
-			applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
-	}
-
-	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
-		int virtual_gpu;
-
-		thr = get_thread(thr_id);
-		cgpu = thr->cgpu;
-		if (cgpu->drv->drv_id != DRIVER_opencl)
-			continue;
-		if (dev_from_id(thr_id) != gpu)
-			continue;
-
-		virtual_gpu = cgpu->virtual_gpu;
-		/* Lose this ram cause we may get stuck here! */
-		//tq_freeze(thr->q);
-
-		thr->q = tq_new();
-		if (!thr->q)
-			quit(1, "Failed to tq_new in reinit_gpu");
-
-		/* Lose this ram cause we may dereference in the dying thread! */
-		//free(clState);
-
-		applog(LOG_INFO, "Reinit GPU thread %d", thr_id);
-		clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name));
-		if (!clStates[thr_id]) {
-			applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id);
-			goto select_cgpu;
-		}
-		applog(LOG_INFO, "initCl() finished. Found %s", name);
-
-		if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) {
-			applog(LOG_ERR, "thread %d create failed", thr_id);
-			return NULL;
-		}
-		applog(LOG_WARNING, "Thread %d restarted", thr_id);
-	}
-
-	cgtime(&now);
-	get_datestamp(cgpu->init, sizeof(cgpu->init), &now);
-
-	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
-		thr = get_thread(thr_id);
-		cgpu = thr->cgpu;
-		if (cgpu->drv->drv_id != DRIVER_opencl)
-			continue;
-		if (dev_from_id(thr_id) != gpu)
-			continue;
-
-		cgsem_post(&thr->sem);
-	}
-
-	goto select_cgpu;
-out:
-	return NULL;
-}
-#else
-void *reinit_gpu(__maybe_unused void *userdata)
-{
-	return NULL;
-}
-#endif
-
-
-#ifdef HAVE_OPENCL
-static void opencl_detect(bool hotplug)
-{
-	int i;
-
-	if (opt_nogpu || hotplug)
-		return;
-	nDevs = clDevicesNum();
-	if (nDevs < 0) {
-		applog(LOG_ERR, "clDevicesNum returned error, no GPUs usable");
-		nDevs = 0;
-	}
-
-	if (!nDevs)
-		return;
-
-	/* If opt_g_threads is not set, use default 1 thread on scrypt and
-	 * 2 for regular mining */
-	if (opt_g_threads == -1) {
-		if (opt_scrypt)
-			opt_g_threads = 1;
-		else
-			opt_g_threads = 2;
-	}
-
-	if (opt_scrypt)
-		opencl_drv.max_diff = 65536;
-
-	for (i = 0; i < nDevs; ++i) {
-		struct cgpu_info *cgpu;
-
-		cgpu = &gpus[i];
-		cgpu->deven = DEV_ENABLED;
-		cgpu->drv = &opencl_drv;
-		cgpu->device_id = i;
-		cgpu->threads = opt_g_threads;
-		cgpu->virtual_gpu = i;
-		add_cgpu(cgpu);
-	}
-
-	if (!opt_noadl)
-		init_adl(nDevs);
-}
-
-static void reinit_opencl_device(struct cgpu_info *gpu)
-{
-	tq_push(control_thr[gpur_thr_id].q, gpu);
-}
-
-#ifdef HAVE_ADL
-static void get_opencl_statline_before(char *buf, size_t bufsiz, struct cgpu_info *gpu)
-{
-	if (gpu->has_adl) {
-		int gpuid = gpu->device_id;
-		float gt = gpu_temp(gpuid);
-		int gf = gpu_fanspeed(gpuid);
-		int gp;
-
-		if (gt != -1)
-			tailsprintf(buf, bufsiz, "%5.1fC ", gt);
-		else
-			tailsprintf(buf, bufsiz, "       ");
-		if (gf != -1)
-			// show invalid as 9999
-			tailsprintf(buf, bufsiz, "%4dRPM ", gf > 9999 ? 9999 : gf);
-		else if ((gp = gpu_fanpercent(gpuid)) != -1)
-			tailsprintf(buf, bufsiz, "%3d%%    ", gp);
-		else
-			tailsprintf(buf, bufsiz, "        ");
-		tailsprintf(buf, bufsiz, "| ");
-	} else
-		gpu->drv->get_statline_before = &blank_get_statline_before;
-}
-#endif
-
-static void get_opencl_statline(char *buf, size_t bufsiz, struct cgpu_info *gpu)
-{
-	tailsprintf(buf, bufsiz, " I:%2d", gpu->intensity);
-}
-
-struct opencl_thread_data {
-	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *, cl_uint);
-	uint32_t *res;
-};
-
-static uint32_t *blank_res;
-
-static bool opencl_thread_prepare(struct thr_info *thr)
-{
-	char name[256];
-	struct timeval now;
-	struct cgpu_info *cgpu = thr->cgpu;
-	int gpu = cgpu->device_id;
-	int virtual_gpu = cgpu->virtual_gpu;
-	int i = thr->id;
-	static bool failmessage = false;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
-
-	if (!blank_res)
-		blank_res = calloc(buffersize, 1);
-	if (!blank_res) {
-		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
-		return false;
-	}
-
-	strcpy(name, "");
-	applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu);
-	clStates[i] = initCl(virtual_gpu, name, sizeof(name));
-	if (!clStates[i]) {
-#ifdef HAVE_CURSES
-		if (use_curses)
-			enable_curses();
-#endif
-		applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu);
-		if (!failmessage) {
-			applog(LOG_ERR, "Restarting the GPU from the menu will not fix this.");
-			applog(LOG_ERR, "Try restarting cgminer.");
-			failmessage = true;
-#ifdef HAVE_CURSES
-			char *buf;
-			if (use_curses) {
-				buf = curses_input("Press enter to continue");
-				if (buf)
-					free(buf);
-			}
-#endif
-		}
-		cgpu->deven = DEV_DISABLED;
-		cgpu->status = LIFE_NOSTART;
-
-		dev_error(cgpu, REASON_DEV_NOSTART);
-
-		return false;
-	}
-	if (!cgpu->name)
-		cgpu->name = strdup(name);
-	if (!cgpu->kname)
-	{
-		switch (clStates[i]->chosen_kernel) {
-			case KL_DIABLO:
-				cgpu->kname = "diablo";
-				break;
-			case KL_DIAKGCN:
-				cgpu->kname = "diakgcn";
-				break;
-			case KL_PHATK:
-				cgpu->kname = "phatk";
-				break;
-#ifdef USE_SCRYPT
-			case KL_SCRYPT:
-				cgpu->kname = "scrypt";
-				break;
-#endif
-			case KL_POCLBM:
-				cgpu->kname = "poclbm";
-				break;
-			default:
-				break;
-		}
-	}
-	applog(LOG_INFO, "initCl() finished. Found %s", name);
-	cgtime(&now);
-	get_datestamp(cgpu->init, sizeof(cgpu->init), &now);
-
-	have_opencl = true;
-
-	return true;
-}
-
-static bool opencl_thread_init(struct thr_info *thr)
-{
-	const int thr_id = thr->id;
-	struct cgpu_info *gpu = thr->cgpu;
-	struct opencl_thread_data *thrdata;
-	_clState *clState = clStates[thr_id];
-	cl_int status = 0;
-	thrdata = calloc(1, sizeof(*thrdata));
-	thr->cgpu_data = thrdata;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
-
-	if (!thrdata) {
-		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
-		return false;
-	}
-
-	switch (clState->chosen_kernel) {
-		case KL_POCLBM:
-			thrdata->queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-			thrdata->queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-		case KL_DIAKGCN:
-			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
-			break;
-#ifdef USE_SCRYPT
-		case KL_SCRYPT:
-			thrdata->queue_kernel_parameters = &queue_scrypt_kernel;
-			break;
-#endif
-		default:
-		case KL_DIABLO:
-			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
-			break;
-	}
-
-	thrdata->res = calloc(buffersize, 1);
-
-	if (!thrdata->res) {
-		free(thrdata);
-		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
-		return false;
-	}
-
-	status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-				       buffersize, blank_res, 0, NULL, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
-		return false;
-	}
-
-	gpu->status = LIFE_WELL;
-
-	gpu->device_last_well = time(NULL);
-
-	return true;
-}
-
-
-static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
-{
-#ifdef USE_SCRYPT
-	if (opt_scrypt)
-		work->blk.work = work;
-	else
-#endif
-		precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-	return true;
-}
-
-extern int opt_dynamic_interval;
-
-static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
-				int64_t __maybe_unused max_nonce)
-{
-	const int thr_id = thr->id;
-	struct opencl_thread_data *thrdata = thr->cgpu_data;
-	struct cgpu_info *gpu = thr->cgpu;
-	_clState *clState = clStates[thr_id];
-	const cl_kernel *kernel = &clState->kernel;
-	const int dynamic_us = opt_dynamic_interval * 1000;
-
-	cl_int status;
-	size_t globalThreads[1];
-	size_t localThreads[1] = { clState->wsize };
-	int64_t hashes;
-	int found = opt_scrypt ? SCRYPT_FOUND : FOUND;
-	int buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
-
-	/* Windows' timer resolution is only 15ms so oversample 5x */
-	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
-		struct timeval tv_gpuend;
-		double gpu_us;
-
-		cgtime(&tv_gpuend);
-		gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
-		if (gpu_us > dynamic_us) {
-			if (gpu->intensity > MIN_INTENSITY)
-				--gpu->intensity;
-		} else if (gpu_us < dynamic_us / 2) {
-			if (gpu->intensity < MAX_INTENSITY)
-				++gpu->intensity;
-		}
-		memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
-		gpu->intervals = 0;
-	}
-
-	set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
-	if (hashes > gpu->max_hashes)
-		gpu->max_hashes = hashes;
-
-	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
-		return -1;
-	}
-
-	if (clState->goffset) {
-		size_t global_work_offset[1];
-
-		global_work_offset[0] = work->blk.nonce;
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, global_work_offset,
-						globalThreads, localThreads, 0,  NULL, NULL);
-	} else
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
-						globalThreads, localThreads, 0,  NULL, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)", status);
-		return -1;
-	}
-
-	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-				     buffersize, thrdata->res, 0, NULL, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
-		return -1;
-	}
-
-	/* The amount of work scanned can fluctuate when intensity changes
-	 * and since we do this one cycle behind, we increment the work more
-	 * than enough to prevent repeating work */
-	work->blk.nonce += gpu->max_hashes;
-
-	/* This finish flushes the readbuffer set with CL_FALSE in clEnqueueReadBuffer */
-	clFinish(clState->commandQueue);
-
-	/* FOUND entry is used as a counter to say how many nonces exist */
-	if (thrdata->res[found]) {
-		/* Clear the buffer again */
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					      buffersize, blank_res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS)) {
-			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
-			return -1;
-		}
-		applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
-		postcalc_hash_async(thr, work, thrdata->res);
-		memset(thrdata->res, 0, buffersize);
-		/* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */
-		clFinish(clState->commandQueue);
-	}
-
-	return hashes;
-}
-
-static void opencl_thread_shutdown(struct thr_info *thr)
-{
-	const int thr_id = thr->id;
-	_clState *clState = clStates[thr_id];
-
-	clReleaseKernel(clState->kernel);
-	clReleaseProgram(clState->program);
-	clReleaseCommandQueue(clState->commandQueue);
-	clReleaseContext(clState->context);
-}
-
-struct device_drv opencl_drv = {
-	.drv_id = DRIVER_opencl,
-	.dname = "opencl",
-	.name = "GPU",
-	.drv_detect = opencl_detect,
-	.reinit_device = reinit_opencl_device,
-#ifdef HAVE_ADL
-	.get_statline_before = get_opencl_statline_before,
-#endif
-	.get_statline = get_opencl_statline,
-	.thread_prepare = opencl_thread_prepare,
-	.thread_init = opencl_thread_init,
-	.prepare_work = opencl_prepare_work,
-	.scanhash = opencl_scanhash,
-	.thread_shutdown = opencl_thread_shutdown,
-};
-#endif

+ 0 - 35
driver-opencl.h

@@ -1,35 +0,0 @@
-#ifndef __DEVICE_GPU_H__
-#define __DEVICE_GPU_H__
-
-#include "miner.h"
-
-
-extern void print_ndevs(int *ndevs);
-extern void *reinit_gpu(void *userdata);
-extern char *set_gpu_map(char *arg);
-extern char *set_gpu_engine(char *arg);
-extern char *set_gpu_fan(char *arg);
-extern char *set_gpu_memclock(char *arg);
-extern char *set_gpu_memdiff(char *arg);
-extern char *set_gpu_powertune(char *arg);
-extern char *set_gpu_vddc(char *arg);
-extern char *set_temp_overheat(char *arg);
-extern char *set_temp_target(char *arg);
-extern char *set_intensity(char *arg);
-extern char *set_vector(char *arg);
-extern char *set_worksize(char *arg);
-#ifdef USE_SCRYPT
-extern char *set_shaders(char *arg);
-extern char *set_lookup_gap(char *arg);
-extern char *set_thread_concurrency(char *arg);
-#endif
-extern char *set_kernel(char *arg);
-void manage_gpu(void);
-extern void pause_dynamic_threads(int gpu);
-
-extern bool have_opencl;
-extern int opt_platform_id;
-
-extern struct device_drv opencl_drv;
-
-#endif /* __DEVICE_GPU_H__ */

+ 0 - 234
findnonce.c

@@ -1,234 +0,0 @@
-/*
- * Copyright 2011-2013 Con Kolivas
- * Copyright 2011 Nils Schneider
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.  See COPYING for more details.
- */
-
-#include "config.h"
-#ifdef HAVE_OPENCL
-
-#include <stdio.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <string.h>
-
-#include "findnonce.h"
-#include "scrypt.h"
-
-const uint32_t SHA256_K[64] = {
-	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-#define rotate(x,y) ((x<<y) | (x>>(sizeof(x)*8-y)))
-#define rotr(x,y) ((x>>y) | (x<<(sizeof(x)*8-y)))
-
-#define R(a, b, c, d, e, f, g, h, w, k) \
-	h = h + (rotate(e, 26) ^ rotate(e, 21) ^ rotate(e, 7)) + (g ^ (e & (f ^ g))) + k + w; \
-	d = d + h; \
-	h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))
-
-void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
-{
-	cl_uint A, B, C, D, E, F, G, H;
-
-	A = state[0];
-	B = state[1];
-	C = state[2];
-	D = state[3];
-	E = state[4];
-	F = state[5];
-	G = state[6];
-	H = state[7];
-
-	R(A, B, C, D, E, F, G, H, data[0], SHA256_K[0]);
-	R(H, A, B, C, D, E, F, G, data[1], SHA256_K[1]);
-	R(G, H, A, B, C, D, E, F, data[2], SHA256_K[2]);
-
-	blk->cty_a = A;
-	blk->cty_b = B;
-	blk->cty_c = C;
-	blk->cty_d = D;
-
-	blk->D1A = D + 0xb956c25b;
-
-	blk->cty_e = E;
-	blk->cty_f = F;
-	blk->cty_g = G;
-	blk->cty_h = H;
-
-	blk->ctx_a = state[0];
-	blk->ctx_b = state[1];
-	blk->ctx_c = state[2];
-	blk->ctx_d = state[3];
-	blk->ctx_e = state[4];
-	blk->ctx_f = state[5];
-	blk->ctx_g = state[6];
-	blk->ctx_h = state[7];
-
-	blk->merkle = data[0];
-	blk->ntime = data[1];
-	blk->nbits = data[2];
-
-	blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
-	blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
-	blk->PreVal4 = blk->fcty_e = blk->ctx_e + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
-	blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
-	blk->PreVal4_2 = blk->PreVal4 + blk->T1;
-	blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
-	blk->PreW31 = 0x00000280 + (rotr(blk->W16,  7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
-	blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
-	blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
-	blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
-
-
-	blk->W2 = data[2];
-
-	blk->W2A = blk->W2 + (rotr(blk->W16, 19) ^ rotr(blk->W16, 17) ^ (blk->W16 >> 10));
-	blk->W17_2 = 0x11002000 + (rotr(blk->W17, 19) ^ rotr(blk->W17, 17) ^ (blk->W17 >> 10));
-
-	blk->fW2 = data[2] + (rotr(blk->fW0, 17) ^ rotr(blk->fW0, 19) ^ (blk->fW0 >> 10));
-	blk->fW3 = 0x11002000 + (rotr(blk->fW1, 17) ^ rotr(blk->fW1, 19) ^ (blk->fW1 >> 10));
-	blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3));
-	blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3));
-
-
-	blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
-	blk->T1substate0 = blk->ctx_a - blk->T1;
-
-	blk->C1addK5 = blk->cty_c + SHA256_K[5];
-	blk->B1addK6 = blk->cty_b + SHA256_K[6];
-	blk->PreVal0addK7 = blk->PreVal0 + SHA256_K[7];
-	blk->W16addK16 = blk->W16 + SHA256_K[16];
-	blk->W17addK17 = blk->W17 + SHA256_K[17];
-
-	blk->zeroA = blk->ctx_a + 0x98c7e2a2;
-	blk->zeroB = blk->ctx_a + 0xfc08884d;
-	blk->oneA = blk->ctx_b + 0x90bb1e3c;
-	blk->twoA = blk->ctx_c + 0x50c6645b;
-	blk->threeA = blk->ctx_d + 0x3ac42e24;
-	blk->fourA = blk->ctx_e + SHA256_K[4];
-	blk->fiveA = blk->ctx_f + SHA256_K[5];
-	blk->sixA = blk->ctx_g + SHA256_K[6];
-	blk->sevenA = blk->ctx_h + SHA256_K[7];
-}
-
-#if 0 // not used any more
-
-#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))
-
-#define IR(u) \
-  R(A, B, C, D, E, F, G, H, W[u+0], SHA256_K[u+0]); \
-  R(H, A, B, C, D, E, F, G, W[u+1], SHA256_K[u+1]); \
-  R(G, H, A, B, C, D, E, F, W[u+2], SHA256_K[u+2]); \
-  R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \
-  R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \
-  R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \
-  R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \
-  R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7])
-#define FR(u) \
-  R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \
-  R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \
-  R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \
-  R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \
-  R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
-  R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5]); \
-  R(C, D, E, F, G, H, A, B, P(u+6), SHA256_K[u+6]); \
-  R(B, C, D, E, F, G, H, A, P(u+7), SHA256_K[u+7])
-
-#define PIR(u) \
-  R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \
-  R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \
-  R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \
-  R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \
-  R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7])
-
-#define PFR(u) \
-  R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \
-  R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \
-  R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \
-  R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \
-  R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
-  R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
-
-#endif
-
-struct pc_data {
-	struct thr_info *thr;
-	struct work *work;
-	uint32_t res[SCRYPT_MAXBUFFERS];
-	pthread_t pth;
-	int found;
-};
-
-static void *postcalc_hash(void *userdata)
-{
-	struct pc_data *pcd = (struct pc_data *)userdata;
-	struct thr_info *thr = pcd->thr;
-	unsigned int entry = 0;
-	int found = opt_scrypt ? SCRYPT_FOUND : FOUND;
-
-	pthread_detach(pthread_self());
-
-	/* To prevent corrupt values in FOUND from trying to read beyond the
-	 * end of the res[] array */
-	if (unlikely(pcd->res[found] & ~found)) {
-		applog(LOG_WARNING, "%s%d: invalid nonce count - HW error",
-				thr->cgpu->drv->name, thr->cgpu->device_id);
-		hw_errors++;
-		thr->cgpu->hw_errors++;
-		pcd->res[found] &= found;
-	}
-
-	for (entry = 0; entry < pcd->res[found]; entry++) {
-		uint32_t nonce = pcd->res[entry];
-
-		applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
-		submit_nonce(thr, pcd->work, nonce);
-	}
-
-	discard_work(pcd->work);
-	free(pcd);
-
-	return NULL;
-}
-
-void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res)
-{
-	struct pc_data *pcd = malloc(sizeof(struct pc_data));
-	int buffersize;
-
-	if (unlikely(!pcd)) {
-		applog(LOG_ERR, "Failed to malloc pc_data in postcalc_hash_async");
-		return;
-	}
-
-	pcd->thr = thr;
-	pcd->work = copy_work(work);
-	buffersize = opt_scrypt ? SCRYPT_BUFFERSIZE : BUFFERSIZE;
-	memcpy(&pcd->res, res, buffersize);
-
-	if (pthread_create(&pcd->pth, NULL, postcalc_hash, (void *)pcd)) {
-		applog(LOG_ERR, "Failed to create postcalc_hash thread");
-		return;
-	}
-}
-#endif /* HAVE_OPENCL */

+ 0 - 19
findnonce.h

@@ -1,19 +0,0 @@
-#ifndef __FINDNONCE_H__
-#define __FINDNONCE_H__
-#include "miner.h"
-#include "config.h"
-
-#define MAXTHREADS (0xFFFFFFFEULL)
-#define MAXBUFFERS (0x10)
-#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
-#define FOUND (0x0F)
-
-#define SCRYPT_MAXBUFFERS (0x100)
-#define SCRYPT_BUFFERSIZE (sizeof(uint32_t) * SCRYPT_MAXBUFFERS)
-#define SCRYPT_FOUND (0xFF)
-
-#ifdef HAVE_OPENCL
-extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
-extern void postcalc_hash_async(struct thr_info *thr, struct work *work, uint32_t *res);
-#endif /* HAVE_OPENCL */
-#endif /*__FINDNONCE_H__*/

+ 1 - 78
miner.h

@@ -35,14 +35,6 @@ extern char *curly;
 #include <semaphore.h>
 #include <semaphore.h>
 #endif
 #endif
 
 
-#ifdef HAVE_OPENCL
-#ifdef __APPLE_CC__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/cl.h>
-#endif
-#endif /* HAVE_OPENCL */
-
 #ifdef STDC_HEADERS
 #ifdef STDC_HEADERS
 # include <stdlib.h>
 # include <stdlib.h>
 # include <stddef.h>
 # include <stddef.h>
@@ -252,7 +244,6 @@ static inline int fsync (int fd)
 	DRIVER_ADD_COMMAND(avalon)
 	DRIVER_ADD_COMMAND(avalon)
 
 
 #define DRIVER_PARSE_COMMANDS(DRIVER_ADD_COMMAND) \
 #define DRIVER_PARSE_COMMANDS(DRIVER_ADD_COMMAND) \
-	DRIVER_ADD_COMMAND(opencl) \
 	FPGA_PARSE_COMMANDS(DRIVER_ADD_COMMAND) \
 	FPGA_PARSE_COMMANDS(DRIVER_ADD_COMMAND) \
 	ASIC_PARSE_COMMANDS(DRIVER_ADD_COMMAND)
 	ASIC_PARSE_COMMANDS(DRIVER_ADD_COMMAND)
 
 
@@ -522,45 +513,12 @@ struct cgpu_info {
 	int64_t max_hashes;
 	int64_t max_hashes;
 
 
 	const char *kname;
 	const char *kname;
-#ifdef HAVE_OPENCL
-	bool mapped;
-	int virtual_gpu;
-	int virtual_adl;
-	int intensity;
-	bool dynamic;
-
-	cl_uint vwidth;
-	size_t work_size;
-	enum cl_kernels kernel;
-	cl_ulong max_alloc;
-
-#ifdef USE_SCRYPT
-	int opt_lg, lookup_gap;
-	size_t opt_tc, thread_concurrency;
-	size_t shaders;
-#endif
-	struct timeval tv_gpustart;
-	int intervals;
-#endif
 
 
 	bool new_work;
 	bool new_work;
 
 
 	float temp;
 	float temp;
 	int cutofftemp;
 	int cutofftemp;
 
 
-#ifdef HAVE_ADL
-	bool has_adl;
-	struct gpu_adl adl;
-
-	int gpu_engine;
-	int min_engine;
-	int gpu_fan;
-	int min_fan;
-	int gpu_memclock;
-	int gpu_memdiff;
-	int gpu_powertune;
-	float gpu_vddc;
-#endif
 	int diff1;
 	int diff1;
 	double diff_accepted;
 	double diff_accepted;
 	double diff_rejected;
 	double diff_rejected;
@@ -1202,40 +1160,6 @@ extern uint64_t best_diff;
 extern struct timeval block_timeval;
 extern struct timeval block_timeval;
 extern char *workpadding;
 extern char *workpadding;
 
 
-#ifdef HAVE_OPENCL
-typedef struct {
-	cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
-	cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
-	cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
-	cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
-	cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce;
-	cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
-	cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
-	cl_uint W16; cl_uint W17; cl_uint W2;
-	cl_uint PreVal4; cl_uint T1;
-	cl_uint C1addK5; cl_uint D1A; cl_uint W2A; cl_uint W17_2;
-	cl_uint PreVal4addT1; cl_uint T1substate0;
-	cl_uint PreVal4_2;
-	cl_uint PreVal0;
-	cl_uint PreW18;
-	cl_uint PreW19;
-	cl_uint PreW31;
-	cl_uint PreW32;
-
-	/* For diakgcn */
-	cl_uint B1addK6, PreVal0addK7, W16addK16, W17addK17;
-	cl_uint zeroA, zeroB;
-	cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
-#ifdef USE_SCRYPT
-	struct work *work;
-#endif
-} dev_blk_ctx;
-#else
-typedef struct {
-	uint32_t nonce;
-} dev_blk_ctx;
-#endif
-
 struct curl_ent {
 struct curl_ent {
 	CURL *curl;
 	CURL *curl;
 	struct list_head node;
 	struct list_head node;
@@ -1413,8 +1337,7 @@ struct work {
 
 
 	int		rolls;
 	int		rolls;
 	int		drv_rolllimit; /* How much the driver can roll ntime */
 	int		drv_rolllimit; /* How much the driver can roll ntime */
-
-	dev_blk_ctx	blk;
+	uint32_t	nonce; /* For devices that hash sole work */
 
 
 	struct thr_info	*thr;
 	struct thr_info	*thr;
 	int		thr_id;
 	int		thr_id;

+ 0 - 848
ocl.c

@@ -1,848 +0,0 @@
-/*
- * Copyright 2011-2012 Con Kolivas
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.  See COPYING for more details.
- */
-
-#include "config.h"
-#ifdef HAVE_OPENCL
-
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <limits.h>
-#include <sys/types.h>
-
-#ifdef WIN32
-	#include <winsock2.h>
-#else
-	#include <sys/socket.h>
-	#include <netinet/in.h>
-	#include <netdb.h>
-#endif
-
-#include <time.h>
-#include <sys/time.h>
-#include <pthread.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "findnonce.h"
-#include "ocl.h"
-
-int opt_platform_id = -1;
-
-char *file_contents(const char *filename, int *length)
-{
-	char *fullpath = alloca(PATH_MAX);
-	void *buffer;
-	FILE *f;
-
-	strcpy(fullpath, opt_kernel_path);
-	strcat(fullpath, filename);
-
-	/* Try in the optional kernel path or installed prefix first */
-	f = fopen(fullpath, "rb");
-	if (!f) {
-		/* Then try from the path cgminer was called */
-		strcpy(fullpath, cgminer_path);
-		strcat(fullpath, filename);
-		f = fopen(fullpath, "rb");
-	}
-	/* Finally try opening it directly */
-	if (!f)
-		f = fopen(filename, "rb");
-
-	if (!f) {
-		applog(LOG_ERR, "Unable to open %s or %s for reading", filename, fullpath);
-		return NULL;
-	}
-
-	fseek(f, 0, SEEK_END);
-	*length = ftell(f);
-	fseek(f, 0, SEEK_SET);
-
-	buffer = malloc(*length+1);
-	*length = fread(buffer, 1, *length, f);
-	fclose(f);
-	((char*)buffer)[*length] = '\0';
-
-	return (char*)buffer;
-}
-
-int clDevicesNum(void) {
-	cl_int status;
-	char pbuff[256];
-	cl_uint numDevices;
-	cl_uint numPlatforms;
-	int most_devices = -1;
-	cl_platform_id *platforms;
-	cl_platform_id platform = NULL;
-	unsigned int i, mdplatform = 0;
-
-	status = clGetPlatformIDs(0, NULL, &numPlatforms);
-	/* If this fails, assume no GPUs. */
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: clGetPlatformsIDs failed (no OpenCL SDK installed?)", status);
-		return -1;
-	}
-
-	if (numPlatforms == 0) {
-		applog(LOG_ERR, "clGetPlatformsIDs returned no platforms (no OpenCL SDK installed?)");
-		return -1;
-	}
-
-	platforms = (cl_platform_id *)alloca(numPlatforms*sizeof(cl_platform_id));
-	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
-		return -1;
-	}
-
-	for (i = 0; i < numPlatforms; i++) {
-		if (opt_platform_id >= 0 && (int)i != opt_platform_id)
-			continue;
-
-		status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
-			return -1;
-		}
-		platform = platforms[i];
-		applog(LOG_INFO, "CL Platform %d vendor: %s", i, pbuff);
-		status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
-		if (status == CL_SUCCESS)
-			applog(LOG_INFO, "CL Platform %d name: %s", i, pbuff);
-		status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(pbuff), pbuff, NULL);
-		if (status == CL_SUCCESS)
-			applog(LOG_INFO, "CL Platform %d version: %s", i, pbuff);
-		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
-		if (status != CL_SUCCESS) {
-			applog(LOG_INFO, "Error %d: Getting Device IDs (num)", status);
-			continue;
-		}
-		applog(LOG_INFO, "Platform %d devices: %d", i, numDevices);
-		if ((int)numDevices > most_devices) {
-			most_devices = numDevices;
-			mdplatform = i;
-		}
-		if (numDevices) {
-			unsigned int j;
-			cl_device_id *devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
-
-			clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
-			for (j = 0; j < numDevices; j++) {
-				clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
-				applog(LOG_INFO, "\t%i\t%s", j, pbuff);
-			}
-			free(devices);
-		}
-	}
-
-	if (opt_platform_id < 0)
-		opt_platform_id = mdplatform;;
-
-	return most_devices;
-}
-
-static int advance(char **area, unsigned *remaining, const char *marker)
-{
-	char *find = memmem(*area, *remaining, marker, strlen(marker));
-
-	if (!find) {
-		applog(LOG_DEBUG, "Marker \"%s\" not found", marker);
-		return 0;
-	}
-	*remaining -= find - *area;
-	*area = find;
-	return 1;
-}
-
-#define OP3_INST_BFE_UINT	4ULL
-#define OP3_INST_BFE_INT	5ULL
-#define OP3_INST_BFI_INT	6ULL
-#define OP3_INST_BIT_ALIGN_INT	12ULL
-#define OP3_INST_BYTE_ALIGN_INT	13ULL
-
-void patch_opcodes(char *w, unsigned remaining)
-{
-	uint64_t *opcode = (uint64_t *)w;
-	int patched = 0;
-	int count_bfe_int = 0;
-	int count_bfe_uint = 0;
-	int count_byte_align = 0;
-	while (42) {
-		int clamp = (*opcode >> (32 + 31)) & 0x1;
-		int dest_rel = (*opcode >> (32 + 28)) & 0x1;
-		int alu_inst = (*opcode >> (32 + 13)) & 0x1f;
-		int s2_neg = (*opcode >> (32 + 12)) & 0x1;
-		int s2_rel = (*opcode >> (32 + 9)) & 0x1;
-		int pred_sel = (*opcode >> 29) & 0x3;
-		if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) {
-			if (alu_inst == OP3_INST_BFE_INT) {
-				count_bfe_int++;
-			} else if (alu_inst == OP3_INST_BFE_UINT) {
-				count_bfe_uint++;
-			} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
-				count_byte_align++;
-				// patch this instruction to BFI_INT
-				*opcode &= 0xfffc1fffffffffffULL;
-				*opcode |= OP3_INST_BFI_INT << (32 + 13);
-				patched++;
-			}
-		}
-		if (remaining <= 8)
-			break;
-		opcode++;
-		remaining -= 8;
-	}
-	applog(LOG_DEBUG, "Potential OP3 instructions identified: "
-		"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN",
-		count_bfe_int, count_bfe_uint, count_byte_align);
-	applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
-}
-
-_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
-{
-	_clState *clState = calloc(1, sizeof(_clState));
-	bool patchbfi = false, prog_built = false;
-	struct cgpu_info *cgpu = &gpus[gpu];
-	cl_platform_id platform = NULL;
-	char pbuff[256], vbuff[255];
-	cl_platform_id* platforms;
-	cl_uint preferred_vwidth;
-	cl_device_id *devices;
-	cl_uint numPlatforms;
-	cl_uint numDevices;
-	cl_int status;
-
-	status = clGetPlatformIDs(0, NULL, &numPlatforms);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Getting Platforms. (clGetPlatformsIDs)", status);
-		return NULL;
-	}
-
-	platforms = (cl_platform_id *)alloca(numPlatforms*sizeof(cl_platform_id));
-	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
-		return NULL;
-	}
-
-	if (opt_platform_id >= (int)numPlatforms) {
-		applog(LOG_ERR, "Specified platform that does not exist");
-		return NULL;
-	}
-
-	status = clGetPlatformInfo(platforms[opt_platform_id], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
-		return NULL;
-	}
-	platform = platforms[opt_platform_id];
-
-	if (platform == NULL) {
-		perror("NULL platform found!\n");
-		return NULL;
-	}
-
-	applog(LOG_INFO, "CL Platform vendor: %s", pbuff);
-	status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
-	if (status == CL_SUCCESS)
-		applog(LOG_INFO, "CL Platform name: %s", pbuff);
-	status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(vbuff), vbuff, NULL);
-	if (status == CL_SUCCESS)
-		applog(LOG_INFO, "CL Platform version: %s", vbuff);
-
-	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Getting Device IDs (num)", status);
-		return NULL;
-	}
-
-	if (numDevices > 0 ) {
-		devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
-
-		/* Now, get the device list data */
-
-		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Getting Device IDs (list)", status);
-			return NULL;
-		}
-
-		applog(LOG_INFO, "List of devices:");
-
-		unsigned int i;
-		for (i = 0; i < numDevices; i++) {
-			status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
-			if (status != CL_SUCCESS) {
-				applog(LOG_ERR, "Error %d: Getting Device Info", status);
-				return NULL;
-			}
-
-			applog(LOG_INFO, "\t%i\t%s", i, pbuff);
-		}
-
-		if (gpu < numDevices) {
-			status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
-			if (status != CL_SUCCESS) {
-				applog(LOG_ERR, "Error %d: Getting Device Info", status);
-				return NULL;
-			}
-
-			applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
-			strncpy(name, pbuff, nameSize);
-		} else {
-			applog(LOG_ERR, "Invalid GPU %i", gpu);
-			return NULL;
-		}
-
-	} else return NULL;
-
-	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
-
-	clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
-		return NULL;
-	}
-
-	/////////////////////////////////////////////////////////////////
-	// Create an OpenCL command queue
-	/////////////////////////////////////////////////////////////////
-	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
-						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
-	if (status != CL_SUCCESS) /* Try again without OOE enable */
-		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
-		return NULL;
-	}
-
-	/* Check for BFI INT support. Hopefully people don't mix devices with
-	 * and without it! */
-	char * extensions = malloc(1024);
-	const char * camo = "cl_amd_media_ops";
-	char *find;
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS", status);
-		return NULL;
-	}
-	find = strstr(extensions, camo);
-	if (find)
-		clState->hasBitAlign = true;
-		
-	/* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */
-	char * devoclver = malloc(1024);
-	const char * ocl10 = "OpenCL 1.0";
-	const char * ocl11 = "OpenCL 1.1";
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_VERSION", status);
-		return NULL;
-	}
-	find = strstr(devoclver, ocl10);
-	if (!find) {
-		clState->hasOpenCL11plus = true;
-		find = strstr(devoclver, ocl11);
-		if (!find)
-			clState->hasOpenCL12plus = true;
-	}
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
-		return NULL;
-	}
-	applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE", status);
-		return NULL;
-	}
-	applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size));
-
-	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
-		return NULL;
-	}
-	applog(LOG_DEBUG, "Max mem alloc size is %lu", (long unsigned int)(cgpu->max_alloc));
-
-	/* Create binary filename based on parameters passed to opencl
-	 * compiler to ensure we only load a binary that matches what would
-	 * have otherwise created. The filename is:
-	 * name + kernelname +/- g(offset) + v + vectors + w + work_size + l + sizeof(long) + .bin
-	 * For scrypt the filename is:
-	 * name + kernelname + g + lg + lookup_gap + tc + thread_concurrency + w + work_size + l + sizeof(long) + .bin
-	 */
-	char binaryfilename[255];
-	char filename[255];
-	char numbuf[16];
-
-	if (cgpu->kernel == KL_NONE) {
-		if (opt_scrypt) {
-			applog(LOG_INFO, "Selecting scrypt kernel");
-			clState->chosen_kernel = KL_SCRYPT;
-		} else if (!strstr(name, "Tahiti") &&
-			/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
-			(strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
-			 strstr(vbuff, "851.4") ||  // Windows 64 bit ""
-			 strstr(vbuff, "831.4") ||
-			 strstr(vbuff, "898.1") ||  // 12.2 driver SDK 
-			 strstr(vbuff, "923.1") ||  // 12.4
-			 strstr(vbuff, "938.2") ||  // SDK 2.7
-			 strstr(vbuff, "1113.2"))) {// SDK 2.8
-				applog(LOG_INFO, "Selecting diablo kernel");
-				clState->chosen_kernel = KL_DIABLO;
-		/* Detect all 7970s, older ATI and NVIDIA and use poclbm */
-		} else if (strstr(name, "Tahiti") || !clState->hasBitAlign) {
-			applog(LOG_INFO, "Selecting poclbm kernel");
-			clState->chosen_kernel = KL_POCLBM;
-		/* Use phatk for the rest R5xxx R6xxx */
-		} else {
-			applog(LOG_INFO, "Selecting phatk kernel");
-			clState->chosen_kernel = KL_PHATK;
-		}
-		cgpu->kernel = clState->chosen_kernel;
-	} else {
-		clState->chosen_kernel = cgpu->kernel;
-		if (clState->chosen_kernel == KL_PHATK &&
-		    (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
-		     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
-		     strstr(vbuff, "923.1") || strstr(vbuff, "938.2") ||
-		     strstr(vbuff, "1113.2"))) {
-			applog(LOG_WARNING, "WARNING: You have selected the phatk kernel.");
-			applog(LOG_WARNING, "You are running SDK 2.6+ which performs poorly with this kernel.");
-			applog(LOG_WARNING, "Downgrade your SDK and delete any .bin files before starting again.");
-			applog(LOG_WARNING, "Or allow cgminer to automatically choose a more suitable kernel.");
-		}
-	}
-
-	/* For some reason 2 vectors is still better even if the card says
-	 * otherwise, and many cards lie about their max so use 256 as max
-	 * unless explicitly set on the command line. Tahiti prefers 1 */
-	if (strstr(name, "Tahiti"))
-		preferred_vwidth = 1;
-	else if (preferred_vwidth > 2)
-		preferred_vwidth = 2;
-
-	switch (clState->chosen_kernel) {
-		case KL_POCLBM:
-			strcpy(filename, POCLBM_KERNNAME".cl");
-			strcpy(binaryfilename, POCLBM_KERNNAME);
-			break;
-		case KL_PHATK:
-			strcpy(filename, PHATK_KERNNAME".cl");
-			strcpy(binaryfilename, PHATK_KERNNAME);
-			break;
-		case KL_DIAKGCN:
-			strcpy(filename, DIAKGCN_KERNNAME".cl");
-			strcpy(binaryfilename, DIAKGCN_KERNNAME);
-			break;
-		case KL_SCRYPT:
-			strcpy(filename, SCRYPT_KERNNAME".cl");
-			strcpy(binaryfilename, SCRYPT_KERNNAME);
-			/* Scrypt only supports vector 1 */
-			cgpu->vwidth = 1;
-			break;
-		case KL_NONE: /* Shouldn't happen */
-		case KL_DIABLO:
-			strcpy(filename, DIABLO_KERNNAME".cl");
-			strcpy(binaryfilename, DIABLO_KERNNAME);
-			break;
-	}
-
-	if (cgpu->vwidth)
-		clState->vwidth = cgpu->vwidth;
-	else {
-		clState->vwidth = preferred_vwidth;
-		cgpu->vwidth = preferred_vwidth;
-	}
-
-	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
-		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt)
-			clState->goffset = true;
-
-	if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
-		clState->wsize = cgpu->work_size;
-	else if (opt_scrypt)
-		clState->wsize = 256;
-	else if (strstr(name, "Tahiti"))
-		clState->wsize = 64;
-	else
-		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	cgpu->work_size = clState->wsize;
-
-#ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		if (!cgpu->opt_lg) {
-			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
-			cgpu->lookup_gap = 2;
-		} else
-			cgpu->lookup_gap = cgpu->opt_lg;
-
-		if (!cgpu->opt_tc) {
-			unsigned int sixtyfours;
-
-			sixtyfours =  cgpu->max_alloc / 131072 / 64 - 1;
-			cgpu->thread_concurrency = sixtyfours * 64;
-			if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
-				cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
-				if (cgpu->thread_concurrency > cgpu->shaders * 5)
-					cgpu->thread_concurrency = cgpu->shaders * 5;
-			}
-			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
-		} else
-			cgpu->thread_concurrency = cgpu->opt_tc;
-	}
-#endif
-
-	FILE *binaryfile;
-	size_t *binary_sizes;
-	char **binaries;
-	int pl;
-	char *source = file_contents(filename, &pl);
-	size_t sourceSize[] = {(size_t)pl};
-	cl_uint slot, cpnd;
-
-	slot = cpnd = 0;
-
-	if (!source)
-		return NULL;
-
-	binary_sizes = calloc(sizeof(size_t) * MAX_GPUDEVICES * 4, 1);
-	if (unlikely(!binary_sizes)) {
-		applog(LOG_ERR, "Unable to calloc binary_sizes");
-		return NULL;
-	}
-	binaries = calloc(sizeof(char *) * MAX_GPUDEVICES * 4, 1);
-	if (unlikely(!binaries)) {
-		applog(LOG_ERR, "Unable to calloc binaries");
-		return NULL;
-	}
-
-	strcat(binaryfilename, name);
-	if (clState->goffset)
-		strcat(binaryfilename, "g");
-	if (opt_scrypt) {
-#ifdef USE_SCRYPT
-		sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
-		strcat(binaryfilename, numbuf);
-#endif
-	} else {
-		sprintf(numbuf, "v%d", clState->vwidth);
-		strcat(binaryfilename, numbuf);
-	}
-	sprintf(numbuf, "w%d", (int)clState->wsize);
-	strcat(binaryfilename, numbuf);
-	sprintf(numbuf, "l%d", (int)sizeof(long));
-	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, ".bin");
-
-	binaryfile = fopen(binaryfilename, "rb");
-	if (!binaryfile) {
-		applog(LOG_DEBUG, "No binary found, generating from source");
-	} else {
-		struct stat binary_stat;
-
-		if (unlikely(stat(binaryfilename, &binary_stat))) {
-			applog(LOG_DEBUG, "Unable to stat binary, generating from source");
-			fclose(binaryfile);
-			goto build;
-		}
-		if (!binary_stat.st_size)
-			goto build;
-
-		binary_sizes[slot] = binary_stat.st_size;
-		binaries[slot] = (char *)calloc(binary_sizes[slot], 1);
-		if (unlikely(!binaries[slot])) {
-			applog(LOG_ERR, "Unable to calloc binaries");
-			fclose(binaryfile);
-			return NULL;
-		}
-
-		if (fread(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot]) {
-			applog(LOG_ERR, "Unable to fread binaries");
-			fclose(binaryfile);
-			free(binaries[slot]);
-			goto build;
-		}
-
-		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)binaries, &status, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
-			fclose(binaryfile);
-			free(binaries[slot]);
-			goto build;
-		}
-
-		fclose(binaryfile);
-		applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename);
-
-		goto built;
-	}
-
-	/////////////////////////////////////////////////////////////////
-	// Load CL file, build CL program object, create CL kernel object
-	/////////////////////////////////////////////////////////////////
-
-build:
-	clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithSource)", status);
-		return NULL;
-	}
-
-	/* create a cl program executable for all the devices specified */
-	char *CompilerOptions = calloc(1, 256);
-
-#ifdef USE_SCRYPT
-	if (opt_scrypt)
-		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
-	else
-#endif
-	{
-		sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
-			(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
-	}
-	applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
-	if (clState->vwidth > 1)
-		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
-
-	if (clState->hasBitAlign) {
-		strcat(CompilerOptions, " -D BITALIGN");
-		applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
-		if (!clState->hasOpenCL12plus &&
-		    (strstr(name, "Cedar") ||
-		     strstr(name, "Redwood") ||
-		     strstr(name, "Juniper") ||
-		     strstr(name, "Cypress" ) ||
-		     strstr(name, "Hemlock" ) ||
-		     strstr(name, "Caicos" ) ||
-		     strstr(name, "Turks" ) ||
-		     strstr(name, "Barts" ) ||
-		     strstr(name, "Cayman" ) ||
-		     strstr(name, "Antilles" ) ||
-		     strstr(name, "Wrestler" ) ||
-		     strstr(name, "Zacate" ) ||
-		     strstr(name, "WinterPark" )))
-			patchbfi = true;
-	} else
-		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");
-
-	if (patchbfi) {
-		strcat(CompilerOptions, " -D BFI_INT");
-		applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
-	} else
-		applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
-
-	if (clState->goffset)
-		strcat(CompilerOptions, " -D GOFFSET");
-
-	if (!clState->hasOpenCL11plus)
-		strcat(CompilerOptions, " -D OCL1");
-
-	applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
-	status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL);
-	free(CompilerOptions);
-
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);
-		size_t logSize;
-		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
-
-		char *log = malloc(logSize);
-		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
-		applog(LOG_ERR, "%s", log);
-		return NULL;
-	}
-
-	prog_built = true;
-
-#ifdef __APPLE__
-	/* OSX OpenCL breaks reading off binaries with >1 GPU so always build
-	 * from source. */
-	goto built;
-#endif
-
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
-		return NULL;
-	}
-
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*cpnd, binary_sizes, NULL);
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
-		return NULL;
-	}
-
-	/* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
-	 * to iterate over all the binary slots and find where the real program
-	 * is. What the heck is this!? */
-	for (slot = 0; slot < cpnd; slot++)
-		if (binary_sizes[slot])
-			break;
-
-	/* copy over all of the generated binaries. */
-	applog(LOG_DEBUG, "Binary size for gpu %d found in binary slot %d: %d", gpu, slot, (int)(binary_sizes[slot]));
-	if (!binary_sizes[slot]) {
-		applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, FAIL!");
-		return NULL;
-	}
-	binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1);
-	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL );
-	if (unlikely(status != CL_SUCCESS)) {
-		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
-		return NULL;
-	}
-
-	/* Patch the kernel if the hardware supports BFI_INT but it needs to
-	 * be hacked in */
-	if (patchbfi) {
-		unsigned remaining = binary_sizes[slot];
-		char *w = binaries[slot];
-		unsigned int start, length;
-
-		/* Find 2nd incidence of .text, and copy the program's
-		* position and length at a fixed offset from that. Then go
-		* back and find the 2nd incidence of \x7ELF (rewind by one
-		* from ELF) and then patch the opcocdes */
-		if (!advance(&w, &remaining, ".text"))
-			goto build;
-		w++; remaining--;
-		if (!advance(&w, &remaining, ".text")) {
-			/* 32 bit builds only one ELF */
-			w--; remaining++;
-		}
-		memcpy(&start, w + 285, 4);
-		memcpy(&length, w + 289, 4);
-		w = binaries[slot]; remaining = binary_sizes[slot];
-		if (!advance(&w, &remaining, "ELF"))
-			goto build;
-		w++; remaining--;
-		if (!advance(&w, &remaining, "ELF")) {
-			/* 32 bit builds only one ELF */
-			w--; remaining++;
-		}
-		w--; remaining++;
-		w += start; remaining -= start;
-		applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
-			w, remaining);
-		patch_opcodes(w, length);
-
-		status = clReleaseProgram(clState->program);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Releasing program. (clReleaseProgram)", status);
-			return NULL;
-		}
-
-		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)&binaries[slot], &status, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
-			return NULL;
-		}
-
-		/* Program needs to be rebuilt */
-		prog_built = false;
-	}
-
-	free(source);
-
-	/* Save the binary to be loaded next time */
-	binaryfile = fopen(binaryfilename, "wb");
-	if (!binaryfile) {
-		/* Not a fatal problem, just means we build it again next time */
-		applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
-	} else {
-		if (unlikely(fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot])) {
-			applog(LOG_ERR, "Unable to fwrite to binaryfile");
-			return NULL;
-		}
-		fclose(binaryfile);
-	}
-built:
-	if (binaries[slot])
-		free(binaries[slot]);
-	free(binaries);
-	free(binary_sizes);
-
-	applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %d vectors and worksize %d",
-	       filename, clState->hasBitAlign ? "" : "out", clState->vwidth, (int)(clState->wsize));
-
-	if (!prog_built) {
-		/* create a cl program executable for all the devices specified */
-		status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);
-			size_t logSize;
-			status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
-
-			char *log = malloc(logSize);
-			status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
-			applog(LOG_ERR, "%s", log);
-			return NULL;
-		}
-	}
-
-	/* get a kernel object handle for a kernel with the given name */
-	clState->kernel = clCreateKernel(clState->program, "search", &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: Creating Kernel from program. (clCreateKernel)", status);
-		return NULL;
-	}
-
-#ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
-		size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
-
-		/* Use the max alloc value which has been rounded to a power of
-		 * 2 greater >= required amount earlier */
-		if (bufsize > cgpu->max_alloc) {
-			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu",
-						gpu, (long unsigned int)(cgpu->max_alloc));
-			applog(LOG_WARNING, "Your scrypt settings come to %d", (int)bufsize);
-		}
-		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", (int)bufsize);
-		clState->padbufsize = bufsize;
-
-		/* This buffer is weird and might work to some degree even if
-		 * the create buffer call has apparently failed, so check if we
-		 * get anything back before we call it a failure. */
-		clState->padbuffer8 = NULL;
-		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
-		if (status != CL_SUCCESS && !clState->padbuffer8) {
-			applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
-			return NULL;
-		}
-
-		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
-		if (status != CL_SUCCESS) {
-			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
-			return NULL;
-		}
-		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
-	} else
-#endif
-	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
-	if (status != CL_SUCCESS) {
-		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
-		return NULL;
-	}
-
-	return clState;
-}
-#endif /* HAVE_OPENCL */
-

+ 0 - 42
ocl.h

@@ -1,42 +0,0 @@
-#ifndef __OCL_H__
-#define __OCL_H__
-
-#include "config.h"
-
-#include <stdbool.h>
-#ifdef HAVE_OPENCL
-#ifdef __APPLE_CC__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/cl.h>
-#endif
-
-#include "miner.h"
-
-typedef struct { /* OpenCL state assembled and returned by initCl() */
-	cl_context context; /* context the device buffers are created in */
-	cl_kernel kernel; /* handle of the program's "search" kernel */
-	cl_command_queue commandQueue;
-	cl_program program; /* program the kernel handle is created from */
-	cl_mem outputBuffer; /* write-only buffer: BUFFERSIZE, or SCRYPT_BUFFERSIZE when scrypt is selected */
-#ifdef USE_SCRYPT
-	cl_mem CLbuffer0; /* 128-byte read-only buffer */
-	cl_mem padbuffer8; /* read-write scrypt buffer, padbufsize bytes requested */
-	size_t padbufsize; /* 128 * ipt * thread_concurrency, as computed in initCl() */
-	void * cldata; /* NOTE(review): use is not visible from this header - confirm */
-#endif
-	bool hasBitAlign; /* NOTE(review): this and the fields below are set outside the code visible here - semantics to be confirmed */
-	bool hasOpenCL11plus;
-	bool hasOpenCL12plus;
-	bool goffset;
-	cl_uint vwidth; /* presumably the kernel vector width - TODO confirm */
-	size_t max_work_size;
-	size_t wsize; /* presumably the work-group size - TODO confirm */
-	enum cl_kernels chosen_kernel;
-} _clState;
-
-extern char *file_contents(const char *filename, int *length); /* presumably returns the file's bytes and stores their count in *length - TODO confirm ownership */
-extern int clDevicesNum(void); /* NOTE(review): error return convention is not visible here */
-extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize); /* sets up the state for device `gpu`; returns NULL when kernel or buffer creation fails */
-#endif /* HAVE_OPENCL */
-#endif /* __OCL_H__ */

+ 0 - 417
phatk121016.cl

@@ -1,417 +0,0 @@
-// This file is taken and modified from the public-domain poclbm project, and
-// I have therefore decided to keep it public-domain.
-// Modified version copyright 2011-2012 Con Kolivas
-
-#ifdef VECTORS4 // u is the word type each work item computes with: 4, 2 or 1 lanes of uint
-	typedef uint4 u;
-#elif defined VECTORS2
-	typedef uint2 u;
-#else
-	typedef uint u;
-#endif
-
-__constant uint K[64] = { // SHA-256 round constants; the round macros index this as K[(n) % 64]
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-__constant uint ConstW[128] = { // fixed schedule words read by t1C and the P*C macros: entries 0-63 for rounds n, entries 64-127 for rounds 64 + n
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000U, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000280U, // 0x280 = 640; presumably the message length in bits - TODO confirm
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x80000000U, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000100U, // 0x100 = 256; presumably the length in bits of the second message - TODO confirm
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
-};
-
-__constant uint H[8] = { // SHA-256 initial hash values; the kernel copies them into Vals[] before its second pass
-	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-};
-
-
-#ifdef BITALIGN // selects the implementations of rot, rotr, Ch and Ma used by the round macros
-	#pragma OPENCL EXTENSION cl_amd_media_ops : enable
-	#define rot(x, y) amd_bitalign(x, x, (uint)(32 - y)) // rotate left by y, written as a right funnel shift by 32 - y
-
-// This part is not from the stock poclbm kernel. It's part of an optimization
-// added in the Phoenix Miner.
-
-// Some AMD devices have a BFI_INT opcode, which behaves exactly like the
-// SHA-256 Ch function, but provides it in exactly one instruction. If
-// detected, use it for Ch. Otherwise, construct Ch out of simpler logical
-// primitives.
-
- #ifdef BFI_INT
-	// Well, slight problem... It turns out BFI_INT isn't actually exposed to
-	// OpenCL (or CAL IL for that matter) in any way. However, there is 
-	// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
-	// amd_bytealign, takes the same inputs, and provides the same output. 
-	// We can use that as a placeholder for BFI_INT and have the application 
-	// patch it after compilation.
-	
-	// This is the BFI_INT function
-	#define Ch(x, y, z) amd_bytealign(x,y,z)
-	// Ma can also be implemented in terms of BFI_INT...
-	#define Ma(z, x, y) amd_bytealign(z^x,y,x) // note: parameters are named (z, x, y) here, unlike the other Ma definitions
- #else // BFI_INT
-	// Later SDKs optimise this to BFI INT without patching and GCN
-	// actually fails if manually patched with BFI_INT
-
-	#define Ch(x, y, z) bitselect((u)z, (u)y, (u)x)
-	#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
-	#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) // rotate right by y; rotr is defined only here and on the non-BITALIGN path
- #endif
-#else // BITALIGN
-	#define Ch(x, y, z) (z ^ (x & (y ^ z)))
-	#define Ma(x, y, z) ((x & z) | (y & (x | z)))
-	#define rot(x, y) rotate((u)x, (u)y) // OpenCL rotate() is a left rotation
-	#define rotr(x, y) rotate((u)x, (u)(32-y)) // right rotation expressed as a left rotation by 32 - y
-#endif
-
-
-
-//Various intermediate calculations for each SHA round. Vals[] is indexed as (k + 128 - n) % 8, so the eight working variables rotate through the array as n advances instead of being copied.
-#define s0(n) (S0(Vals[(0 + 128 - (n)) % 8]))
-#define S0(n) (rot(n, 30u)^rot(n, 19u)^rot(n,10u)) // rot is a left rotation, so these equal right rotations by 2, 13 and 22
-
-#define s1(n) (S1(Vals[(4 + 128 - (n)) % 8]))
-#define S1(n) (rot(n, 26u)^rot(n, 21u)^rot(n, 7u)) // equal to right rotations by 6, 11 and 25
-
-#define ch(n) Ch(Vals[(4 + 128 - (n)) % 8],Vals[(5 + 128 - (n)) % 8],Vals[(6 + 128 - (n)) % 8])
-#define maj(n) Ma(Vals[(1 + 128 - (n)) % 8],Vals[(2 + 128 - (n)) % 8],Vals[(0 + 128 - (n)) % 8])
-
-//t1 calc when W is already calculated (function-like macro; the kernel also declares a plain variable named t1)
-#define t1(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] +  W[(n)] + s1(n) + ch(n) 
-
-//t1 calc which calculates W (W(n) stores into W[n] as a side effect)
-#define t1W(n) K[(n) % 64] + Vals[(7 + 128 - (n)) % 8] +  W(n) + s1(n) + ch(n)
-
-//Used for constant W Values (the compiler optimizes out zeros)
-#define t1C(n) (K[(n) % 64]+ ConstW[(n)]) + Vals[(7 + 128 - (n)) % 8] + s1(n) + ch(n)
-
-//t2 Calc
-#define t2(n)  maj(n) + s0(n)
-
-#define rotC(x,n) (x<<n | x >> (32-n)) // left rotation built from plain shifts; applied to ConstW entries by P1C/P2C
-
-//W calculation used for SHA round (computes the schedule word and stores it in W[n])
-#define W(n) (W[n] = P4(n) + P3(n) + P2(n) + P1(n))
-
-
-
-//Partial W calculations (used for the beginning where only some values are nonzero)
-#define P1(n) ((rot(W[(n)-2],15u)^rot(W[(n)-2],13u)^((W[(n)-2])>>10U)))
-#define P2(n) ((rot(W[(n)-15],25u)^rot(W[(n)-15],14u)^((W[(n)-15])>>3U)))
-
-
-#define p1(x) ((rot(x,15u)^rot(x,13u)^((x)>>10U)))
-#define p2(x) ((rot(x,25u)^rot(x,14u)^((x)>>3U)))
-
-
-#define P3(n)  W[n-7]
-#define P4(n)  W[n-16]
-
-
-//Partial Calcs for constant W values (same terms as P1..P4, read from ConstW instead of W)
-#define P1C(n) ((rotC(ConstW[(n)-2],15)^rotC(ConstW[(n)-2],13)^((ConstW[(n)-2])>>10U)))
-#define P2C(n) ((rotC(ConstW[(n)-15],25)^rotC(ConstW[(n)-15],14)^((ConstW[(n)-15])>>3U)))
-#define P3C(x)  ConstW[x-7]
-#define P4C(x)  ConstW[x-16]
-
-//SHA round with built in W calc (t1W(n) is expanded twice; the first statement only changes a slot that t1W does not read)
-#define sharoundW(n) Barrier1(n);  Vals[(3 + 128 - (n)) % 8] += t1W(n); Vals[(7 + 128 - (n)) % 8] = t1W(n) + t2(n);  
-
-//SHA round without W calc
-#define sharound(n)  Barrier2(n); Vals[(3 + 128 - (n)) % 8] += t1(n); Vals[(7 + 128 - (n)) % 8] = t1(n) + t2(n);
-
-//SHA round for constant W values
-#define sharoundC(n)  Barrier3(n); Vals[(3 + 128 - (n)) % 8] += t1C(n); Vals[(7 + 128 - (n)) % 8] = t1C(n) + t2(n);
-
-//The compiler is stupid... I put this in there only to stop the compiler from (de)optimizing the order
-#define Barrier1(n) t1 = t1C((n+1))
-#define Barrier2(n) t1 = t1C((n))
-#define Barrier3(n) t1 = t1C((n))
-
-//#define WORKSIZE 256 (WORKSIZE is not defined in this file; it has to be supplied when the kernel is built)
-#define MAXBUFFERS (4095) // not referenced anywhere else in this file
-
-__kernel // Double SHA-256 nonce search: each work item hashes the nonce(s) held in W[3] and stores those whose check word W[117] is zero
- __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-void search(	const uint state0, const uint state1, const uint state2, const uint state3,
-						const uint state4, const uint state5, const uint state6, const uint state7,
-						const uint B1, const uint C1, const uint D1,
-						const uint F1, const uint G1, const uint H1,
-						const u base, // starting nonce; per-item nonces are derived from it and the work-item ids below
-						const uint W16, const uint W17,
-						const uint PreVal4, const uint PreVal0,
-						const uint PreW18, const uint PreW19,
-						const uint PreW31, const uint PreW32,
-						
-						volatile __global uint * output) // result buffer written through SETFOUND
-{
-
-
-	u W[124]; // schedule words: rounds of the first hash use W[n], rounds of the second use W[64 + n]
-	u Vals[8]; // working variables, addressed through the rotating index in the round macros
-
-//Dummy Variable to prevent compiler from reordering between rounds
-	u t1;
-
-	//Vals[0]=state0;
-	Vals[1]=B1;
-	Vals[2]=C1;
-	Vals[3]=D1;
-	//Vals[4]=PreVal4;
-	Vals[5]=F1;
-	Vals[6]=G1;
-	Vals[7]=H1;
-
-	W[16] = W16;
-	W[17] = W17;
-
-#ifdef VECTORS4
-	//Less dependencies to get both the local id and group id and then add them
-	W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
-	uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
-	//Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3
-	W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U};
-#elif defined VECTORS2
-	W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
-	uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
-	W[18] = PreW18 + (u){r, r ^ 0x2004000U};
-#else
-	W[3] = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
-	u r = rot(W[3],25u)^rot(W[3],14u)^((W[3])>>3U);
-	W[18] = PreW18 + r;
-#endif
-	//the order of the W calcs and Rounds is like this because the compiler needs help finding how to order the instructions
-
-
-
-	Vals[4] = PreVal4 + W[3];
-	Vals[0] = PreVal0 + W[3];
-
-	sharoundC(4);
-	W[19] = PreW19 + W[3];
-	sharoundC(5);
-	W[20] = P4C(20) + P1(20);
-	sharoundC(6);
-	W[21] = P1(21);
-	sharoundC(7);
-	W[22] = P3C(22) + P1(22);
-	sharoundC(8);
-	W[23] = W[16] + P1(23);
-	sharoundC(9);
-	W[24] = W[17] + P1(24);
-	sharoundC(10);
-	W[25] = P1(25) + P3(25);
-	W[26] = P1(26) + P3(26);
-	sharoundC(11);
-	W[27] = P1(27) + P3(27);
-	W[28] = P1(28) + P3(28);
-	sharoundC(12);
-	W[29] = P1(29) + P3(29);
-	sharoundC(13);
-	W[30] = P1(30) + P2C(30) + P3(30);
-	W[31] = PreW31 + (P1(31) + P3(31));
-	sharoundC(14);
-	W[32] = PreW32 + (P1(32) + P3(32));
-	sharoundC(15);
-	sharound(16);
-	sharound(17);
-	sharound(18);
-	sharound(19);
-	sharound(20);
-	sharound(21);
-	sharound(22);
-	sharound(23);
-	sharound(24);
-	sharound(25);
-	sharound(26);
-	sharound(27);
-	sharound(28);
-	sharound(29);
-	sharound(30);
-	sharound(31);
-	sharound(32);
-	sharoundW(33);
-	sharoundW(34);
-	sharoundW(35);
-	sharoundW(36);
-	sharoundW(37);	
-	sharoundW(38);
-	sharoundW(39);
-	sharoundW(40);
-	sharoundW(41);
-	sharoundW(42);
-	sharoundW(43);
-	sharoundW(44);
-	sharoundW(45);
-	sharoundW(46);
-	sharoundW(47);
-	sharoundW(48);
-	sharoundW(49);
-	sharoundW(50);
-	sharoundW(51);
-	sharoundW(52);
-	sharoundW(53);
-	sharoundW(54);
-	sharoundW(55);
-	sharoundW(56);
-	sharoundW(57);
-	sharoundW(58);
-	sharoundW(59);
-	sharoundW(60);
-	sharoundW(61);
-	sharoundW(62);
-	sharoundW(63);
-
-	W[64]=state0+Vals[0]; // first pass done: state + working variables become schedule words W[64..71] of the second pass
-	W[65]=state1+Vals[1];
-	W[66]=state2+Vals[2];
-	W[67]=state3+Vals[3];
-	W[68]=state4+Vals[4];
-	W[69]=state5+Vals[5];
-	W[70]=state6+Vals[6];
-	W[71]=state7+Vals[7];
-
-	Vals[0]=H[0]; // second pass starts from the values in H[]
-	Vals[1]=H[1];
-	Vals[2]=H[2];
-	Vals[3]=H[3];
-	Vals[4]=H[4];
-	Vals[5]=H[5];
-	Vals[6]=H[6];
-	Vals[7]=H[7];
-
-	//sharound(64 + 0); replaced by the three statements below, which use pre-added constants
-	const u Temp = (0xb0edbdd0U + K[0]) +  W[64];
-	Vals[7] = Temp + 0x08909ae5U;
-	Vals[3] = 0xa54ff53aU + Temp;
-	
-#define P124(n) P2(n) + P1(n) + P4(n)
-
-
-	W[64 + 16] = + P2(64 + 16) + P4(64 + 16);
-	sharound(64 + 1);
-	W[64 + 17] = P1C(64 + 17) + P2(64 + 17) + P4(64 + 17);
-	sharound(64 + 2);
-	W[64 + 18] = P124(64 + 18);
-	sharound(64 + 3);
-	W[64 + 19] = P124(64 + 19);
-	sharound(64 + 4);
-	W[64 + 20] = P124(64 + 20);
-	sharound(64 + 5);
-	W[64 + 21] = P124(64 + 21);
-	sharound(64 + 6);
-	W[64 + 22] = P4(64 + 22) + P3C(64 + 22) + P2(64 + 22) + P1(64 + 22);
-	sharound(64 + 7);
-	W[64 + 23] = P4(64 + 23) + P3(64 + 23) + P2C(64 + 23) + P1(64 + 23);
-	sharoundC(64 + 8);
-	W[64 + 24] =   P1(64 + 24) + P4C(64 + 24) + P3(64 + 24);
-	sharoundC(64 + 9);
-	W[64 + 25] = P3(64 + 25) + P1(64 + 25);
-	sharoundC(64 + 10);
-	W[64 + 26] = P3(64 + 26) + P1(64 + 26);
-	sharoundC(64 + 11);
-	W[64 + 27] = P3(64 + 27) + P1(64 + 27);
-	sharoundC(64 + 12);
-	W[64 + 28] = P3(64 + 28) + P1(64 + 28);
-	sharoundC(64 + 13);
-	W[64 + 29] = P1(64 + 29) + P3(64 + 29);
-	W[64 + 30] = P3(64 + 30) + P2C(64 + 30) + P1(64 + 30);
-	sharoundC(64 + 14);
-	W[64 + 31] = P4C(64 + 31) + P3(64 + 31) + P2(64 + 31) + P1(64 + 31);
-	sharoundC(64 + 15);
-	sharound(64 + 16);
-	sharound(64 + 17);
-	sharound(64 + 18);
-	sharound(64 + 19);
-	sharound(64 + 20);
-	sharound(64 + 21);
-	sharound(64 + 22);
-	sharound(64 + 23);
-	sharound(64 + 24);
-	sharound(64 + 25);
-	sharound(64 + 26);
-	sharound(64 + 27);
-	sharound(64 + 28);
-	sharound(64 + 29);
-	sharound(64 + 30);
-	sharound(64 + 31);
-	sharoundW(64 + 32);
-	sharoundW(64 + 33);
-	sharoundW(64 + 34);
-	sharoundW(64 + 35);
-	sharoundW(64 + 36);
-	sharoundW(64 + 37);
-	sharoundW(64 + 38);
-	sharoundW(64 + 39);
-	sharoundW(64 + 40);
-	sharoundW(64 + 41);
-	sharoundW(64 + 42);
-	sharoundW(64 + 43);
-	sharoundW(64 + 44);
-	sharoundW(64 + 45);
-	sharoundW(64 + 46);
-	sharoundW(64 + 47);
-	sharoundW(64 + 48);
-	sharoundW(64 + 49);
-	sharoundW(64 + 50);
-	sharoundW(64 + 51);
-	sharoundW(64 + 52);
-	sharoundW(64 + 53);
-	sharoundW(64 + 54);
-	sharoundW(64 + 55);
-	sharoundW(64 + 56);
-	sharoundW(64 + 57);
-	sharoundW(64 + 58); // last round issued through the macros; the rest is folded into the single expression below, which only produces W[117]
-
-	W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
-		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));
-
-#define FOUND (0x0F) // output[FOUND] is used as the running index of the next free result slot
-#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce // plain read-modify-write, no atomics. NOTE(review): an index of 15 would overwrite the counter slot itself - confirm how the host bounds/resets it
-
-#ifdef VECTORS4
-	bool result = W[117].x & W[117].y & W[117].z & W[117].w; // bitwise AND of the lanes: zero whenever any lane is zero; each lane is re-checked below
-	if (!result) {
-		if (!W[117].x)
-			SETFOUND(W[3].x);
-		if (!W[117].y)
-			SETFOUND(W[3].y);
-		if (!W[117].z)
-			SETFOUND(W[3].z);
-		if (!W[117].w)
-			SETFOUND(W[3].w);
-	}
-#elif defined VECTORS2
-	bool result = W[117].x & W[117].y; // same pre-filter as the 4-lane case
-	if (!result) {
-		if (!W[117].x)
-			SETFOUND(W[3].x);
-		if (!W[117].y)
-			SETFOUND(W[3].y);
-	}
-#else
-	if (!W[117])
-		SETFOUND(W[3]); // scalar case: report the nonce when the check word is zero
-#endif
-}

+ 0 - 1388
poclbm130302.cl

@@ -1,1388 +0,0 @@
-// -ck modified kernel taken from Phoenix taken from poclbm, with aspects of
-// phatk and others.
-// Modified version copyright 2011-2012 Con Kolivas
-
-// This file is taken and modified from the public-domain poclbm project, and
-// we have therefore decided to keep it public-domain in Phoenix.
-
-#ifdef VECTORS4 // u is the word type each work item computes with: 4, 2 or 1 lanes of uint
-	typedef uint4 u;
-#elif defined VECTORS2
-	typedef uint2 u;
-#else
-	typedef uint u;
-#endif
-
-__constant uint K[87] = { // entries 0-63: SHA-256 round constants; entries 64-86: extra constants reached through the x...U aliases defined after this table
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
-
-	0xc19bf3f4U, // index 64: start of the extra constants
-	0x80000000U,
-	0x00000280U,
-	0x00a00055U,
-	0xf377ed68U,
-	0xa54ff53aU,
-	0x08909ae5U,
-	0x90bb1e3cU,
-	0x9b05688cU,
-	0xca0b3af3U,
-	0x3c6ef372U,
-	0xbb67ae85U,
-	0x6a09e667U,
-	0x50c6645bU,
-	0x510e527fU,
-	0x3ac42e24U,
-	0x5807aa98U,
-	0xc19bf274U,
-	0x00a00000U,
-	0x00000100U,
-	0x11002000U,
-	0x00400022U,
-	0x136032edU // index 86
-};
-
-#define	xc19bf3f4U	K[64] /* each alias is named after the hex value stored in the K[] slot it expands to */
-#define	x80000000U	K[65]
-#define	x00000280U	K[66]
-#define	x00a00055U	K[67]
-#define	xf377ed68U	K[68]
-#define	xa54ff53aU	K[69]
-#define	x08909ae5U	K[70]
-#define	x90bb1e3cU	K[71]
-#define	x9b05688cU	K[72]
-#define	xca0b3af3U	K[73]
-#define	x3c6ef372U	K[74]
-#define	xbb67ae85U	K[75]
-#define	x6a09e667U	K[76]
-#define	x50c6645bU	K[77]
-#define	x510e527fU	K[78]
-#define	x3ac42e24U	K[79]
-#define	x5807aa98U	K[80]
-#define	xc19bf274U	K[81]
-#define	x00a00000U	K[82]
-#define	x00000100U	K[83]
-#define	x11002000U	K[84]
-#define	x00400022U	K[85]
-#define	x136032edU	K[86]
-
-// This part is not from the stock poclbm kernel. It's part of an optimization
-// added in the Phoenix Miner.
-
-// Some AMD devices have a BFI_INT opcode, which behaves exactly like the
-// SHA-256 ch function, but provides it in exactly one instruction. If
-// detected, use it for ch. Otherwise, construct ch out of simpler logical
-// primitives.
-
-#ifdef BITALIGN // selects the rotr implementation; ch, Ma and Ma2 are selected separately by BFI_INT below
-	#pragma OPENCL EXTENSION cl_amd_media_ops : enable
-	#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y) // rotate right by y
-#else
-	#define rotr(x, y) rotate((u)x, (u)(32 - y)) // OpenCL rotate() rotates left, so 32 - y gives a right rotation by y
-#endif
-#ifdef BFI_INT
-	// Well, slight problem... It turns out BFI_INT isn't actually exposed to
-	// OpenCL (or CAL IL for that matter) in any way. However, there is 
-	// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
-	// amd_bytealign, takes the same inputs, and provides the same output. 
-	// We can use that as a placeholder for BFI_INT and have the application 
-	// patch it after compilation.
-	
-	// This is the BFI_INT function
-	#define ch(x, y, z) amd_bytealign(x, y, z)
-	
-	// Ma can also be implemented in terms of BFI_INT...
-	#define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) )
-
-	// AMD's KernelAnalyzer throws errors compiling the kernel if we use
-	// amd_bytealign on constants with vectors enabled, so we use this to avoid
-	// problems. (this is used 4 times, and likely optimized out by the compiler.)
-	#define Ma2(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
-#else // BFI_INT
-	//GCN actually fails if manually patched with BFI_INT
-
-	#define ch(x, y, z) bitselect((u)z, (u)y, (u)x)
-	#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
-	#define Ma2(x, y, z) Ma(x, y, z) // without BFI_INT there is no placeholder to avoid, so Ma2 is simply Ma
-#endif
-
-
-__kernel
-__attribute__((vec_type_hint(u)))
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-void search(const uint state0, const uint state1, const uint state2, const uint state3,
-	const uint state4, const uint state5, const uint state6, const uint state7,
-	const uint b1, const uint c1,
-	const uint f1, const uint g1, const uint h1,
-#ifndef GOFFSET
-	const u base,
-#endif
-	const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r,
-	const uint D1A, const uint C1addK5, const uint B1addK6,
-	const uint W16addK16, const uint W17addK17,
-	const uint PreVal4addT1, const uint Preval0,
-	volatile __global uint * output)
-{
-	u Vals[24];
-	u *W = &Vals[8];
-
-#ifdef GOFFSET
-	const u nonce = (uint)(get_global_id(0));
-#else
-	const u nonce = base + (uint)(get_global_id(0));
-#endif
-
-Vals[5]=Preval0;
-Vals[5]+=nonce;
-
-Vals[0]=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],b1,c1);
-Vals[0]+=D1A;
-
-Vals[2]=Vals[0];
-Vals[2]+=h1;
-
-Vals[1]=PreVal4addT1;
-Vals[1]+=nonce;
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-
-Vals[6]=C1addK5;
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],b1);
-
-Vals[3]=Vals[6];
-Vals[3]+=g1;
-Vals[0]+=Ma2(g1,Vals[1],f1);
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma2(f1,Vals[0],Vals[1]);
-
-Vals[7]=B1addK6;
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-
-Vals[4]=Vals[7];
-Vals[4]+=f1;
-
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[7];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[8];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[9];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[10];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[11];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[12];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[13];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[14];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=xc19bf3f4U;
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=W16addK16;
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=W17addK17;
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[2]=(rotr(nonce,7)^rotr(nonce,18)^(nonce>>3U));
-W[2]+=fw2;
-Vals[4]+=W[2];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[18];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[3]=nonce;
-W[3]+=fw3;
-Vals[1]+=W[3];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[19];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[4]=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-W[4]+=x80000000U;
-Vals[0]+=W[4];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[20];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[5]=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-Vals[6]+=W[5];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[21];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[6]=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-W[6]+=x00000280U;
-Vals[7]+=W[6];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[22];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[7]=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-W[7]+=fw0;
-Vals[5]+=W[7];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[23];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[8]=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
-W[8]+=fw1;
-Vals[2]+=W[8];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[24];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[9]=W[2];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-Vals[3]+=W[9];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[25];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[10]=W[3];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-Vals[4]+=W[10];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[26];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[11]=W[4];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-Vals[1]+=W[11];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[27];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[12]=W[5];
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-Vals[0]+=W[12];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[28];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[13]=W[6];
-W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
-Vals[6]+=W[13];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[29];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[14]=x00a00055U;
-W[14]+=W[7];
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
-Vals[7]+=W[14];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[30];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[15]=fw15;
-W[15]+=W[8];
-W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-Vals[5]+=W[15];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[31];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[0]=fw01r;
-W[0]+=W[9];
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
-Vals[2]+=W[0];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[32];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[1]=fw1;
-W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
-W[1]+=W[10];
-W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
-Vals[3]+=W[1];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[33];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
-W[2]+=W[11];
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
-Vals[4]+=W[2];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[34];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
-W[3]+=W[12];
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-Vals[1]+=W[3];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[35];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
-W[4]+=W[13];
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-Vals[0]+=W[4];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[36];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
-W[5]+=W[14];
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-Vals[6]+=W[5];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[37];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
-W[6]+=W[15];
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-Vals[7]+=W[6];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[38];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
-W[7]+=W[0];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-Vals[5]+=W[7];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[39];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
-W[8]+=W[1];
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
-Vals[2]+=W[8];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[40];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
-W[9]+=W[2];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-Vals[3]+=W[9];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[41];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
-W[10]+=W[3];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-Vals[4]+=W[10];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[42];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
-W[11]+=W[4];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-Vals[1]+=W[11];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[43];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
-W[12]+=W[5];
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-Vals[0]+=W[12];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[44];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
-W[13]+=W[6];
-W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
-Vals[6]+=W[13];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[45];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
-W[14]+=W[7];
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
-Vals[7]+=W[14];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[46];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
-W[15]+=W[8];
-W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-Vals[5]+=W[15];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[47];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
-W[0]+=W[9];
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
-Vals[2]+=W[0];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[48];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
-W[1]+=W[10];
-W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
-Vals[3]+=W[1];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[49];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
-W[2]+=W[11];
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
-Vals[4]+=W[2];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[50];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
-W[3]+=W[12];
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-Vals[1]+=W[3];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[51];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
-W[4]+=W[13];
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-Vals[0]+=W[4];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[52];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
-W[5]+=W[14];
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-Vals[6]+=W[5];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[53];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
-W[6]+=W[15];
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-Vals[7]+=W[6];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[54];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
-W[7]+=W[0];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-Vals[5]+=W[7];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[55];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
-W[8]+=W[1];
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
-Vals[2]+=W[8];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[56];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
-W[9]+=W[2];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-Vals[3]+=W[9];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[57];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
-W[10]+=W[3];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-Vals[4]+=W[10];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[58];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
-W[11]+=W[4];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-Vals[1]+=W[11];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[59];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
-W[12]+=W[5];
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-Vals[0]+=W[12];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[60];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
-W[13]+=W[6];
-W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
-Vals[6]+=W[13];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[61];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-Vals[7]+=W[14];
-Vals[7]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
-Vals[7]+=W[7];
-Vals[7]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[62];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-Vals[5]+=W[15];
-Vals[5]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
-Vals[5]+=W[8];
-Vals[5]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[63];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-Vals[5]+=state0;
-
-W[7]=state7;
-W[7]+=Vals[2];
-
-Vals[2]=xf377ed68U;
-Vals[2]+=Vals[5];
-W[0]=Vals[5];
-Vals[5]=x6a09e667U;
-
-W[3]=state3;
-W[3]+=Vals[0];
-
-Vals[0]=xa54ff53aU;
-Vals[0]+=Vals[2];
-Vals[2]+=x08909ae5U;
-
-W[6]=state6;
-W[6]+=Vals[3];
-
-Vals[3]=x90bb1e3cU;
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=(x9b05688cU^(Vals[0]&xca0b3af3U));
-
-Vals[7]+=state1;
-Vals[3]+=Vals[7];
-W[1]=Vals[7];
-Vals[7]=xbb67ae85U;
-
-W[2]=state2;
-W[2]+=Vals[6];
-
-Vals[6]=x3c6ef372U;
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma2(Vals[7],Vals[2],Vals[5]);
-
-W[5]=state5;
-W[5]+=Vals[4];
-
-Vals[4]=x50c6645bU;
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],x510e527fU);
-Vals[4]+=W[2];
-
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma2(Vals[5],Vals[3],Vals[2]);
-
-W[4]=state4;
-W[4]+=Vals[1];
-
-Vals[1]=x3ac42e24U;
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=W[3];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[4];
-Vals[0]+=W[4];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[5];
-Vals[6]+=W[5];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[6];
-Vals[7]+=W[6];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[7];
-Vals[5]+=W[7];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=x5807aa98U;
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[9];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[10];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[11];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[12];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[13];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[14];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=xc19bf274U;
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
-Vals[2]+=W[0];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[16];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
-W[1]+=x00a00000U;
-Vals[3]+=W[1];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[17];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
-Vals[4]+=W[2];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[18];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-Vals[1]+=W[3];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[19];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-Vals[0]+=W[4];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[20];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-Vals[6]+=W[5];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[21];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
-W[6]+=x00000100U;
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-Vals[7]+=W[6];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[22];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[7]+=x11002000U;
-W[7]+=W[0];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-Vals[5]+=W[7];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[23];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[8]=x80000000U;
-W[8]+=W[1];
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
-Vals[2]+=W[8];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[24];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[9]=W[2];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-Vals[3]+=W[9];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[25];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[10]=W[3];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-Vals[4]+=W[10];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[26];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[11]=W[4];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-Vals[1]+=W[11];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[27];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[12]=W[5];
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-Vals[0]+=W[12];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[28];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[13]=W[6];
-W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
-Vals[6]+=W[13];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[29];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[14]=x00400022U;
-W[14]+=W[7];
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
-Vals[7]+=W[14];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[30];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[15]=x00000100U;
-W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
-W[15]+=W[8];
-W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-Vals[5]+=W[15];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[31];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
-W[0]+=W[9];
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
-Vals[2]+=W[0];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[32];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
-W[1]+=W[10];
-W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
-Vals[3]+=W[1];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[33];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
-W[2]+=W[11];
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
-Vals[4]+=W[2];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[34];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
-W[3]+=W[12];
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-Vals[1]+=W[3];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[35];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
-W[4]+=W[13];
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-Vals[0]+=W[4];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[36];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
-W[5]+=W[14];
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-Vals[6]+=W[5];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[37];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
-W[6]+=W[15];
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-Vals[7]+=W[6];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[38];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
-W[7]+=W[0];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-Vals[5]+=W[7];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[39];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
-W[8]+=W[1];
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
-Vals[2]+=W[8];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[40];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
-W[9]+=W[2];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-Vals[3]+=W[9];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[41];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
-W[10]+=W[3];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-Vals[4]+=W[10];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[42];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[11]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
-W[11]+=W[4];
-W[11]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-Vals[1]+=W[11];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[43];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[12]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
-W[12]+=W[5];
-W[12]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-Vals[0]+=W[12];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[44];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[13]+=(rotr(W[14],7)^rotr(W[14],18)^(W[14]>>3U));
-W[13]+=W[6];
-W[13]+=(rotr(W[11],17)^rotr(W[11],19)^(W[11]>>10U));
-Vals[6]+=W[13];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[45];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[14]+=(rotr(W[15],7)^rotr(W[15],18)^(W[15]>>3U));
-W[14]+=W[7];
-W[14]+=(rotr(W[12],17)^rotr(W[12],19)^(W[12]>>10U));
-Vals[7]+=W[14];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[46];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[15]+=(rotr(W[0],7)^rotr(W[0],18)^(W[0]>>3U));
-W[15]+=W[8];
-W[15]+=(rotr(W[13],17)^rotr(W[13],19)^(W[13]>>10U));
-Vals[5]+=W[15];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[47];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[0]+=(rotr(W[1],7)^rotr(W[1],18)^(W[1]>>3U));
-W[0]+=W[9];
-W[0]+=(rotr(W[14],17)^rotr(W[14],19)^(W[14]>>10U));
-Vals[2]+=W[0];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[48];
-Vals[0]+=Vals[2];
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-
-W[1]+=(rotr(W[2],7)^rotr(W[2],18)^(W[2]>>3U));
-W[1]+=W[10];
-W[1]+=(rotr(W[15],17)^rotr(W[15],19)^(W[15]>>10U));
-Vals[3]+=W[1];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[49];
-Vals[6]+=Vals[3];
-Vals[3]+=(rotr(Vals[2],2)^rotr(Vals[2],13)^rotr(Vals[2],22));
-Vals[3]+=Ma(Vals[7],Vals[2],Vals[5]);
-
-W[2]+=(rotr(W[3],7)^rotr(W[3],18)^(W[3]>>3U));
-W[2]+=W[11];
-W[2]+=(rotr(W[0],17)^rotr(W[0],19)^(W[0]>>10U));
-Vals[4]+=W[2];
-Vals[4]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
-Vals[4]+=ch(Vals[6],Vals[0],Vals[1]);
-Vals[4]+=K[50];
-Vals[7]+=Vals[4];
-Vals[4]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[4]+=Ma(Vals[5],Vals[3],Vals[2]);
-
-W[3]+=(rotr(W[4],7)^rotr(W[4],18)^(W[4]>>3U));
-W[3]+=W[12];
-W[3]+=(rotr(W[1],17)^rotr(W[1],19)^(W[1]>>10U));
-Vals[1]+=W[3];
-Vals[1]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
-Vals[1]+=ch(Vals[7],Vals[6],Vals[0]);
-Vals[1]+=K[51];
-Vals[5]+=Vals[1];
-Vals[1]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[1]+=Ma(Vals[2],Vals[4],Vals[3]);
-
-W[4]+=(rotr(W[5],7)^rotr(W[5],18)^(W[5]>>3U));
-W[4]+=W[13];
-W[4]+=(rotr(W[2],17)^rotr(W[2],19)^(W[2]>>10U));
-Vals[0]+=W[4];
-Vals[0]+=(rotr(Vals[5],6)^rotr(Vals[5],11)^rotr(Vals[5],25));
-Vals[0]+=ch(Vals[5],Vals[7],Vals[6]);
-Vals[0]+=K[52];
-Vals[2]+=Vals[0];
-Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
-Vals[0]+=Ma(Vals[3],Vals[1],Vals[4]);
-
-W[5]+=(rotr(W[6],7)^rotr(W[6],18)^(W[6]>>3U));
-W[5]+=W[14];
-W[5]+=(rotr(W[3],17)^rotr(W[3],19)^(W[3]>>10U));
-Vals[6]+=W[5];
-Vals[6]+=(rotr(Vals[2],6)^rotr(Vals[2],11)^rotr(Vals[2],25));
-Vals[6]+=ch(Vals[2],Vals[5],Vals[7]);
-Vals[6]+=K[53];
-Vals[3]+=Vals[6];
-Vals[6]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
-Vals[6]+=Ma(Vals[4],Vals[0],Vals[1]);
-
-W[6]+=(rotr(W[7],7)^rotr(W[7],18)^(W[7]>>3U));
-W[6]+=W[15];
-W[6]+=(rotr(W[4],17)^rotr(W[4],19)^(W[4]>>10U));
-Vals[7]+=W[6];
-Vals[7]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[7]+=ch(Vals[3],Vals[2],Vals[5]);
-Vals[7]+=K[54];
-Vals[4]+=Vals[7];
-Vals[7]+=(rotr(Vals[6],2)^rotr(Vals[6],13)^rotr(Vals[6],22));
-Vals[7]+=Ma(Vals[1],Vals[6],Vals[0]);
-
-W[7]+=(rotr(W[8],7)^rotr(W[8],18)^(W[8]>>3U));
-W[7]+=W[0];
-W[7]+=(rotr(W[5],17)^rotr(W[5],19)^(W[5]>>10U));
-Vals[5]+=W[7];
-Vals[5]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[5]+=ch(Vals[4],Vals[3],Vals[2]);
-Vals[5]+=K[55];
-Vals[1]+=Vals[5];
-Vals[5]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));
-Vals[5]+=Ma(Vals[0],Vals[7],Vals[6]);
-
-W[8]+=(rotr(W[9],7)^rotr(W[9],18)^(W[9]>>3U));
-W[8]+=W[1];
-W[8]+=(rotr(W[6],17)^rotr(W[6],19)^(W[6]>>10U));
-Vals[2]+=W[8];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-Vals[2]+=K[56];
-Vals[0]+=Vals[2];
-
-W[9]+=(rotr(W[10],7)^rotr(W[10],18)^(W[10]>>3U));
-W[9]+=W[2];
-W[9]+=(rotr(W[7],17)^rotr(W[7],19)^(W[7]>>10U));
-Vals[3]+=W[9];
-Vals[3]+=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=ch(Vals[0],Vals[1],Vals[4]);
-Vals[3]+=K[57];
-Vals[3]+=Vals[6];
-
-W[10]+=(rotr(W[11],7)^rotr(W[11],18)^(W[11]>>3U));
-W[10]+=W[3];
-W[10]+=(rotr(W[8],17)^rotr(W[8],19)^(W[8]>>10U));
-Vals[4]+=W[10];
-Vals[4]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
-Vals[4]+=ch(Vals[3],Vals[0],Vals[1]);
-Vals[4]+=K[58];
-Vals[4]+=Vals[7];
-Vals[1]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
-Vals[1]+=ch(Vals[4],Vals[3],Vals[0]);
-Vals[1]+=W[11];
-Vals[1]+=(rotr(W[12],7)^rotr(W[12],18)^(W[12]>>3U));
-Vals[1]+=W[4];
-Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
-Vals[1]+=K[59];
-Vals[1]+=Vals[5];
-
-Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
-Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
-Vals[2]+=W[12];
-Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
-Vals[2]+=W[5];
-Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
-Vals[2]+=Vals[0];
-Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
-Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
-
-#define FOUND (0x0F) // index of the output[] slot that holds the next free write position (i.e. the count of nonces stored so far)
-#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce // store Xnonce at output[output[FOUND]], then post-increment that counter; the macro itself does no bounds check
-
-#if defined(VECTORS2) || defined(VECTORS4)
-	if (any(Vals[2] == x136032edU)) {
-		if (Vals[2].x == x136032edU)
-			SETFOUND(nonce.x);
-		if (Vals[2].y == x136032edU)
-			SETFOUND(nonce.y);
-#if defined(VECTORS4)
-		if (Vals[2].z == x136032edU)
-			SETFOUND(nonce.z);
-		if (Vals[2].w == x136032edU)
-			SETFOUND(nonce.w);
-#endif
-	}
-#else
-	if (Vals[2] == x136032edU)
-		SETFOUND(nonce);
-#endif
-}

+ 0 - 853
scrypt130511.cl

@@ -1,853 +0,0 @@
-/*-
- * Copyright 2009 Colin Percival, 2011 ArtForz, 2011 pooler, 2012 mtrlt,
- * 2012-2013 Con Kolivas.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * This file was originally written by Colin Percival as part of the Tarsnap
- * online backup system.
- */
-
-__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
-__constant uint K[] = {
-	0x428a2f98U,
-	0x71374491U,
-	0xb5c0fbcfU,
-	0xe9b5dba5U,
-	0x3956c25bU,
-	0x59f111f1U,
-	0x923f82a4U,
-	0xab1c5ed5U,
-	0xd807aa98U,
-	0x12835b01U,
-	0x243185beU, // 10
-	0x550c7dc3U,
-	0x72be5d74U,
-	0x80deb1feU,
-	0x9bdc06a7U,
-	0xe49b69c1U,
-	0xefbe4786U,
-	0x0fc19dc6U,
-	0x240ca1ccU,
-	0x2de92c6fU,
-	0x4a7484aaU, // 20
-	0x5cb0a9dcU,
-	0x76f988daU,
-	0x983e5152U,
-	0xa831c66dU,
-	0xb00327c8U,
-	0xbf597fc7U,
-	0xc6e00bf3U,
-	0xd5a79147U,
-	0x06ca6351U,
-	0x14292967U, // 30
-	0x27b70a85U,
-	0x2e1b2138U,
-	0x4d2c6dfcU,
-	0x53380d13U,
-	0x650a7354U,
-	0x766a0abbU,
-	0x81c2c92eU,
-	0x92722c85U,
-	0xa2bfe8a1U,
-	0xa81a664bU, // 40
-	0xc24b8b70U,
-	0xc76c51a3U,
-	0xd192e819U,
-	0xd6990624U,
-	0xf40e3585U,
-	0x106aa070U,
-	0x19a4c116U,
-	0x1e376c08U,
-	0x2748774cU,
-	0x34b0bcb5U, // 50
-	0x391c0cb3U,
-	0x4ed8aa4aU,
-	0x5b9cca4fU,
-	0x682e6ff3U,
-	0x748f82eeU,
-	0x78a5636fU,
-	0x84c87814U,
-	0x8cc70208U,
-	0x90befffaU,
-	0xa4506cebU, // 60
-	0xbef9a3f7U,
-	0xc67178f2U,
-	0x98c7e2a2U,
-	0xfc08884dU,
-	0xcd2a11aeU,
-	0x510e527fU,
-	0x9b05688cU,
-	0xC3910C8EU,
-	0xfb6feee7U,
-	0x2a01a605U, // 70
-	0x0c2e12e0U,
-	0x4498517BU,
-	0x6a09e667U,
-	0xa4ce148bU,
-	0x95F61999U,
-	0xc19bf174U,
-	0xBB67AE85U,
-	0x3C6EF372U,
-	0xA54FF53AU,
-	0x1F83D9ABU, // 80
-	0x5BE0CD19U,
-	0x5C5C5C5CU,
-	0x36363636U,
-	0x80000000U,
-	0x000003FFU,
-	0x00000280U,
-	0x000004a0U,
-	0x00000300U
-};
-
-#define rotl(x,y) rotate(x,y)
-#define Ch(x,y,z) bitselect(z,y,x)
-#define Maj(x,y,z) Ch((x^z),y,z)
-
-#define EndianSwap(n) (rotl(n & ES[0], 24U)|rotl(n & ES[1], 8U))
-
-#define Tr2(x)		(rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U))
-#define Tr1(x)		(rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U))
-#define Wr2(x)		(rotl(x, 25U) ^ rotl(x, 14U) ^ (x>>3U))
-#define Wr1(x)		(rotl(x, 15U) ^ rotl(x, 13U) ^ (x>>10U))
-
-#define RND(a, b, c, d, e, f, g, h, k)	\
-	h += Tr1(e); 			\
-	h += Ch(e, f, g); 		\
-	h += k;				\
-	d += h;				\
-	h += Tr2(a); 			\
-	h += Maj(a, b, c);
-
-void SHA256(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
-{
-	uint4 S0 = *state0;
-	uint4 S1 = *state1;
-	
-#define A S0.x
-#define B S0.y
-#define C S0.z
-#define D S0.w
-#define E S1.x
-#define F S1.y
-#define G S1.z
-#define H S1.w
-
-	uint4 W[4];
-
-	W[ 0].x = block0.x;
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[0]);
-	W[ 0].y = block0.y;
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[1]);
-	W[ 0].z = block0.z;
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[2]);
-	W[ 0].w = block0.w;
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[3]);
-
-	W[ 1].x = block1.x;
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]);
-	W[ 1].y = block1.y;
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]);
-	W[ 1].z = block1.z;
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]);
-	W[ 1].w = block1.w;
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]);
-
-	W[ 2].x = block2.x;
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]);
-	W[ 2].y = block2.y;
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]);
-	W[ 2].z = block2.z;
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]);
-	W[ 2].w = block2.w;
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]);
-
-	W[ 3].x = block3.x;
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]);
-	W[ 3].y = block3.y;
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]);
-	W[ 3].z = block3.z;
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]);
-	W[ 3].w = block3.w;
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]);
-
-	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]);
-
-	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]);
-
-	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]);
-
-	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]);
-
-	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]);
-
-	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]);
-
-	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]);
-
-	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]);
-
-	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]);
-
-	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]);
-
-	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]);
-
-	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]);
-
-	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]);
-
-	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]);
-
-	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]);
-
-	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]);
-
-	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]);
-
-	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]);
-
-	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]);
-
-	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]);
-
-	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]);
-
-	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]);
-
-	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]);
-
-	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]);
-
-	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]);
-
-	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]);
-
-	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]);
-
-	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]);
-
-	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]);
-
-	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]);
-
-	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]);
-
-	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]);
-
-	W[ 0].x += Wr1(W[ 3].z) + W[ 2].y + Wr2(W[ 0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]);
-
-	W[ 0].y += Wr1(W[ 3].w) + W[ 2].z + Wr2(W[ 0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]);
-
-	W[ 0].z += Wr1(W[ 0].x) + W[ 2].w + Wr2(W[ 0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]);
-
-	W[ 0].w += Wr1(W[ 0].y) + W[ 3].x + Wr2(W[ 1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]);
-
-	W[ 1].x += Wr1(W[ 0].z) + W[ 3].y + Wr2(W[ 1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]);
-
-	W[ 1].y += Wr1(W[ 0].w) + W[ 3].z + Wr2(W[ 1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]);
-
-	W[ 1].z += Wr1(W[ 1].x) + W[ 3].w + Wr2(W[ 1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]);
-
-	W[ 1].w += Wr1(W[ 1].y) + W[ 0].x + Wr2(W[ 2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]);
-
-	W[ 2].x += Wr1(W[ 1].z) + W[ 0].y + Wr2(W[ 2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]);
-
-	W[ 2].y += Wr1(W[ 1].w) + W[ 0].z + Wr2(W[ 2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]);
-
-	W[ 2].z += Wr1(W[ 2].x) + W[ 0].w + Wr2(W[ 2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]);
-
-	W[ 2].w += Wr1(W[ 2].y) + W[ 1].x + Wr2(W[ 3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]);
-
-	W[ 3].x += Wr1(W[ 2].z) + W[ 1].y + Wr2(W[ 3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]);
-
-	W[ 3].y += Wr1(W[ 2].w) + W[ 1].z + Wr2(W[ 3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]);
-
-	W[ 3].z += Wr1(W[ 3].x) + W[ 1].w + Wr2(W[ 3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]);
-
-	W[ 3].w += Wr1(W[ 3].y) + W[ 2].x + Wr2(W[ 0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]);
-	
-#undef A
-#undef B
-#undef C
-#undef D
-#undef E
-#undef F
-#undef G
-#undef H
-
-	*state0 += S0;
-	*state1 += S1;
-}
-
-void SHA256_fresh(uint4*restrict state0,uint4*restrict state1, const uint4 block0, const uint4 block1, const uint4 block2, const uint4 block3)
-{
-#define A (*state0).x
-#define B (*state0).y
-#define C (*state0).z
-#define D (*state0).w
-#define E (*state1).x
-#define F (*state1).y
-#define G (*state1).z
-#define H (*state1).w
-
-	uint4 W[4];
-
-	W[0].x = block0.x;
-	D= K[63] +W[0].x;
-	H= K[64] +W[0].x;
-
-	W[0].y = block0.y;
-	C= K[65] +Tr1(D)+Ch(D, K[66], K[67])+W[0].y;
-	G= K[68] +C+Tr2(H)+Ch(H, K[69] ,K[70]);
-
-	W[0].z = block0.z;
-	B= K[71] +Tr1(C)+Ch(C,D,K[66])+W[0].z;
-	F= K[72] +B+Tr2(G)+Maj(G,H, K[73]);
-
-	W[0].w = block0.w;
-	A= K[74] +Tr1(B)+Ch(B,C,D)+W[0].w;
-	E= K[75] +A+Tr2(F)+Maj(F,G,H);
-
-	W[1].x = block1.x;
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[4]);
-	W[1].y = block1.y;
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[5]);
-	W[1].z = block1.z;
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[6]);
-	W[1].w = block1.w;
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[7]);
-	
-	W[2].x = block2.x;
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[8]);
-	W[2].y = block2.y;
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[9]);
-	W[2].z = block2.z;
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[10]);
-	W[2].w = block2.w;
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[11]);
-	
-	W[3].x = block3.x;
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[12]);
-	W[3].y = block3.y;
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[13]);
-	W[3].z = block3.z;
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[14]);
-	W[3].w = block3.w;
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[76]);
-
-	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[15]);
-
-	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[16]);
-
-	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[17]);
-
-	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[18]);
-
-	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[19]);
-
-	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[20]);
-
-	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[21]);
-
-	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[22]);
-
-	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[23]);
-
-	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[24]);
-
-	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[25]);
-
-	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[26]);
-
-	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[27]);
-
-	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[28]);
-
-	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[29]);
-
-	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[30]);
-
-	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[31]);
-
-	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[32]);
-
-	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[33]);
-
-	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[34]);
-
-	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[35]);
-
-	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[36]);
-
-	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[37]);
-
-	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[38]);
-
-	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[39]);
-
-	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[40]);
-
-	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[41]);
-
-	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[42]);
-
-	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[43]);
-
-	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[44]);
-
-	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[45]);
-
-	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[46]);
-
-	W[0].x += Wr1(W[3].z) + W[2].y + Wr2(W[0].y);
-	RND(A,B,C,D,E,F,G,H, W[0].x+ K[47]);
-
-	W[0].y += Wr1(W[3].w) + W[2].z + Wr2(W[0].z);
-	RND(H,A,B,C,D,E,F,G, W[0].y+ K[48]);
-
-	W[0].z += Wr1(W[0].x) + W[2].w + Wr2(W[0].w);
-	RND(G,H,A,B,C,D,E,F, W[0].z+ K[49]);
-
-	W[0].w += Wr1(W[0].y) + W[3].x + Wr2(W[1].x);
-	RND(F,G,H,A,B,C,D,E, W[0].w+ K[50]);
-
-	W[1].x += Wr1(W[0].z) + W[3].y + Wr2(W[1].y);
-	RND(E,F,G,H,A,B,C,D, W[1].x+ K[51]);
-
-	W[1].y += Wr1(W[0].w) + W[3].z + Wr2(W[1].z);
-	RND(D,E,F,G,H,A,B,C, W[1].y+ K[52]);
-
-	W[1].z += Wr1(W[1].x) + W[3].w + Wr2(W[1].w);
-	RND(C,D,E,F,G,H,A,B, W[1].z+ K[53]);
-
-	W[1].w += Wr1(W[1].y) + W[0].x + Wr2(W[2].x);
-	RND(B,C,D,E,F,G,H,A, W[1].w+ K[54]);
-
-	W[2].x += Wr1(W[1].z) + W[0].y + Wr2(W[2].y);
-	RND(A,B,C,D,E,F,G,H, W[2].x+ K[55]);
-
-	W[2].y += Wr1(W[1].w) + W[0].z + Wr2(W[2].z);
-	RND(H,A,B,C,D,E,F,G, W[2].y+ K[56]);
-
-	W[2].z += Wr1(W[2].x) + W[0].w + Wr2(W[2].w);
-	RND(G,H,A,B,C,D,E,F, W[2].z+ K[57]);
-
-	W[2].w += Wr1(W[2].y) + W[1].x + Wr2(W[3].x);
-	RND(F,G,H,A,B,C,D,E, W[2].w+ K[58]);
-
-	W[3].x += Wr1(W[2].z) + W[1].y + Wr2(W[3].y);
-	RND(E,F,G,H,A,B,C,D, W[3].x+ K[59]);
-
-	W[3].y += Wr1(W[2].w) + W[1].z + Wr2(W[3].z);
-	RND(D,E,F,G,H,A,B,C, W[3].y+ K[60]);
-
-	W[3].z += Wr1(W[3].x) + W[1].w + Wr2(W[3].w);
-	RND(C,D,E,F,G,H,A,B, W[3].z+ K[61]);
-
-	W[3].w += Wr1(W[3].y) + W[2].x + Wr2(W[0].x);
-	RND(B,C,D,E,F,G,H,A, W[3].w+ K[62]);
-	
-#undef A
-#undef B
-#undef C
-#undef D
-#undef E
-#undef F
-#undef G
-#undef H
-
-	*state0 += (uint4)(K[73], K[77], K[78], K[79]);
-	*state1 += (uint4)(K[66], K[67], K[80], K[81]);
-}
-
-__constant uint fixedW[64] =
-{
-	0x428a2f99,0xf1374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
-	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf794,
-	0xf59b89c2,0x73924787,0x23c6886e,0xa42ca65c,0x15ed3627,0x4d6edcbf,0xe28217fc,0xef02488f,
-	0xb707775c,0x0468c23f,0xe7e72b4c,0x49e1f1a2,0x4b99c816,0x926d1570,0xaa0fc072,0xadb36e2c,
-	0xad87a3ea,0xbcb1d3a3,0x7b993186,0x562b9420,0xbff3ca0c,0xda4b0c23,0x6cd8711a,0x8f337caa,
-	0xc91b1417,0xc359dce1,0xa83253a7,0x3b13c12d,0x9d3d725d,0xd9031a84,0xb1a03340,0x16f58012,
-	0xe64fb6a2,0xe84d923a,0xe93a5730,0x09837686,0x078ff753,0x29833341,0xd5de0b7e,0x6948ccf4,
-	0xe0a1adbe,0x7c728e11,0x511c78e4,0x315b45bd,0xfca71413,0xea28f96a,0x79703128,0x4e1ef848,
-};
-
-void SHA256_fixed(uint4*restrict state0,uint4*restrict state1)
-{
-	uint4 S0 = *state0;
-	uint4 S1 = *state1;
-
-#define A S0.x
-#define B S0.y
-#define C S0.z
-#define D S0.w
-#define E S1.x
-#define F S1.y
-#define G S1.z
-#define H S1.w
-
-	RND(A,B,C,D,E,F,G,H, fixedW[0]);
-	RND(H,A,B,C,D,E,F,G, fixedW[1]);
-	RND(G,H,A,B,C,D,E,F, fixedW[2]);
-	RND(F,G,H,A,B,C,D,E, fixedW[3]);
-	RND(E,F,G,H,A,B,C,D, fixedW[4]);
-	RND(D,E,F,G,H,A,B,C, fixedW[5]);
-	RND(C,D,E,F,G,H,A,B, fixedW[6]);
-	RND(B,C,D,E,F,G,H,A, fixedW[7]);
-	RND(A,B,C,D,E,F,G,H, fixedW[8]);
-	RND(H,A,B,C,D,E,F,G, fixedW[9]);
-	RND(G,H,A,B,C,D,E,F, fixedW[10]);
-	RND(F,G,H,A,B,C,D,E, fixedW[11]);
-	RND(E,F,G,H,A,B,C,D, fixedW[12]);
-	RND(D,E,F,G,H,A,B,C, fixedW[13]);
-	RND(C,D,E,F,G,H,A,B, fixedW[14]);
-	RND(B,C,D,E,F,G,H,A, fixedW[15]);
-	RND(A,B,C,D,E,F,G,H, fixedW[16]);
-	RND(H,A,B,C,D,E,F,G, fixedW[17]);
-	RND(G,H,A,B,C,D,E,F, fixedW[18]);
-	RND(F,G,H,A,B,C,D,E, fixedW[19]);
-	RND(E,F,G,H,A,B,C,D, fixedW[20]);
-	RND(D,E,F,G,H,A,B,C, fixedW[21]);
-	RND(C,D,E,F,G,H,A,B, fixedW[22]);
-	RND(B,C,D,E,F,G,H,A, fixedW[23]);
-	RND(A,B,C,D,E,F,G,H, fixedW[24]);
-	RND(H,A,B,C,D,E,F,G, fixedW[25]);
-	RND(G,H,A,B,C,D,E,F, fixedW[26]);
-	RND(F,G,H,A,B,C,D,E, fixedW[27]);
-	RND(E,F,G,H,A,B,C,D, fixedW[28]);
-	RND(D,E,F,G,H,A,B,C, fixedW[29]);
-	RND(C,D,E,F,G,H,A,B, fixedW[30]);
-	RND(B,C,D,E,F,G,H,A, fixedW[31]);
-	RND(A,B,C,D,E,F,G,H, fixedW[32]);
-	RND(H,A,B,C,D,E,F,G, fixedW[33]);
-	RND(G,H,A,B,C,D,E,F, fixedW[34]);
-	RND(F,G,H,A,B,C,D,E, fixedW[35]);
-	RND(E,F,G,H,A,B,C,D, fixedW[36]);
-	RND(D,E,F,G,H,A,B,C, fixedW[37]);
-	RND(C,D,E,F,G,H,A,B, fixedW[38]);
-	RND(B,C,D,E,F,G,H,A, fixedW[39]);
-	RND(A,B,C,D,E,F,G,H, fixedW[40]);
-	RND(H,A,B,C,D,E,F,G, fixedW[41]);
-	RND(G,H,A,B,C,D,E,F, fixedW[42]);
-	RND(F,G,H,A,B,C,D,E, fixedW[43]);
-	RND(E,F,G,H,A,B,C,D, fixedW[44]);
-	RND(D,E,F,G,H,A,B,C, fixedW[45]);
-	RND(C,D,E,F,G,H,A,B, fixedW[46]);
-	RND(B,C,D,E,F,G,H,A, fixedW[47]);
-	RND(A,B,C,D,E,F,G,H, fixedW[48]);
-	RND(H,A,B,C,D,E,F,G, fixedW[49]);
-	RND(G,H,A,B,C,D,E,F, fixedW[50]);
-	RND(F,G,H,A,B,C,D,E, fixedW[51]);
-	RND(E,F,G,H,A,B,C,D, fixedW[52]);
-	RND(D,E,F,G,H,A,B,C, fixedW[53]);
-	RND(C,D,E,F,G,H,A,B, fixedW[54]);
-	RND(B,C,D,E,F,G,H,A, fixedW[55]);
-	RND(A,B,C,D,E,F,G,H, fixedW[56]);
-	RND(H,A,B,C,D,E,F,G, fixedW[57]);
-	RND(G,H,A,B,C,D,E,F, fixedW[58]);
-	RND(F,G,H,A,B,C,D,E, fixedW[59]);
-	RND(E,F,G,H,A,B,C,D, fixedW[60]);
-	RND(D,E,F,G,H,A,B,C, fixedW[61]);
-	RND(C,D,E,F,G,H,A,B, fixedW[62]);
-	RND(B,C,D,E,F,G,H,A, fixedW[63]);
-	
-#undef A
-#undef B
-#undef C
-#undef D
-#undef E
-#undef F
-#undef G
-#undef H
-	*state0 += S0;
-	*state1 += S1;
-}
-
-void shittify(uint4 B[8])
-{
-	uint4 tmp[4];
-	tmp[0] = (uint4)(B[1].x,B[2].y,B[3].z,B[0].w);
-	tmp[1] = (uint4)(B[2].x,B[3].y,B[0].z,B[1].w);
-	tmp[2] = (uint4)(B[3].x,B[0].y,B[1].z,B[2].w);
-	tmp[3] = (uint4)(B[0].x,B[1].y,B[2].z,B[3].w);
-	
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		B[i] = EndianSwap(tmp[i]);
-
-	tmp[0] = (uint4)(B[5].x,B[6].y,B[7].z,B[4].w);
-	tmp[1] = (uint4)(B[6].x,B[7].y,B[4].z,B[5].w);
-	tmp[2] = (uint4)(B[7].x,B[4].y,B[5].z,B[6].w);
-	tmp[3] = (uint4)(B[4].x,B[5].y,B[6].z,B[7].w);
-	
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		B[i+4] = EndianSwap(tmp[i]);
-}
-
-void unshittify(uint4 B[8])
-{
-	uint4 tmp[4];
-	tmp[0] = (uint4)(B[3].x,B[2].y,B[1].z,B[0].w);
-	tmp[1] = (uint4)(B[0].x,B[3].y,B[2].z,B[1].w);
-	tmp[2] = (uint4)(B[1].x,B[0].y,B[3].z,B[2].w);
-	tmp[3] = (uint4)(B[2].x,B[1].y,B[0].z,B[3].w);
-	
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		B[i] = EndianSwap(tmp[i]);
-
-	tmp[0] = (uint4)(B[7].x,B[6].y,B[5].z,B[4].w);
-	tmp[1] = (uint4)(B[4].x,B[7].y,B[6].z,B[5].w);
-	tmp[2] = (uint4)(B[5].x,B[4].y,B[7].z,B[6].w);
-	tmp[3] = (uint4)(B[6].x,B[5].y,B[4].z,B[7].w);
-	
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		B[i+4] = EndianSwap(tmp[i]);
-}
-
-void salsa(uint4 B[8])
-{
-	uint4 w[4];
-
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		w[i] = (B[i]^=B[i+4]);
-
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-	{
-		w[0] ^= rotl(w[3]     +w[2]     , 7U);
-		w[1] ^= rotl(w[0]     +w[3]     , 9U);
-		w[2] ^= rotl(w[1]     +w[0]     ,13U);
-		w[3] ^= rotl(w[2]     +w[1]     ,18U);
-		w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U);
-		w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U);
-		w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U);
-		w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U);
-	}
-
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		w[i] = (B[i+4]^=(B[i]+=w[i]));
-
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-	{
-		w[0] ^= rotl(w[3]     +w[2]     , 7U);
-		w[1] ^= rotl(w[0]     +w[3]     , 9U);
-		w[2] ^= rotl(w[1]     +w[0]     ,13U);
-		w[3] ^= rotl(w[2]     +w[1]     ,18U);
-		w[2] ^= rotl(w[3].wxyz+w[0].zwxy, 7U);
-		w[1] ^= rotl(w[2].wxyz+w[3].zwxy, 9U);
-		w[0] ^= rotl(w[1].wxyz+w[2].zwxy,13U);
-		w[3] ^= rotl(w[0].wxyz+w[1].zwxy,18U);
-	}
-
-#pragma unroll
-	for(uint i=0; i<4; ++i)
-		B[i+4] += w[i];
-}
-
-#define Coord(x,y,z) x+y*(x ## SIZE)+z*(y ## SIZE)*(x ## SIZE)
-#define CO Coord(z,x,y)
-
-void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
-{
-	shittify(X);
-	const uint zSIZE = 8;
-	const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
-	const uint xSIZE = CONCURRENT_THREADS;
-	uint x = get_global_id(0)%xSIZE;
-
-	for(uint y=0; y<1024/LOOKUP_GAP; ++y)
-	{
-#pragma unroll
-		for(uint z=0; z<zSIZE; ++z)
-			lookup[CO] = X[z];
-		for(uint i=0; i<LOOKUP_GAP; ++i) 
-			salsa(X);
-	}
-#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
-	{
-		uint y = (1024/LOOKUP_GAP);
-#pragma unroll
-		for(uint z=0; z<zSIZE; ++z)
-			lookup[CO] = X[z];
-		for(uint i=0; i<1024%LOOKUP_GAP; ++i)
-			salsa(X); 
-	}
-#endif
-	for (uint i=0; i<1024; ++i) 
-	{
-		uint4 V[8];
-		uint j = X[7].x & K[85];
-		uint y = (j/LOOKUP_GAP);
-#pragma unroll
-		for(uint z=0; z<zSIZE; ++z)
-			V[z] = lookup[CO];
-
-#if (LOOKUP_GAP == 1)
-#elif (LOOKUP_GAP == 2)
-		if (j&1)
-			salsa(V);
-#else
-		uint val = j%LOOKUP_GAP;
-		for (uint z=0; z<val; ++z) 
-			salsa(V);
-#endif
-
-#pragma unroll
-		for(uint z=0; z<zSIZE; ++z)
-			X[z] ^= V[z];
-		salsa(X);
-	}
-	unshittify(X);
-}
-
-#define SCRYPT_FOUND (0xFF)
-#define SETFOUND(Xnonce) output[output[SCRYPT_FOUND]++] = Xnonce
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uint4 * restrict input,
-volatile __global uint*restrict output, __global uint4*restrict padcache,
-const uint4 midstate0, const uint4 midstate16, const uint target)
-{
-	uint gid = get_global_id(0);
-	uint4 X[8];
-	uint4 tstate0, tstate1, ostate0, ostate1, tmp0, tmp1;
-	uint4 data = (uint4)(input[4].x,input[4].y,input[4].z,gid);
-	uint4 pad0 = midstate0, pad1 = midstate16;
-
-	SHA256(&pad0,&pad1, data, (uint4)(K[84],0,0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[86]));
-	SHA256_fresh(&ostate0,&ostate1, pad0^ K[82], pad1^ K[82], K[82], K[82]);
-	SHA256_fresh(&tstate0,&tstate1, pad0^ K[83], pad1^ K[83], K[83], K[83]);
-
-	tmp0 = tstate0;
-	tmp1 = tstate1;
-	SHA256(&tstate0, &tstate1, input[0],input[1],input[2],input[3]);
-
-#pragma unroll
-	for (uint i=0; i<4; i++) 
-	{
-		pad0 = tstate0;
-		pad1 = tstate1;
-		X[i*2 ] = ostate0;
-		X[i*2+1] = ostate1;
-
-		SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
-		SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
-	}
-	scrypt_core(X,padcache);
-	SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
-	SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
-	SHA256_fixed(&tmp0,&tmp1);
-	SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
-
-	bool result = (EndianSwap(ostate1.w) <= target);
-	if (result)
-		SETFOUND(gid);
-}