Use diablo kernel on all future SDKs for Tahiti and set preferred vector width to...
authorCon Kolivas <kernel@kolivas.org>
Thu, 23 Feb 2012 09:24:32 +0000 (20:24 +1100)
committerCon Kolivas <kernel@kolivas.org>
Thu, 23 Feb 2012 09:24:32 +0000 (20:24 +1100)
ocl.c

diff --git a/ocl.c b/ocl.c
index 1a355ef..e5eb6ec 100644 (file)
--- a/ocl.c
+++ b/ocl.c
@@ -332,32 +332,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
        }
        applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
 
-       /* For some reason 2 vectors is still better even if the card says
-        * otherwise, and many cards lie about their max so use 256 as max
-        * unless explicitly set on the command line. 79x0 cards perform
-        * better without vectors */
-       if (preferred_vwidth > 1) {
-               if (strstr(name, "Tahiti"))
-                       preferred_vwidth = 1;
-               else
-                       preferred_vwidth = 2;
-       }
-
-       if (gpus[gpu].vwidth)
-               clState->vwidth = gpus[gpu].vwidth;
-       else {
-               clState->vwidth = preferred_vwidth;
-               gpus[gpu].vwidth = preferred_vwidth;
-       }
-
-       if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
-               clState->wsize = gpus[gpu].work_size;
-       else if (strstr(name, "Tahiti"))
-               clState->wsize = 64;
-       else
-               clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-       gpus[gpu].work_size = clState->wsize;
-
        /* Create binary filename based on parameters passed to opencl
         * compiler to ensure we only load a binary that matches what would
         * have otherwise created. The filename is:
@@ -378,9 +352,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
                                applog(LOG_INFO, "Selecting diablo kernel");
                                clState->chosen_kernel = KL_DIABLO;
                        }
-               } else if (strstr(vbuff, "898.1")) { // Windows 64 bit 12.2 driver
-                       applog(LOG_INFO, "Selecting diablo kernel");
-                       clState->chosen_kernel = KL_DIABLO;
+               } else if (strstr(vbuff, "898.1") || // Windows 64 bit 12.2 driver
+                          strstr(name, "Tahiti")) { // All non SDK 2.6 79x0
+                               applog(LOG_INFO, "Selecting diablo kernel");
+                               clState->chosen_kernel = KL_DIABLO;
                } else if (clState->hasBitAlign) {
                        applog(LOG_INFO, "Selecting phatk kernel");
                        clState->chosen_kernel = KL_PHATK;
@@ -393,10 +368,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
        } else
                clState->chosen_kernel = gpus[gpu].kernel;
 
+       /* For some reason 2 vectors is still better even if the card says
+        * otherwise, and many cards lie about their max so use 256 as max
+        * unless explicitly set on the command line. */
+       if (preferred_vwidth > 2)
+               preferred_vwidth = 2;
+
        switch (clState->chosen_kernel) {
                case KL_POCLBM:
                        strcpy(filename, POCLBM_KERNNAME".cl");
                        strcpy(binaryfilename, POCLBM_KERNNAME);
+                       /* This kernel prefers to not use vectors */
+                       preferred_vwidth = 1;
                        break;
                case KL_PHATK:
                        strcpy(filename, PHATK_KERNNAME".cl");
@@ -413,6 +396,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
                        break;
        }
 
+       if (gpus[gpu].vwidth)
+               clState->vwidth = gpus[gpu].vwidth;
+       else {
+               clState->vwidth = preferred_vwidth;
+               gpus[gpu].vwidth = preferred_vwidth;
+       }
+
+       if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
+               clState->wsize = gpus[gpu].work_size;
+       else if (strstr(name, "Tahiti"))
+               clState->wsize = 64;
+       else
+               clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
+       gpus[gpu].work_size = clState->wsize;
+
        FILE *binaryfile;
        size_t *binary_sizes;
        char **binaries;