diff options
-rw-r--r-- | include/clew/clew.h | 1 | ||||
-rw-r--r-- | include/opencl/openclwrapper.hxx | 1 | ||||
-rw-r--r-- | opencl/source/openclwrapper.cxx | 5 | ||||
-rw-r--r-- | sc/source/core/data/formulacell.cxx | 134 |
4 files changed, 123 insertions, 18 deletions
diff --git a/include/clew/clew.h b/include/clew/clew.h index 94b6c29d9262..e5cfaf0836be 100644 --- a/include/clew/clew.h +++ b/include/clew/clew.h @@ -416,6 +416,7 @@ typedef struct _cl_image_format { // cl_device_info
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
#define CL_DEVICE_NAME 0x102B
diff --git a/include/opencl/openclwrapper.hxx b/include/opencl/openclwrapper.hxx index 04fe1e3725e7..fe677729460b 100644 --- a/include/opencl/openclwrapper.hxx +++ b/include/opencl/openclwrapper.hxx @@ -51,6 +51,7 @@ struct GPUEnv int mnCmdQueuePos; bool mnKhrFp64Flag; bool mnAmdFp64Flag; + cl_uint mnPreferredVectorWidthFloat; }; extern OPENCL_DLLPUBLIC GPUEnv gpuEnv; diff --git a/opencl/source/openclwrapper.cxx b/opencl/source/openclwrapper.cxx index ea0f3f864053..79aabba7f37b 100644 --- a/opencl/source/openclwrapper.cxx +++ b/opencl/source/openclwrapper.cxx @@ -501,6 +501,11 @@ bool initOpenCLRunEnv( GPUEnv *gpuInfo ) gpuInfo->mnKhrFp64Flag = bKhrFp64; gpuInfo->mnAmdFp64Flag = bAmdFp64; + gpuInfo->mnPreferredVectorWidthFloat = 0; + + clGetDeviceInfo(gpuInfo->mpArryDevsID[0], CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), + &gpuInfo->mnPreferredVectorWidthFloat, NULL); + return false; } diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx index 27796ae7db49..14d3d44ba89d 100644 --- a/sc/source/core/data/formulacell.cxx +++ b/sc/source/core/data/formulacell.cxx @@ -20,6 +20,7 @@ #include <sal/config.h> #include <cassert> +#include <cstdlib> #include "formulacell.hxx" #include "grouptokenconverter.hxx" @@ -54,6 +55,7 @@ #include "types.hxx" #include "scopetools.hxx" #include "refupdatecontext.hxx" +#include <opencl/openclwrapper.hxx> #include <tokenstringcontext.hxx> #include <refhint.hxx> #include <listenerquery.hxx> @@ -3899,6 +3901,36 @@ ScFormulaCell::CompareState ScFormulaCell::CompareByTokenArray( ScFormulaCell& r return bInvariant ? EqualInvariant : EqualRelativeRef; } +namespace { + +// Split N into pieces of as equal size as possible, each not larger than K. +// The return value P is the number of pieces. On return, A (in 0..P-1) is the +// number of pieces of size N/P + 1; the remaining P - A pieces have size N/P. 
+ +int splitup(int N, int K, int& A) +{ + assert(N > 0); + assert(K > 0); + + A = 0; + + if (N <= K) + return 1; + + const int ideal_num_parts = N / K; + if (ideal_num_parts * K == N) + return ideal_num_parts; + + const int num_parts = ideal_num_parts + 1; + const int nominal_part_size = N / num_parts; + + A = N - num_parts * nominal_part_size; + + return num_parts; +} + +} // anonymous namespace + bool ScFormulaCell::InterpretFormulaGroup() { if (!officecfg::Office::Common::Misc::UseOpenCL::get()) @@ -3934,28 +3966,94 @@ bool ScFormulaCell::InterpretFormulaGroup() if (mxGroup->mbInvariant && false) return InterpretInvariantFormulaGroup(); - ScTokenArray aCode; - ScGroupTokenConverter aConverter(aCode, *pDocument, *this, mxGroup->mpTopCell->aPos); - std::vector<ScTokenArray*> aLoopControl; - if (!aConverter.convert(*pCode, aLoopControl)) - { - SAL_INFO("sc.opencl", "conversion of group " << this << " failed, disabling"); - mxGroup->meCalcState = sc::GroupCalcDisabled; - return false; - } + int nMaxGroupLength = INT_MAX; + +#ifdef WNT + // Heuristic: Certain old low-end OpenCL implementations don't + // work for us with too large group lengths. 1000 was determined + // empirically to be a good compromise. Looking at the preferred + // float vector width seems to be a way to detect these devices. + if (opencl::gpuEnv.mnPreferredVectorWidthFloat == 4) + nMaxGroupLength = 1000; +#endif + + if (std::getenv("SC_MAX_GROUP_LENGTH")) + nMaxGroupLength = std::atoi(std::getenv("SC_MAX_GROUP_LENGTH")); + + int nNumOnePlus; + const int nNumParts = splitup(GetSharedLength(), nMaxGroupLength, nNumOnePlus); - // The converted code does not have RPN tokens yet. The interpreter will - // generate them. 
- mxGroup->meCalcState = sc::GroupCalcRunning; - sc::FormulaGroupInterpreter *pInterpreter = sc::FormulaGroupInterpreter::getStatic(); - if (pInterpreter == NULL || - !pInterpreter->interpret(*pDocument, mxGroup->mpTopCell->aPos, mxGroup, aCode)) + int nOffset = 0; + int nCurChunkSize; + ScAddress aOrigPos = mxGroup->mpTopCell->aPos; + for (int i = 0; i < nNumParts; i++, nOffset += nCurChunkSize) { - SAL_INFO("sc.opencl", "interpreting group " << mxGroup << " (state " << (int) mxGroup->meCalcState << ") failed, disabling"); - mxGroup->meCalcState = sc::GroupCalcDisabled; - return false; + nCurChunkSize = GetSharedLength()/nNumParts + (i < nNumOnePlus ? 1 : 0); + + ScFormulaCellGroupRef xGroup; + + if (nNumParts == 1) + xGroup = mxGroup; + else + { + // Ugly hack + xGroup = new ScFormulaCellGroup(); + xGroup->mpTopCell = mxGroup->mpTopCell; + xGroup->mpTopCell->aPos = aOrigPos; + xGroup->mpTopCell->aPos.IncRow(nOffset); + xGroup->mbInvariant = mxGroup->mbInvariant; + xGroup->mnLength = nCurChunkSize; + xGroup->mpCode = mxGroup->mpCode; + } + + ScTokenArray aCode; + ScGroupTokenConverter aConverter(aCode, *pDocument, *this, xGroup->mpTopCell->aPos); + std::vector<ScTokenArray*> aLoopControl; + if (!aConverter.convert(*pCode, aLoopControl)) + { + SAL_INFO("sc.opencl", "conversion of group " << this << " failed, disabling"); + mxGroup->meCalcState = sc::GroupCalcDisabled; + + // Undo the hack above + if (nNumParts > 1) + { + mxGroup->mpTopCell->aPos = aOrigPos; + xGroup->mpTopCell = NULL; + xGroup->mpCode = NULL; + } + + return false; + } + + // The converted code does not have RPN tokens yet. The interpreter will + // generate them. 
+ xGroup->meCalcState = mxGroup->meCalcState = sc::GroupCalcRunning; + sc::FormulaGroupInterpreter *pInterpreter = sc::FormulaGroupInterpreter::getStatic(); + if (pInterpreter == NULL || + !pInterpreter->interpret(*pDocument, xGroup->mpTopCell->aPos, xGroup, aCode)) + { + SAL_INFO("sc.opencl", "interpreting group " << mxGroup << " (state " << (int) mxGroup->meCalcState << ") failed, disabling"); + mxGroup->meCalcState = sc::GroupCalcDisabled; + + // Undo the hack above + if (nNumParts > 1) + { + mxGroup->mpTopCell->aPos = aOrigPos; + xGroup->mpTopCell = NULL; + xGroup->mpCode = NULL; + } + + return false; + } + if (nNumParts > 1) + { + xGroup->mpTopCell = NULL; + xGroup->mpCode = NULL; + } } + if (nNumParts > 1) + mxGroup->mpTopCell->aPos = aOrigPos; mxGroup->meCalcState = sc::GroupCalcEnabled; return true; } |