//--------------------------------------------------------------------------------- // // Little Color Management System, fast floating point extensions // Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved // // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // //--------------------------------------------------------------------------------- #include "fast_float_internal.h" // Optimization for floating point tetrahedral interpolation typedef struct { const cmsInterpParams* p; // Tetrahedrical interpolation parameters. This is a not-owned pointer. } FloatCMYKData; // Precomputes tables on input devicelink. static FloatCMYKData* FloatCMYKAlloc(cmsContext ContextID, const cmsInterpParams* p) { FloatCMYKData* fd; fd = (FloatCMYKData*) _cmsMallocZero(ContextID, sizeof(FloatCMYKData)); if (fd == NULL) return NULL; fd ->p = p; return fd; } static int XFormSampler(cmsContext ContextID, CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo) { // Evaluate in 16 bits cmsPipelineEvalFloat(ContextID, In, Out, (cmsPipeline*) Cargo); // Always succeed return TRUE; } cmsINLINE cmsFloat32Number LinearInterpInt(cmsFloat32Number a, cmsFloat32Number l, cmsFloat32Number h) { return (h - l) * a + l; } // To prevent out of bounds indexing cmsINLINE cmsFloat32Number fclamp100(cmsFloat32Number v) { return v < 0.0f ? 0.0f : (v > 100.0f ? 100.0f : v); } // A optimized interpolation for 8-bit input. #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan]) static void FloatCMYKCLUTEval(cmsContext ContextID, struct _cmstransform_struct *CMMcargo, const void* Input, void* Output, cmsUInt32Number PixelsPerLine, cmsUInt32Number LineCount, const cmsStride* Stride) { cmsFloat32Number c, m, y, k; cmsFloat32Number px, py, pz, pk; int x0, y0, z0, k0; int X0, Y0, Z0, K0, X1, Y1, Z1, K1; cmsFloat32Number rx, ry, rz, rk; cmsFloat32Number c0, c1 = 0, c2 = 0, c3 = 0; cmsUInt32Number OutChan; FloatCMYKData* pcmyk = (FloatCMYKData*) _cmsGetTransformUserData(CMMcargo); const cmsInterpParams* p = pcmyk ->p; cmsUInt32Number TotalOut = p -> nOutputs; cmsUInt32Number TotalPlusAlpha; const cmsFloat32Number* LutTable = (const cmsFloat32Number*)p->Table; cmsUInt32Number i, ii; const cmsUInt8Number* cin; const cmsUInt8Number* min; const cmsUInt8Number* yin; const cmsUInt8Number* kin; const cmsUInt8Number* ain = NULL; cmsFloat32Number Tmp1[cmsMAXCHANNELS], Tmp2[cmsMAXCHANNELS]; cmsUInt8Number* out[cmsMAXCHANNELS]; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; cmsUInt32Number InputFormat = cmsGetTransformInputFormat(ContextID, (cmsHTRANSFORM) CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat(ContextID, (cmsHTRANSFORM) CMMcargo); cmsUInt32Number nchans, nalpha; cmsUInt32Number strideIn, strideOut; _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) nalpha = 0; strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { cin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; min = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; yin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; kin = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; if (nalpha) ain = (const cmsUInt8Number*)Input + SourceStartingOrder[4] + strideIn; TotalPlusAlpha = TotalOut; if (ain) TotalPlusAlpha++; for (ii = 0; ii < TotalPlusAlpha; ii++) out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii] + strideOut; for (ii = 0; ii < PixelsPerLine; ii++) { c = fclamp100(*(cmsFloat32Number*)cin) / 100.0f; m = fclamp100(*(cmsFloat32Number*)min) / 100.0f; y = fclamp100(*(cmsFloat32Number*)yin) / 100.0f; k = fclamp100(*(cmsFloat32Number*)kin) / 100.0f; cin += SourceIncrements[0]; min += SourceIncrements[1]; yin += SourceIncrements[2]; kin += SourceIncrements[3]; pk = c * p->Domain[0]; // C px = m * p->Domain[1]; // M py = y * p->Domain[2]; // Y pz = k * p->Domain[3]; // K k0 = (int)_cmsQuickFloor(pk); rk = (pk - (cmsFloat32Number)k0); x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); K0 = p->opta[3] * k0; K1 = K0 + (c >= 1.0 ? 0 : p->opta[3]); X0 = p->opta[2] * x0; X1 = X0 + (m >= 1.0 ? 0 : p->opta[2]); Y0 = p->opta[1] * y0; Y1 = Y0 + (y >= 1.0 ? 0 : p->opta[1]); Z0 = p->opta[0] * z0; Z1 = Z0 + (k >= 1.0 ? 0 : p->opta[0]); for (OutChan = 0; OutChan < TotalOut; OutChan++) { c0 = DENS(X0, Y0, Z0); if (rx >= ry && ry >= rz) { c1 = DENS(X1, Y0, Z0) - c0; c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); } else if (rx >= rz && rz >= ry) { c1 = DENS(X1, Y0, Z0) - c0; c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); } else if (rz >= rx && rx >= ry) { c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); c3 = DENS(X0, Y0, Z1) - c0; } else if (ry >= rx && rx >= rz) { c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); c2 = DENS(X0, Y1, Z0) - c0; c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); } else if (ry >= rz && rz >= rx) { c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); c2 = DENS(X0, Y1, Z0) - c0; c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); } else if (rz >= ry && ry >= rx) { c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); c3 = DENS(X0, Y0, Z1) - c0; } else { c1 = c2 = c3 = 0; } Tmp1[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz; } LutTable = (cmsFloat32Number*)p->Table; LutTable += K1; for (OutChan = 0; OutChan < p->nOutputs; OutChan++) { c0 = DENS(X0, Y0, Z0); if (rx >= ry && ry >= rz) { c1 = DENS(X1, Y0, Z0) - c0; c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); } else if (rx >= rz && rz >= ry) { c1 = DENS(X1, Y0, Z0) - c0; c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); } else if (rz >= rx && rx >= ry) { c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); c3 = DENS(X0, Y0, Z1) - c0; } else if (ry >= rx && rx >= rz) { c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); c2 = DENS(X0, Y1, Z0) - c0; c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); } else if (ry >= rz && rz >= rx) { c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); c2 = DENS(X0, Y1, Z0) - c0; c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); } else if (rz >= ry && ry >= rx) { c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); c3 = DENS(X0, Y0, Z1) - c0; } else { c1 = c2 = c3 = 0; } Tmp2[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz; } for (OutChan = 0; OutChan < p->nOutputs; OutChan++) { *(cmsFloat32Number*)(out[OutChan]) = LinearInterpInt(rk, Tmp1[OutChan], Tmp2[OutChan]); out[OutChan] += DestIncrements[OutChan]; } if (ain) *out[TotalOut] = *ain; } strideIn += Stride->BytesPerLineIn; strideOut += Stride->BytesPerLineOut; } } #undef DENS // -------------------------------------------------------------------------------------------------------------- cmsBool OptimizeCLUTCMYKTransform(cmsContext ContextID, _cmsTransformFn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) { cmsPipeline* OriginalLut; int nGridPoints; cmsPipeline* OptimizedLUT = NULL; cmsStage* OptimizedCLUTmpe; cmsStage* mpe; FloatCMYKData* pcmyk; _cmsStageCLutData* data; // For empty transforms, do nothing if (*Lut == NULL) return FALSE; // This is a loosy optimization! does not apply in floating-point cases if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE; // Only on 8-bit if (T_BYTES(*InputFormat) != 4 || T_BYTES(*OutputFormat) != 4) return FALSE; // Only on CMYK if (T_COLORSPACE(*InputFormat) != PT_CMYK) return FALSE; OriginalLut = *Lut; // Named color pipelines cannot be optimized either for (mpe = cmsPipelineGetPtrToFirstStage(ContextID, OriginalLut); mpe != NULL; mpe = cmsStageNext(ContextID, mpe)) { if (cmsStageType(ContextID, mpe) == cmsSigNamedColorElemType) return FALSE; } nGridPoints = _cmsReasonableGridpointsByColorspace(cmsSigRgbData, *dwFlags); // Create the result LUT OptimizedLUT = cmsPipelineAlloc(ContextID, 4, cmsPipelineOutputChannels(ContextID, OriginalLut)); if (OptimizedLUT == NULL) goto Error; // Allocate the CLUT for result OptimizedCLUTmpe = cmsStageAllocCLutFloat(ContextID, nGridPoints, 4, cmsPipelineOutputChannels(ContextID, OriginalLut), NULL); // Add the CLUT to the destination LUT cmsPipelineInsertStage(ContextID, OptimizedLUT, cmsAT_BEGIN, OptimizedCLUTmpe); // Resample the LUT if (!cmsStageSampleCLutFloat(ContextID, OptimizedCLUTmpe, XFormSampler, (void*)OriginalLut, 0)) goto Error; // Set the evaluator, copy parameters data = (_cmsStageCLutData*) cmsStageData(ContextID, OptimizedCLUTmpe); pcmyk = FloatCMYKAlloc(ContextID, data ->Params); if (pcmyk == NULL) return FALSE; // And return the obtained LUT cmsPipelineFree(ContextID, OriginalLut); *Lut = OptimizedLUT; *TransformFn = (_cmsTransformFn)FloatCMYKCLUTEval; *UserData = pcmyk; *FreeDataFn = _cmsFree; *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; return TRUE; Error: if (OptimizedLUT != NULL) cmsPipelineFree(ContextID, OptimizedLUT); return FALSE; }