一种基于DeltaE(CIE 1976)的找色算法Cuda实现_即时看

手机

光庭信息跌4.57% 2021上市超募11亿2022扣非降74% 时快讯

搜狐汽车全球快讯 | 大众汽车最新专利曝光：仪表支持拆卸可用手机、平板替代-环球关注

家电

一种基于DeltaE(CIE 1976)的找色算法Cuda实现_即时看

2023-07-05 12:01:46 来源：博客园

(相关资料图)

书接上文《一种基于DeltaE(CIE 1976)的找色算法》。

Delta E 是评估色彩准确度的重要测量指标。摄影师、影片编辑和平面设计师等创意专业人士都应重视这项标准，因其是选择专业级显示器的重要考虑因素。常见的找色算法都是基于颜色RGB上的数值差，这种方法虽然快捷，但是和人眼视觉上的色彩并不相同。这里采用Delta E的评估标准找色更符合人眼的直观感觉。上文使用CPU计算，采用了一些优化方法但是都不尽如人意，这里使用cuda加速提高这个算法的可用度。

// Compute the Delta E (CIE 1976) between colors.
//   <= 1.0  : the difference is not perceptible to the human eye
//   1 - 2   : perceptible on close inspection
//   2 - 10  : perceptible at a glance
//   11 - 49 : the colors are more similar than opposite
//   100     : the color is completely distorted
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>

// One pixel with 8-bit channels stored as ints, in B, G, R order.
struct Color_BGR
{
    int B, G, R;
};

// One color in CIE L*a*b* coordinates.
struct Color_Lab
{
    float L, a, b;
};

// Inverse sRGB gamma: maps a normalized channel value to linear light.
// Single-precision throughout so the device path never promotes to double.
static __host__ __device__ inline float SrgbToLinear(float c)
{
    return (c > 0.04045f) ? powf((c + 0.055f) / 1.055f, 2.4f) : (c / 12.92f);
}

// The piecewise f(t) used by the XYZ -> Lab transform.
static __host__ __device__ inline float LabPivot(float t)
{
    const float kOneThird = 1.0f / 3.0f;
    const float kOffset = 16.0f / 116.0f;
    return (t > 0.008856f) ? powf(t, kOneThird) : (7.787f * t + kOffset);
}

// Converts a BGR pixel to Lab using the white point (Xn, Yn, Zn) below.
//
// Callable from both host and device, so the CPU-computed target color and
// the GPU-computed pixel colors go through exactly the same formula.
// L is clamped to be non-negative.
__host__ __device__ Color_Lab BGR2Lab(Color_BGR x)
{
    const float Xn = 0.950456f;
    const float Yn = 1.0f;
    const float Zn = 1.088754f;

    const float RR = SrgbToLinear(x.R / 255.0f);
    const float GG = SrgbToLinear(x.G / 255.0f);
    const float BB = SrgbToLinear(x.B / 255.0f);

    // Linear RGB -> XYZ, then normalize by the white point.
    const float X = (0.4124564f * RR + 0.3575761f * GG + 0.1804375f * BB) / Xn;
    const float Y = (0.2126729f * RR + 0.7151522f * GG + 0.0721750f * BB) / Yn;
    const float Z = (0.0193339f * RR + 0.1191920f * GG + 0.9503041f * BB) / Zn;

    const float fX = LabPivot(X);
    const float fY = LabPivot(Y);
    const float fZ = LabPivot(Z);

    float L = 116.0f * fY - 16.0f;
    L = L > 0.0f ? L : 0.0f;
    const float a = 500.0f * (fX - fY);
    const float b = 200.0f * (fY - fZ);
    return { L, a, b };
}

cudaError_t FindColorCuda(Color_BGR *src, float *ret, Color_Lab target, unsigned int size);

// Writes into ret[i] the Euclidean Lab distance (Delta E, CIE 1976) between
// src[i] and target, for every i in [0, size).
//
// Launch layout: 1-D grid of 1-D blocks, any size. The loop advances by the
// total number of launched threads, so the result does not depend on the grid
// exactly covering `size`, and out-of-range threads do nothing.
// Uses no shared memory. src and ret must be device pointers with at least
// `size` elements each.
__global__ void FindColorCudaKernel(const Color_BGR* __restrict__ src,
                                    float* __restrict__ ret,
                                    Color_Lab target,
                                    unsigned int size)
{
    // 64-bit index so blockIdx.x * blockDim.x and i += stride cannot wrap.
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < size; i += stride)
    {
        const Color_Lab c = BGR2Lab(src[i]);
        const float dL = c.L - target.L;
        const float da = c.a - target.a;
        const float db = c.b - target.b;
        ret[i] = sqrtf(dL * dL + da * da + db * db);
    }
}

Color_BGR src_mat[1024 * 1024];
float ret_mat[1024 * 1024];

// Fills a 1024*1024 buffer with random colors, runs the search once as a
// warm-up, runs it again under a clock() timer, and prints the elapsed clock
// ticks followed by the number of pixels whose Delta E to the target is < 2.
int main()
{
    const unsigned int kPixelCount = 1024 * 1024;

    for (unsigned int i = 0; i < kPixelCount; i++)
    {
        src_mat[i] = { std::rand() % 256, std::rand() % 256, std::rand() % 256 };
    }

    const Color_Lab target = BGR2Lab({ 190, 35, 41 });

    // Warm-up run so one-time CUDA initialization is excluded from the timing.
    cudaError_t cudaStatus = FindColorCuda(src_mat, ret_mat, target, kPixelCount);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "FindColorCuda failed!");
        return 1;
    }

    const clock_t st = clock();

    // Timed run (includes allocation and both host<->device copies).
    cudaStatus = FindColorCuda(src_mat, ret_mat, target, kPixelCount);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "FindColorCuda failed!");
        return 1;
    }

    // clock_t is not int on every platform; cast to match the format.
    printf("Cost: %ld\n", (long)(clock() - st));

    int count = 0;
    for (unsigned int i = 0; i < kPixelCount; i++)
    {
        if (ret_mat[i] < 2.0f)
            count++;
    }
    printf("%d", count);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    return 0;
}

// Host helper: copies `size` pixels from src to the GPU, computes the Delta E
// of each pixel against `target`, and copies the `size` distances back to ret.
//
// Parameters:
//   src    - host array of at least `size` pixels (input)
//   ret    - host array of at least `size` floats (output)
//   target - reference color in Lab
//   size   - number of pixels; 0 is a no-op that returns cudaSuccess
//
// Returns cudaSuccess, cudaErrorInvalidValue when src or ret is null with a
// non-zero size, or the first error reported by the CUDA runtime. Device
// buffers are released on every path.
cudaError_t FindColorCuda(Color_BGR* src, float* ret, Color_Lab target, unsigned int size)
{
    if (size == 0) {
        // Nothing to compute; a 0-block launch would be an invalid configuration.
        return cudaSuccess;
    }
    if (src == nullptr || ret == nullptr) {
        return cudaErrorInvalidValue;
    }

    const unsigned int kThreadsPerBlock = 256;
    // Ceil-divide in 64 bits so size + kThreadsPerBlock - 1 cannot wrap.
    const unsigned int blocks =
        (unsigned int)(((size_t)size + kThreadsPerBlock - 1) / kThreadsPerBlock);

    Color_BGR* dev_src = nullptr;
    float* dev_ret = nullptr;
    cudaError_t cudaStatus = cudaSuccess;

    // Single-pass loop: `break` jumps to the shared cleanup below.
    do {
        // Choose which GPU to run on, change this on a multi-GPU system.
        cudaStatus = cudaSetDevice(0);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
            break;
        }

        // Allocate GPU buffers: one input (pixels), one output (distances).
        cudaStatus = cudaMalloc((void**)&dev_src, size * sizeof(Color_BGR));
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMalloc failed!");
            break;
        }

        cudaStatus = cudaMalloc((void**)&dev_ret, size * sizeof(float));
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMalloc failed!");
            break;
        }

        // Copy the input pixels from host memory to the GPU buffer.
        cudaStatus = cudaMemcpy(dev_src, src, size * sizeof(Color_BGR), cudaMemcpyHostToDevice);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy failed!");
            break;
        }

        FindColorCudaKernel<<<blocks, kThreadsPerBlock>>>(dev_src, dev_ret, target, size);

        // Check for any errors launching the kernel.
        cudaStatus = cudaGetLastError();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "FindColorCuda launch failed: %s\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // cudaDeviceSynchronize waits for the kernel to finish, and returns
        // any errors encountered during execution.
        cudaStatus = cudaDeviceSynchronize();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching FindColorCudaKernel!\n", cudaStatus);
            break;
        }

        // Copy the distances from the GPU buffer back to host memory.
        cudaStatus = cudaMemcpy(ret, dev_ret, size * sizeof(float), cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaMemcpy failed!");
            break;
        }
    } while (false);

    // cudaFree(nullptr) is a no-op, so this is safe on every exit path.
    cudaFree(dev_ret);
    cudaFree(dev_src);

    return cudaStatus;
}

在 4060 Laptop 上，处理 1024*1024 个像素取得了 8ms 的成绩。

关键词：