2016-05-19 4 views
1

Visual Studio 2015で「Code Builder Project for Windows」(CPU用)を作成しました。このプロジェクトには、私がまったく手を加えていないコードが最初から付属しており、基本的にはベクトル加算を行います。ただし、ベクトル加算そのものはTemplate.clファイルで行われます。問題は、別個の.clファイルが機能せず、エラーMSB3722が発生することです。

Error MSB3722 The command ""C:\Program Files (x86)\Intel\OpenCL SDK\bin\x86\ioc32.exe" -cmd=build -input="blahblah\user\visual studio 2015\Projects\OpenCLProject3\OpenCLProject3\Template.cl" -output="Debug\Template.out" -VS -device=CPU_2_0 -simd=default -targetos=current -bo=" "" exited with code 5. Please verify that you have sufficient rights to run this command. OpenCLProject3 C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V140\BuildCustomizations\IntelOpenCL.targets 98

しかし、カーネルをcppファイルにコピーして文字列として持たせると、正常に実行されます。一方、このプロジェクトをそのままコンパイルしようとすると、上記のエラーが発生します。文字列は次のとおりです。また

// Kernel source embedded as a string. Adjacent literals are concatenated by the
// compiler, so the resulting text is a single line with no added whitespace.
const char* prog1 =
    "__kernel void Add(__global int* pA, __global int* pB, __global int* pC){"
    "const int x = get_global_id(0);"
    "const int y = get_global_id(1);"
    "const int width = get_global_size(0);"
    "const int id = y * width + x;"
    "pC[id] = pA[id] + pB[id];"
    "}";

ソースファイルから読み取る代わりに、CreateAndBuildProgram関数では&prog1のアドレスを渡しているだけです。エラーコードは削除してあります。

--References 
--External 
--Headers 
--OpenCL 
    --Template.cl 
--Source Files 
    --OpenCLProject3.cpp 
    --utils.cpp 

注:上記は、Visual Studioのプロジェクトツリーの構造です。Visual Studio 2015でCode Builderプロジェクトを生成すると、まったく同じコードと構造が得られます。

以下がホストコード(OpenCLProject3.cpp)です。

#include <stdio.h> 
    #include <stdlib.h> 
    #include <tchar.h> 
    #include <memory.h> 
    #include <vector> 

    #include "CL\cl.h" 
    #include "utils.h" 

    //for perf. counters 
    #include <Windows.h> 


    // Macros for OpenCL versions 
    // These float tags are stored in the version fields of ocl_args_d_t
    // (platformVersion, deviceVersion, compilerVersion) and compared with ==.
    #define OPENCL_VERSION_1_2 1.2f 
    #define OPENCL_VERSION_2_0 2.0f 

    /*
    * Bundles every OpenCL handle used by this sample together with the detected
    * platform / device / OpenCL C versions.
    *
    * The constructor sets every handle to NULL and every version to OPENCL_VERSION_1_2;
    * the destructor releases each handle that is not NULL. Because the destructor
    * releases the handles, the type is non-copyable: a copy would release them twice.
    */
    struct ocl_args_d_t
    {
        ocl_args_d_t();
        ~ocl_args_d_t();

        // Regular OpenCL objects:
        cl_context       context;         // hold the context handler
        cl_device_id     device;          // hold the selected device handler
        cl_command_queue commandQueue;    // hold the commands-queue handler
        cl_program       program;         // hold the program handler
        cl_kernel        kernel;          // hold the kernel handler
        float            platformVersion; // hold the OpenCL platform version (default 1.2)
        float            deviceVersion;   // hold the OpenCL device version (default 1.2)
        float            compilerVersion; // hold the device OpenCL C version (default 1.2)

        // Objects that are specific for algorithm implemented in this sample
        cl_mem           srcA;            // hold first source buffer
        cl_mem           srcB;            // hold second source buffer
        cl_mem           dstMem;          // hold destination buffer

    private:
        // Declared but intentionally not defined: copying is forbidden so that
        // the handles released in the destructor are never released twice.
        ocl_args_d_t(const ocl_args_d_t&);
        ocl_args_d_t& operator=(const ocl_args_d_t&);
    };

    // Default state: no OpenCL object has been created yet (all handles NULL)
    // and every version field assumes OpenCL 1.2.
    ocl_args_d_t::ocl_args_d_t()
        : context(NULL)
        , device(NULL)
        , commandQueue(NULL)
        , program(NULL)
        , kernel(NULL)
        , platformVersion(OPENCL_VERSION_1_2)
        , deviceVersion(OPENCL_VERSION_1_2)
        , compilerVersion(OPENCL_VERSION_1_2)
        , srcA(NULL)
        , srcB(NULL)
        , dstMem(NULL)
    {
    }

    // Releases every handle that was created, in this order: kernel, program,
    // the three memory objects, command queue, device, context.
    // A failing release is logged and the remaining handles are still released.
    ocl_args_d_t::~ocl_args_d_t()
    {
        if (NULL != kernel)
        {
            const cl_int status = clReleaseKernel(kernel);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseKernel returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != program)
        {
            const cl_int status = clReleaseProgram(program);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseProgram returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != srcA)
        {
            const cl_int status = clReleaseMemObject(srcA);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != srcB)
        {
            const cl_int status = clReleaseMemObject(srcB);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != dstMem)
        {
            const cl_int status = clReleaseMemObject(dstMem);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != commandQueue)
        {
            const cl_int status = clReleaseCommandQueue(commandQueue);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseCommandQueue returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != device)
        {
            const cl_int status = clReleaseDevice(device);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseDevice returned '%s'.\n", TranslateOpenCLError(status));
            }
        }

        if (NULL != context)
        {
            const cl_int status = clReleaseContext(context);
            if (status != CL_SUCCESS)
            {
                LogError("Error: clReleaseContext returned '%s'.\n", TranslateOpenCLError(status));
            }
        }
    }


    /*
    * Tells whether the name of `platform` contains `preferredPlatform` as a sub-string.
    *
    * platform          - platform whose CL_PLATFORM_NAME is queried.
    * preferredPlatform - NUL-terminated text to look for; must not be NULL.
    *
    * Returns true when the name contains the text; false when it does not or when
    * one of the clGetPlatformInfo queries fails (the failure is logged).
    */
    bool CheckPreferredPlatformMatch(cl_platform_id platform, const char* preferredPlatform)
    {
        size_t stringLength = 0;

        // First query: param_value is NULL, so only the required length is returned.
        cl_int err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_NAME length returned '%s'.\n", TranslateOpenCLError(err));
            return false;
        }

        // One extra zero-filled element keeps &platformName[0] valid even when the
        // reported length is 0 and guarantees NUL termination for strstr below.
        std::vector<char> platformName(stringLength + 1, '\0');

        // Second query: read the name itself.
        err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformName.size(), &platformName[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_NAME returned %s.\n", TranslateOpenCLError(err));
            return false;
        }

        return strstr(&platformName[0], preferredPlatform) != NULL;
    }

    /*
    * Returns the first available platform that
    *   - passes CheckPreferredPlatformMatch when preferredPlatform is a non-empty string
    *     (NULL or "" means any platform is acceptable), and
    *   - reports at least one device of type deviceType.
    * Returns NULL when no such platform exists or when enumerating platforms fails.
    */
    cl_platform_id FindOpenCLPlatform(const char* preferredPlatform, cl_device_type deviceType)
    {
        // Ask only for the platform count (platforms pointer is NULL).
        cl_uint platformCount = 0;
        cl_int status = clGetPlatformIDs(0, NULL, &platformCount);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clGetplatform_ids() to get num platforms returned %s.\n", TranslateOpenCLError(status));
            return NULL;
        }
        LogInfo("Number of available platforms: %u\n", platformCount);

        if (platformCount == 0)
        {
            LogError("Error: No platforms found!\n");
            return NULL;
        }

        // Fetch the platform ids themselves.
        std::vector<cl_platform_id> candidates(platformCount);
        status = clGetPlatformIDs(platformCount, &candidates[0], NULL);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clGetplatform_ids() to get platforms returned %s.\n", TranslateOpenCLError(status));
            return NULL;
        }

        // The name filter applies only when a non-empty preferred name was given.
        const bool filterByName = (preferredPlatform != NULL) && (strlen(preferredPlatform) > 0);

        for (cl_uint idx = 0; idx < platformCount; ++idx)
        {
            if (filterByName && !CheckPreferredPlatformMatch(candidates[idx], preferredPlatform))
            {
                continue;
            }

            // deviceCount stays 0 when the query fails; the failure is logged but is
            // not fatal, and the search simply moves on to the next platform.
            cl_uint deviceCount = 0;
            status = clGetDeviceIDs(candidates[idx], deviceType, 0, NULL, &deviceCount);
            if (status != CL_SUCCESS)
            {
                LogError("clGetDeviceIDs() returned %s.\n", TranslateOpenCLError(status));
            }

            if (deviceCount != 0)
            {
                return candidates[idx];
            }
        }

        return NULL;
    }


    /*
    * Reads the platform version, the device version and the device OpenCL C version
    * strings (clGetPlatformInfo / clGetDeviceInfo) and stores OPENCL_VERSION_2_0 in the
    * matching field of `ocl` when the string contains "OpenCL 2.0" (or "OpenCL C 2.0"
    * for the compiler version). A field whose string does not match is left untouched.
    *
    * platformId - platform queried for CL_PLATFORM_VERSION.
    * ocl        - ocl->device is queried; platformVersion, deviceVersion and
    *              compilerVersion may be updated.
    *
    * Returns CL_SUCCESS, or the error code of the first failing query (already logged).
    */
    int GetPlatformAndDeviceVersion (cl_platform_id platformId, ocl_args_d_t *ocl)
    {
        cl_int err = CL_SUCCESS;
        size_t stringLength = 0;

        // ---- Platform version ----
        // Length query first (param_value is NULL).
        err = clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        // Each buffer gets one extra zero-filled element: &buffer[0] stays valid for a
        // reported length of 0 and the text is always NUL-terminated for strstr.
        std::vector<char> platformVersion(stringLength + 1, '\0');

        err = clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, platformVersion.size(), &platformVersion[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_VERSION returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        if (strstr(&platformVersion[0], "OpenCL 2.0") != NULL)
        {
            ocl->platformVersion = OPENCL_VERSION_2_0;
        }

        // ---- Device version ----
        err = clGetDeviceInfo(ocl->device, CL_DEVICE_VERSION, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        std::vector<char> deviceVersion(stringLength + 1, '\0');

        err = clGetDeviceInfo(ocl->device, CL_DEVICE_VERSION, deviceVersion.size(), &deviceVersion[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_VERSION returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        if (strstr(&deviceVersion[0], "OpenCL 2.0") != NULL)
        {
            ocl->deviceVersion = OPENCL_VERSION_2_0;
        }

        // ---- Device OpenCL C (compiler) version ----
        err = clGetDeviceInfo(ocl->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &stringLength);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_OPENCL_C_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
            return err;
        }

        std::vector<char> compilerVersion(stringLength + 1, '\0');

        err = clGetDeviceInfo(ocl->device, CL_DEVICE_OPENCL_C_VERSION, compilerVersion.size(), &compilerVersion[0], NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clGetDeviceInfo() to get CL_DEVICE_OPENCL_C_VERSION returned %s.\n", TranslateOpenCLError(err));
            return err;
        }

        if (strstr(&compilerVersion[0], "OpenCL C 2.0") != NULL)
        {
            ocl->compilerVersion = OPENCL_VERSION_2_0;
        }

        return err;
    }


    /*
    * Fills inputArray with arrayWidth * arrayHeight values taken from rand().
    * The generator is re-seeded with the same fixed seed (12345) on every call.
    */
    void generateInput(cl_int* inputArray, cl_uint arrayWidth, cl_uint arrayHeight)
    {
        srand(12345);

        const cl_uint elementCount = arrayWidth * arrayHeight;
        cl_int* const end = inputArray + elementCount;
        for (cl_int* cursor = inputArray; cursor != end; ++cursor)
        {
            *cursor = rand();
        }
    }

    /*
    * Fills ocl->context, ocl->device and ocl->commandQueue for a device of type deviceType
    * found on a platform whose name contains "Intel".
    *
    * Returns CL_SUCCESS on success, CL_INVALID_VALUE when no platform is found,
    * otherwise the error code of the failing OpenCL call (already logged).
    */
    int SetupOpenCL(ocl_args_d_t *ocl, cl_device_type deviceType) 
    { 
     // The following variable stores return codes for all OpenCL calls. 
     cl_int err = CL_SUCCESS; 

     // Query for all available OpenCL platforms on the system 
     // FindOpenCLPlatform picks a platform whose name has "Intel" as a sub-string
     // and that exposes at least one device of deviceType.
     cl_platform_id platformId = FindOpenCLPlatform("Intel", deviceType); 
     if (NULL == platformId) 
     { 
      LogError("Error: Failed to find OpenCL platform.\n"); 
      return CL_INVALID_VALUE; 
     } 

     // Create context with device of specified type. 
     // Required device type is passed as function argument deviceType. 
     // So you may use this function to create context for any CPU or GPU OpenCL device. 
     // No notification callback is registered (pfn_notify is NULL) and user_data is NULL.
     // contextProperties is a zero-terminated list: property name, value, terminator.
     cl_context_properties contextProperties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0}; 
     ocl->context = clCreateContextFromType(contextProperties, deviceType, NULL, NULL, &err); 
     if ((CL_SUCCESS != err) || (NULL == ocl->context)) 
     { 
      LogError("Couldn't create a context, clCreateContextFromType() returned '%s'.\n", TranslateOpenCLError(err)); 
      return err; 
     } 

     // Query for OpenCL device which was used for context creation 
     // The destination has room for exactly one cl_device_id.
     err = clGetContextInfo(ocl->context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), &ocl->device, NULL); 
     if (CL_SUCCESS != err) 
     { 
      LogError("Error: clGetContextInfo() to get list of devices returned %s.\n", TranslateOpenCLError(err)); 
      return err; 
     } 

     // Read the OpenCL platform's version and the device OpenCL and OpenCL C versions 
     // NOTE(review): the return value is discarded here, so a failed version query
     // does not abort the setup.
     GetPlatformAndDeviceVersion(platformId, ocl); 

     // Create command queue. 
     // OpenCL kernels are enqueued for execution to a particular device through special objects called command queues. 
     // Command queue guarantees some ordering between calls and other OpenCL commands. 
     // Here you create a simple in-order OpenCL command queue that doesn't allow execution of two kernels in parallel on a target device. 
     // Profiling is enabled on the queue in every variant below.
    #ifdef CL_VERSION_2_0 
     // Headers define CL_VERSION_2_0: choose the creation call at run time
     // from the detected device version.
     if (OPENCL_VERSION_2_0 == ocl->deviceVersion) 
     { 
      // Zero-terminated list: property name, value, terminator.
      const cl_command_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; 
      ocl->commandQueue = clCreateCommandQueueWithProperties(ocl->context, ocl->device, properties, &err); 
     } 
     else { 
      // default behavior: OpenCL 1.2 
      cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; 
      ocl->commandQueue = clCreateCommandQueue(ocl->context, ocl->device, properties, &err); 
     } 
    #else 
     // default behavior: OpenCL 1.2 
     cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; 
     ocl->commandQueue = clCreateCommandQueue(ocl->context, ocl->device, properties, &err); 
    #endif 
     // The same message is logged whichever creation call was used.
     if (CL_SUCCESS != err) 
     { 
      LogError("Error: clCreateCommandQueue() returned %s.\n", TranslateOpenCLError(err)); 
      return err; 
     } 

     return CL_SUCCESS; 
    } 


    /*
    * Creates ocl->program from the source text of "Template.cl" and builds it for ocl->device.
    *
    * ocl - ocl->context and ocl->device are read; ocl->program receives the new program.
    *
    * Returns CL_SUCCESS, or the error code of the first failing step (already logged).
    * When the build fails with CL_BUILD_PROGRAM_FAILURE the build log is logged as well.
    */
    int CreateAndBuildProgram(ocl_args_d_t *ocl)
    {
        cl_int err = CL_SUCCESS;

        // Load the OpenCL C source text; its length is returned in src_size.
        char* source = NULL;
        size_t src_size = 0;
        err = ReadSourceFromFile("Template.cl", &source, &src_size);
        if (CL_SUCCESS != err)
        {
            LogError("Error: ReadSourceFromFile returned %s.\n", TranslateOpenCLError(err));
            goto Finish;
        }

        // Wrap the source text in a program object; nothing is compiled yet.
        ocl->program = clCreateProgramWithSource(ocl->context, 1, (const char**)&source, &src_size, &err);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clCreateProgramWithSource returned %s.\n", TranslateOpenCLError(err));
            goto Finish;
        }

        // Build the program for the single selected device with no extra options.
        // (clCompileProgram / clLinkProgram are the alternative when a program
        // consists of several parts.)
        err = clBuildProgram(ocl->program, 1, &ocl->device, "", NULL, NULL);
        if (CL_SUCCESS != err)
        {
            LogError("Error: clBuildProgram() for source program returned %s.\n", TranslateOpenCLError(err));

            // On a compile/link failure fetch and print the build log:
            // query its size first, then read it.
            if (err == CL_BUILD_PROGRAM_FAILURE)
            {
                size_t log_size = 0;
                cl_int logErr = clGetProgramBuildInfo(ocl->program, ocl->device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
                if (CL_SUCCESS == logErr)
                {
                    // One extra zero-filled element keeps &build_log[0] valid for an
                    // empty log and guarantees NUL termination for the %s below.
                    std::vector<char> build_log(log_size + 1, '\0');
                    logErr = clGetProgramBuildInfo(ocl->program, ocl->device, CL_PROGRAM_BUILD_LOG, build_log.size(), &build_log[0], NULL);
                    if (CL_SUCCESS == logErr)
                    {
                        LogError("Error happened during the build of OpenCL program.\nBuild log:%s", &build_log[0]);
                    }
                }

                // `err` keeps the build error; a log-retrieval failure is only reported.
                if (CL_SUCCESS != logErr)
                {
                    LogError("Error: clGetProgramBuildInfo() to get the build log returned %s.\n", TranslateOpenCLError(logErr));
                }
            }
        }

    Finish:
        // presumably ReadSourceFromFile allocates with new[] - confirm in utils.cpp
        if (source)
        {
            delete[] source;
            source = NULL;
        }

        return err;
    }


    /*
    * Creates the three OpenCL buffers of the sample directly on top of caller memory
    * (CL_MEM_USE_HOST_PTR): srcA and srcB as read-only inputs, dstMem as write-only output.
    * Every buffer is sizeof(cl_uint) * arrayWidth * arrayHeight bytes long.
    *
    * Returns CL_SUCCESS, or the error code of the first clCreateBuffer that fails
    * (already logged); buffers created before the failure stay in `ocl`.
    */
    int CreateBufferArguments(ocl_args_d_t *ocl, cl_int* inputA, cl_int* inputB, cl_int* outputC, cl_uint arrayWidth, cl_uint arrayHeight)
    {
        const size_t bufferBytes = sizeof(cl_uint) * arrayWidth * arrayHeight;
        cl_int status = CL_SUCCESS;

        // Inputs: the kernel only reads them, so the minimal access flag is READ_ONLY.
        ocl->srcA = clCreateBuffer(ocl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bufferBytes, inputA, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clCreateBuffer for srcA returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        ocl->srcB = clCreateBuffer(ocl->context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bufferBytes, inputB, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clCreateBuffer for srcB returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        // Output: the kernel only writes it, so it is declared WRITE_ONLY.
        ocl->dstMem = clCreateBuffer(ocl->context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, bufferBytes, outputC, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clCreateBuffer for dstMem returned %s\n", TranslateOpenCLError(status));
            return status;
        }

        return CL_SUCCESS;
    }


    /*
    * Binds ocl->srcA, ocl->srcB and ocl->dstMem to kernel arguments 0, 1 and 2.
    * Stops at the first failure, logs it and returns its error code;
    * returns CL_SUCCESS when all three arguments were set.
    */
    cl_uint SetKernelArguments(ocl_args_d_t *ocl)
    {
        // One row per kernel argument, in argument-index order.
        struct ArgBinding
        {
            const cl_mem* buffer;
            const char*   failureFormat;
        };

        const ArgBinding bindings[] =
        {
            { &ocl->srcA,   "error: Failed to set argument srcA, returned %s\n" },
            { &ocl->srcB,   "Error: Failed to set argument srcB, returned %s\n" },
            { &ocl->dstMem, "Error: Failed to set argument dstMem, returned %s\n" }
        };
        const cl_uint bindingCount = sizeof(bindings) / sizeof(bindings[0]);

        for (cl_uint argIndex = 0; argIndex < bindingCount; ++argIndex)
        {
            const cl_int status = clSetKernelArg(ocl->kernel, argIndex, sizeof(cl_mem), bindings[argIndex].buffer);
            if (status != CL_SUCCESS)
            {
                LogError(bindings[argIndex].failureFormat, TranslateOpenCLError(status));
                return status;
            }
        }

        return CL_SUCCESS;
    }


    /*
    * Enqueues ocl->kernel over a two-dimensional width x height global range
    * and waits (clFinish) until the queue has drained.
    * Returns CL_SUCCESS, or the error code of the failing call (already logged).
    */
    cl_uint ExecuteAddKernel(ocl_args_d_t *ocl, cl_uint width, cl_uint height)
    {
        // Global iteration space; no offset and no explicit local size are passed.
        size_t ndRange[2];
        ndRange[0] = width;
        ndRange[1] = height;

        cl_int status = clEnqueueNDRangeKernel(ocl->commandQueue, ocl->kernel, 2, NULL, ndRange, NULL, 0, NULL, NULL);
        if (status == CL_SUCCESS)
        {
            // Enqueueing does not wait for the kernel; clFinish does.
            status = clFinish(ocl->commandQueue);
            if (status == CL_SUCCESS)
            {
                return CL_SUCCESS;
            }

            LogError("Error: clFinish return %s\n", TranslateOpenCLError(status));
            return status;
        }

        LogError("Error: Failed to run kernel, return %s\n", TranslateOpenCLError(status));
        return status;
    }


    /*
    * Maps ocl->dstMem for reading and checks that every element equals
    * inputA[k] + inputB[k]; each mismatch is logged.
    *
    * Returns true when all width * height elements match; false on any mismatch
    * or when the buffer cannot be mapped. Failures of clFinish and of the final
    * unmap are logged but do not change the result.
    */
    bool ReadAndVerify(ocl_args_d_t *ocl, cl_uint width, cl_uint height, cl_int *inputA, cl_int *inputB)
    {
        cl_int status = CL_SUCCESS;

        // Blocking map of the whole output buffer into host address space.
        const size_t mappedBytes = sizeof(cl_uint) * width * height;
        cl_int *mapped = (cl_int *)clEnqueueMapBuffer(ocl->commandQueue, ocl->dstMem, true, CL_MAP_READ, 0, mappedBytes, 0, NULL, NULL, &status);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clEnqueueMapBuffer returned %s\n", TranslateOpenCLError(status));
            return false;
        }

        // Drain the queue before the mapped region is inspected.
        status = clFinish(ocl->commandQueue);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clFinish returned %s\n", TranslateOpenCLError(status));
        }

        // Compare every element with the expected sum.
        bool allMatch = true;
        const unsigned int elementCount = width * height;
        for (unsigned int idx = 0; idx != elementCount; ++idx)
        {
            if (mapped[idx] == inputA[idx] + inputB[idx])
            {
                continue;
            }

            LogError("Verification failed at %d: (%d + %d = %d)\n", idx, inputA[idx], inputB[idx], mapped[idx]);
            allMatch = false;
        }

        // Give the mapped region back before the buffer is released.
        status = clEnqueueUnmapMemObject(ocl->commandQueue, ocl->dstMem, mapped, 0, NULL, NULL);
        if (status != CL_SUCCESS)
        {
            LogError("Error: clEnqueueUnmapMemObject returned %s\n", TranslateOpenCLError(status));
        }

        return allMatch;
    }


    /* 
    * main execution routine 
    * Basically it consists of three parts: 
    * - generating the inputs 
    * - running OpenCL kernel 
    * - reading results of processing 
    */ 
    int _tmain(int argc, TCHAR* argv[]) 
    { 
     cl_int err; 
     ocl_args_d_t ocl; 
     cl_device_type deviceType = CL_DEVICE_TYPE_CPU; 

     LARGE_INTEGER perfFrequency; 
     LARGE_INTEGER performanceCountNDRangeStart; 
     LARGE_INTEGER performanceCountNDRangeStop; 

     cl_uint arrayWidth = 1024; 
     cl_uint arrayHeight = 1024; 

     //initialize Open CL objects (context, queue, etc.) 
     if (CL_SUCCESS != SetupOpenCL(&ocl, deviceType)) 
     { 
      return -1; 
     } 

     // allocate working buffers. 
     // the buffer should be aligned with 4K page and size should fit 64-byte cached line 
     cl_uint optimizedSize = ((sizeof(cl_int) * arrayWidth * arrayHeight - 1)/64 + 1) * 64; 
     cl_int* inputA = (cl_int*)_aligned_malloc(optimizedSize, 4096); 
     cl_int* inputB = (cl_int*)_aligned_malloc(optimizedSize, 4096); 
     cl_int* outputC = (cl_int*)_aligned_malloc(optimizedSize, 4096); 
     if (NULL == inputA || NULL == inputB || NULL == outputC) 
     { 
      LogError("Error: _aligned_malloc failed to allocate buffers.\n"); 
      return -1; 
     } 

     //random input 
     generateInput(inputA, arrayWidth, arrayHeight); 
     generateInput(inputB, arrayWidth, arrayHeight); 

     // Create OpenCL buffers from host memory 
     // These buffers will be used later by the OpenCL kernel 
     if (CL_SUCCESS != CreateBufferArguments(&ocl, inputA, inputB, outputC, arrayWidth, arrayHeight)) 
     { 
      return -1; 
     } 

     // Create and build the OpenCL program 
     if (CL_SUCCESS != CreateAndBuildProgram(&ocl)) 
     { 
      return -1; 
     } 

     // Program consists of kernels. 
     // Each kernel can be called (enqueued) from the host part of OpenCL application. 
     // To call the kernel, you need to create it from existing program. 
     ocl.kernel = clCreateKernel(ocl.program, "Add", &err); 
     if (CL_SUCCESS != err) 
     { 
      LogError("Error: clCreateKernel returned %s\n", TranslateOpenCLError(err)); 
      return -1; 
     } 

     // Passing arguments into OpenCL kernel. 
     if (CL_SUCCESS != SetKernelArguments(&ocl)) 
     { 
      return -1; 
     } 

     // Regularly you wish to use OpenCL in your application to achieve greater performance results 
     // that are hard to achieve in other ways. 
     // To understand those performance benefits you may want to measure time your application spent in OpenCL kernel execution. 
     // The recommended way to obtain this time is to measure interval between two moments: 
     // - just before clEnqueueNDRangeKernel is called, and 
     // - just after clFinish is called 
     // clFinish is necessary to measure entire time spending in the kernel, measuring just clEnqueueNDRangeKernel is not enough, 
     // because this call doesn't guarantees that kernel is finished. 
     // clEnqueueNDRangeKernel is just enqueue new command in OpenCL command queue and doesn't wait until it ends. 
     // clFinish waits until all commands in command queue are finished, that suits your need to measure time. 
     bool queueProfilingEnable = true; 
     if (queueProfilingEnable) 
      QueryPerformanceCounter(&performanceCountNDRangeStart); 
     // Execute (enqueue) the kernel 
     if (CL_SUCCESS != ExecuteAddKernel(&ocl, arrayWidth, arrayHeight)) 
     { 
      return -1; 
     } 
     if (queueProfilingEnable) 
      QueryPerformanceCounter(&performanceCountNDRangeStop); 

     // The last part of this function: getting processed results back. 
     // use map-unmap sequence to update original memory area with output buffer. 
     ReadAndVerify(&ocl, arrayWidth, arrayHeight, inputA, inputB); 

     // retrieve performance counter frequency 
     if (queueProfilingEnable) 
     { 
      QueryPerformanceFrequency(&perfFrequency); 
      LogInfo("NDRange performance counter time %f ms.\n", 
       1000.0f*(float)(performanceCountNDRangeStop.QuadPart - performanceCountNDRangeStart.QuadPart)/(float)perfFrequency.QuadPart); 
     } 

     _aligned_free(inputA); 
     _aligned_free(inputB); 
     _aligned_free(outputC); 

     return 0; 
    } 

以下がカーネルコード(Template.cl)です:

// Element-wise addition over a two-dimensional global range: pC[id] = pA[id] + pB[id],
// where id is the row-major linear index of the work-item (row length = global size of
// dimension 0). The inputs are only read, so they are const-qualified. The index is
// kept in size_t, the type returned by get_global_id / get_global_size, so that it is
// not truncated to int.
__kernel void Add(__global const int* pA, __global const int* pB, __global int* pC)
{
    const size_t x     = get_global_id(0);
    const size_t y     = get_global_id(1);
    const size_t width = get_global_size(0);

    const size_t id = y * width + x;

    pC[id] = pA[id] + pB[id];
}
+0

読みやすくするために書式設定されたコードを投稿することはできますか?エラーがどこから来ているのかは本当に明らかではありません。 – Farside

+0

生成されたものをすべて追加します。まあ、少なくともcppファイルとclファイルは。 –

答えて

1

これは次の質問の重複かもしれません。

OpenCL code 'Error MSB3721' for Intel OpenCL SDK on Visual Studio 2010

解決策は、'.cl' ファイルをプロジェクトから削除することでした。

+0

ええ!それで動作しますが、奇妙な解決策ですね...おそらくVisual Studioのバグでしょうか? –

+1

実際にはバグではありません。プロジェクト内のすべてのファイルについて、VSはプロジェクトの構築時に何をすべきかを知る必要があります。 .clファイルの場合、VSはその処理方法を知らないため、エラーが発生します。 VSは何もする必要がないため、解決策はプロジェクトからファイルを削除することです。ビルド中に何らかのプロジェクト設定でVSがこのファイルを無視するように指示することも可能ですが、試していませんでした。 –

関連する問題