Unsolved: strange behavior of Qt with CUDA values...
-
I am printing the values of a CUDA variable, but only zeros are printed. While debugging, I store each value in a float variable x; the debugger shows the original value in x, but even when I print x it shows zero.
This problem occurs only in Qt: if I run the executable generated by Qt in a terminal (cuda-memcheck ./CU_KERNEL), I get the original values, not zeros.
/*
 * Terminal run of the same executable (values print correctly):
 *
 * (base) sher@Sher-Afghan-Malik:~/CU_KERNEL$ cuda-memcheck ./CU_KERNEL
 * ========= CUDA-MEMCHECK
 * There are 1 CUDA capable devices on your machine :
 * device 0 : sms 30 Capabilities 7.5, SmClock 1200.0 Mhz, MemSize (Mb) 5926, MemClock 5480.0 Mhz, Ecc=0, boardGroupID=0
 * n=0, c=0:
 * 0 1 2 3 4
 * 5 6 7 8 9
 * 10 11 12 13 14
 * 15 16 17 18 19
 * 20 21 22 23 24
 * ========= ERROR SUMMARY: 0 errors
 */

#include <iomanip>
#include <iostream>
#include <string>

// NOTE: the headers that declare createHandles, showDevices, checkCudaErrors,
// h_t_d_i and h_t_d_c are not part of this excerpt.

// Forward declaration: printc is defined after main() but called from it.
inline void printc(float *data, const int *NCHW);

/**
 * Allocates managed buffers for a 1x1x5x5 input, a 2x1x2x2 filter bank, an
 * output buffer and a 2-element bias, fills input and filters through the
 * project helpers, prints the input tensor and releases the buffers.
 *
 * Returns 0 on completion.
 */
int main() {
    createHandles();
    showDevices();
    // NOTE(review): cudaDeviceReset() destroys the current device context. If
    // createHandles() created library handles on that context they would no
    // longer be usable after this call -- confirm the intended ordering.
    checkCudaErrors(cudaDeviceReset());

    int input[] = {1, 1, 5, 5};
    // Only consumed by the disabled convNDF call below.
    std::string p_type = "same";

    int insize = input[0] * input[1] * input[2] * input[3];

    int filters[] = {2, 1, 2, 2};
    int filtersize = filters[0] * filters[1] * filters[2] * filters[3];

    // Only consumed by the disabled convNDF call below.
    int padding[] = {1, 0, 1, 0};
    int strides[] = {1, 1};
    int dilation[] = {1, 1};

    int outsize = 72;

    float *hostI, *hostF, *hostO, *bias;
    checkCudaErrors(cudaMallocManaged(&hostI, insize * sizeof(float)));
    checkCudaErrors(cudaMallocManaged(&hostF, filtersize * sizeof(float)));
    checkCudaErrors(cudaMallocManaged(&hostO, outsize * sizeof(float)));
    checkCudaErrors(cudaMallocManaged(&bias, int(2) * sizeof(float)));

    bias[0] = 2;
    bias[1] = 3;

    h_t_d_i(input, hostI);
    h_t_d_c(filters, hostF);

    // Wait for all outstanding device work BEFORE the host reads the managed
    // buffer. The original code printed first and synchronized afterwards.
    // NOTE(review): if h_t_d_i / h_t_d_c launch kernels (not visible here),
    // that host read raced with the device, so whether zeros or the real
    // values were printed depended on timing. That would explain output that
    // differs between launch environments -- confirm against the helpers.
    checkCudaErrors(cudaDeviceSynchronize());
    printc(hostI, input);

    // convNDF (input, filters, padding, dilation, strides, hostI, bias, hostF, &hostO, p_type);
    // int dim [] = {1, 2, 6, 6};
    // int pool_size [] = {2, 2};
    // int padding_size [] = {0, 0};
    // int strides_size [] = {2, 2};
    // insize = 72;
    // outsize = 18;
    // cudaMallocManaged( &hostI_m, insize * sizeof(float) );
    // cudaMallocManaged( &hostO_m, outsize * sizeof(float) );
    // checkCudaErrors(cudaMemcpy(hostI_m, hostO, sizeof(float) * insize, cudaMemcpyHostToDevice));
    // checkCudaErrors(cudaDeviceSynchronize());
    // poolForward( dim, pool_size, padding_size, strides_size, hostI_m, hostO_m);

    checkCudaErrors(cudaFree(hostI));
    checkCudaErrors(cudaFree(hostF));
    checkCudaErrors(cudaFree(hostO));
    checkCudaErrors(cudaFree(bias));
    // cudaFree(hostI_m);
    // cudaFree(hostO_m);
    return 0;
}

/**
 * Prints a 4-D tensor to std::cout.
 *
 * data: elements read sequentially from index 0; must be readable from the
 *       host and hold at least NCHW[0]*NCHW[1]*NCHW[2]*NCHW[3] floats.
 * NCHW: the four extents. For every (i, j) pair over the first two extents a
 *       "n=i, c=j:" header line is printed, followed by NCHW[2] rows of
 *       NCHW[3] tab-terminated values.
 */
inline void printc(float *data, const int *NCHW) {
    int a = 0;  // running flat index into data
    for (int i = 0; i < NCHW[0]; ++i) {
        for (int j = 0; j < NCHW[1]; ++j) {
            std::cout << "n=" << i << ", c=" << j << ":" << std::endl;
            for (int k = 0; k < NCHW[2]; ++k) {
                for (int l = 0; l < NCHW[3]; ++l) {
                    std::cout << std::setw(0) << std::right << data[a] << "\t";
                    ++a;
                }
                std::cout << std::endl;
            }
        }
    }
    std::cout << std::endl;
}