Build Error cuda on QT with Jetson TX2
-
Hello.
I am building a cuda sample in Qt Creator.
However, an error of "undefined reference to'addVectorsGPU'" was detected in "main.cpp".
i do not know the reason. Please Help!!/--------------- main.cpp ---------------------------/
#include <iostream>
#include <chrono>extern "C"
double* addVectorsGPU(double* a, double* b, int n);double* addVectorsCPU(double *a, double b, int n) {
auto r = (double) malloc(n * sizeof(double));
for(int i = 0; i < n; ++ i)
r[i] = a[i] + b[i];
return r;
}double* genRanVec(int n) {
double* v = (double*) malloc(n * sizeof(double));
for(int i = 0; i < n; ++ i)
v[i] = (double) rand() / RAND_MAX;
return v;
}int main() {
int n = 1<<20;
std::cout << "double vector addition, n = " << n << "\n\n";
double* a = genRanVec(n);
double* b = genRanVec(n);auto t = std::chrono::high_resolution_clock::now(); auto r1 = addVectorsGPU(a, b, n); auto gpuTimeMs = (double) std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - t).count() / 1000000; t = std::chrono::high_resolution_clock::now(); auto r2 = addVectorsCPU(a, b, n); auto cpuTimeMs = (double) std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - t).count() / 1000000; std::cout << "\nTOTAL----------------\n"; std::cout << "GPU: " << gpuTimeMs << " milliseconds\n"; std::cout << "CPU: " << cpuTimeMs << " milliseconds\n"; delete[] r1; delete[] r2; return 0;
}
/--------------- vectorAdd.cu ---------------------------/
#include <iostream>
#include <chrono>
#include "cuda_runtime.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"// helper functions for cleaner time measuring code
std::chrono::time_pointstd::chrono::high_resolution_clock now() {
return std::chrono::high_resolution_clock::now();
}template <typename T>
double milliseconds(T t) {
return (double) std::chrono::duration_caststd::chrono::nanoseconds(t).count() / 1000000;
}// gpu kernel function
global void addKernel(double* x, double* y, int n)
{
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;for(int i = index; i < n; i += stride) y[i] = x[i] + y[i];
}
extern "C"
double* addVectorsGPU(double* a, double* b, int n) {
auto t1 = now();double* x; double* y; double* z; cudaMalloc(&x, n * sizeof(double)); cudaMalloc(&y, n * sizeof(double)); cudaMemcpy(x, a, n * sizeof(double), cudaMemcpyHostToDevice); cudaMemcpy(y, b, n * sizeof(double), cudaMemcpyHostToDevice); auto t2 = now(); cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, 0); int blockSize = deviceProp.maxThreadsPerBlock; int numBlocks = (n - 1) / blockSize + 1; addKernel<<<numBlocks, blockSize>>>(x, y, n); cudaDeviceSynchronize(); auto t3 = now(); z = (double*) malloc(n * sizeof(double)); cudaMemcpy(z, y, n * sizeof(double), cudaMemcpyDeviceToHost); cudaFree(x); cudaFree(y); auto t4 = now(); std::cout << "GPU time breakdown--------\n"; std::cout << "loading into device memory: " << milliseconds(t2 - t1) << " milliseconds\n"; std::cout << "actual addition: " << milliseconds(t3 - t2) << " milliseconds\n"; std::cout << "loading into host memory: " << milliseconds(t4 - t3) << " milliseconds\n"; return z;
}
/--------------- CudaConsole.pro ---------------------------/
QT += core
QT -= gui
...
OBJECT_DIR = $$DESTDIR/cuda/CUDA_SOURCES += $$OTHER_FILES
OTHER_FILES += vectorAdd.cuCUDA_DIR = /usr/local/cuda-9.0
INCLUDEPATH += $$CUDA_DIR/include
QMAKE_LIBDIR += $$CUDA_DIR/lib64
LIBS += -lcudart -lcudaCUDA_ARCH = compute_62
CUDA_GPU_ARCH = sm_62
NVCCFLAGS = --compiler-options --use-fast-math --Wno-deprecated-gpu-targets
CUDA_INC = $$join(INCLUDEPATH, ' -I', '-I', ' ')
cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -g -G -gencode arch=$CUDA_ARCH,code=$$CUDA_GPU_ARCH -c $$NVCCFLAGS $$CUDA_INC $$LIBS $(QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} 2>$1 | sed -r "s/\(([0-9]+)\):\1/g" 1>&2
cuda.dependency_type = TYPE_C
cuda.depend_command = $$CUDA_DIR/bin/nvcc -g -G -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME} | sed "s/^.*: //"
cuda.input = CUDA_SOURCES
cuda.output = ${OBJECT_DIR}${QMAKE_FILE_BASE}_cuda.o
QMAKE_EXTRA_COMPILES += cuda/--------------- system ---------------------------/
OS : UBUNTU 16.04
gcc : 5.4.0
nvcc : 9.0Please Help me....
-
Hi and welcome to devnet,
@DancingDaddy said in Build Error cuda on QT with Jetson TX2:
CUDA_SOURCES += $$OTHER_FILES
OTHER_FILES += vectorAdd.cuFrom the looks of it, your CUDA kernel is likely not built.