QRHI Compute Shader

RSteffen

I managed to run a compute shader too. Here is some code for testing!

#version 450

// Explicit work-group size. gl_WorkGroupSize is only well defined once a local
// size has been declared, and OpenGL-family GLSL requires the declaration.
// 1x1x1 means every dispatched work group processes exactly one element.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Source values, one float per invocation.
layout(std430, binding = 0) buffer InputBuffer
{
    float inputData[];
};

// Destination values; element i receives inputData[i] squared.
layout(std430, binding = 1) buffer OutputBuffer
{
    float outputData[];
};

// Squares one element of inputData into the same position of outputData.
// The element is selected by flattening the 3D global invocation ID.
void main()
{
    // Extent of the whole dispatch in invocations (groups * group size).
    // The strides of the flattened index must be the full grid extents, not
    // the size of a single work group, otherwise invocations from different
    // groups would map to the same element.
    uvec3 gridSize = gl_NumWorkGroups * gl_WorkGroupSize;

    uvec3 id = gl_GlobalInvocationID;
    uint globalIdx = id.z * gridSize.x * gridSize.y + id.y * gridSize.x + id.x; // 1D index

    float value = inputData[globalIdx]; // Read from the input buffer
    outputData[globalIdx] = value * value;
}

And the pipeline code:

bool ComputeBackend::onSquare(std::vector<float> data)
{
    #ifndef sptr_
#define sptr_ std::shared_ptr
    #endif

    if (!rhi_) return false;

    bool compute_feature = rhi_->isFeatureSupported(QRhi::Compute);
    bool readback_feature = rhi_->isFeatureSupported(QRhi::ReadBackNonUniformBuffer);
    qInfo() << "Compute Supported           : " << compute_feature << Qt::endl;
    qInfo() << "Read Back Storage Supported : " << readback_feature << Qt::endl;
    qInfo() << "Read Back Limit             : " << rhi_->resourceLimit(QRhi::MaxAsyncReadbackFrames) << Qt::endl;
    qInfo() << "MaxThreadGroupsX            : " << rhi_->resourceLimit(QRhi::MaxThreadGroupX) << Qt::endl;
    qInfo() << "MaxThreadGroupsY            : " << rhi_->resourceLimit(QRhi::MaxThreadGroupY) << Qt::endl;
    qInfo() << "MaxThreadGroupsZ            : " << rhi_->resourceLimit(QRhi::MaxThreadGroupZ) << Qt::endl;
    qInfo() << "MaxThreadGroupsperDim       : " << rhi_->resourceLimit(QRhi::MaxThreadGroupsPerDimension) << Qt::endl;

    if (!compute_feature || !readback_feature) return false;

    int num = data.size();
    // compute the optimal groups
    int wGX = num; //rhi_->resourceLimit(QRhi::MaxThreadGroupX);
    int wGY = 1; //rhi_->resourceLimit(QRhi::MaxThreadGroupX);
    int wGZ = 1;//rhi_->resourceLimit(QRhi::MaxThreadGroupZ);
    // Create QRhi buffers
    auto sbuf = sptr_<QRhiBuffer>(rhi_->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, num * sizeof(float)));
    sbuf->create();
    auto dbuf = sptr_<QRhiBuffer>(rhi_->newBuffer(QRhiBuffer::Static, QRhiBuffer::StorageBuffer, num * sizeof(float)));
    dbuf->create();
    auto srb = sptr_<QRhiShaderResourceBindings>(rhi_->newShaderResourceBindings());
    srb->setBindings(
    {
        QRhiShaderResourceBinding::bufferLoad(0, QRhiShaderResourceBinding::ComputeStage, sbuf.get()),
        QRhiShaderResourceBinding::bufferLoadStore(1, QRhiShaderResourceBinding::ComputeStage, dbuf.get())
    });
    srb->create(); 
    auto pipeline = sptr_<QRhiComputePipeline>(rhi_->newComputePipeline());
    pipeline->setShaderStage({QRhiShaderStage::Compute, *(ResourceManager::getUnmanagedShader(":/sort.comp.qsb"))});
    pipeline->setShaderResourceBindings(srb.get());
    pipeline->create();
    // --- create a off screen frame
    QRhiCommandBuffer *cb;
    rhi_->beginOffscreenFrame(&cb);
    auto resourceUpdateBatch = rhi_->nextResourceUpdateBatch();
    resourceUpdateBatch->uploadStaticBuffer(sbuf.get(), data.data());
    resourceUpdateBatch->uploadStaticBuffer(dbuf.get(), data.data());
    cb->beginComputePass(resourceUpdateBatch);//, QRhiCommandBuffer::ExternalContent);
    cb->setComputePipeline(pipeline.get());
    cb->setShaderResources(srb.get()); // srb can be null in which case the current graphics or compute pipeline's associated QRhiShaderResourceBindings is used
    cb->dispatch(wGX, wGY, wGZ); // Adjust for your data size
    //stat = rhi_->finish();// Waits for any work on the graphics queue (where applicable) to complete, then executes all deferred operations, like completing readbacks and resource releases. Can be called inside and outside of a frame, but not inside a pass. Inside a frame it implies submitting any work on the command buffer.
    resourceUpdateBatch = rhi_->nextResourceUpdateBatch();
    // in openGL only one readback is supported - in Vulkan 2
    QRhiReadbackResult rb_dbuf;
    resourceUpdateBatch->readBackBuffer(dbuf.get(), 0, num * sizeof(float), &rb_dbuf);
    cb->endComputePass(resourceUpdateBatch);
    rhi_->endOffscreenFrame(); // calls a finish
    qInfo() << "ReadBack Dbuf: " << rb_dbuf.data.size() << Qt::endl;

    if (rb_dbuf.data.size())
    {
        QByteArray &dba = rb_dbuf.data;
        float *ddata = reinterpret_cast<float *>(dba.data());
        // check the data

        for (int i = 0; i < num; ++i)
        {
            if (ddata[i] != (data[i]*data[i]))
            {
                qInfo() << i - 1 << data[i - 1] << ddata[i - 1] << Qt::endl;
                qCritical() << i << data[i] << ddata[i] << Qt::endl;
                break;
            }
        }
    }
    return true;
}

Please note that OpenGL only supports 1 buffer readback, while Vulkan supports 2 buffer readbacks. Also, setting the MaxThreadGroupsX is crucial.

Have fun.

RSteffen

I don't know why, but this code works with Vulkan only.

SGaist

Hi,

I haven't tested QRhi yet but did you try the RHI Window example to see if things are running on all the backends you have on your machine ?