OpenCv Dlib with QT CREATOR & CNN very SLOW
Unsolved
General and Desktop
-
I am using Dlib with C++, and I decided to move to the Qt platform for a GUI requirement. However, I couldn't find out why CNN face recognition is extremely slow (about 20 seconds per frame; the UI freezes).
Same code and same library (dlib). Dlib is compiled with AVX2 and MKL. The piece of code is below.
The same code runs about 100 times faster when built without Qt.
Thanks
// work.cpp #include "mainwindow.h" #include "ui_mainwindow.h" #include <QTimer> #include <QDebug> #define CAM_ID 0 MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); QTimer* timer = new QTimer(this); connect(timer, SIGNAL(timeout()), this, SLOT(updateView())); timer->start(30); ui->camViewer->setScene(&scene); } MainWindow::~MainWindow() { delete ui; if(video.isOpened()) { video.release(); } } void MainWindow::on_pushButton_clicked() { if(video.isOpened()) { video.release(); ui->pushButton->setText("Start"); } else { //SAFTAS deserialize("/Users/alpullu/Projects/dlib/examples/models/dlib_face_recognition_resnet_model_v1.dat") >> anet; deserialize("/Users/alpullu/Projects/dlib/examples/models/mmod_human_face_detector.dat") >> net; video.open("http://ckyxtrm.com:3000/live/muzisyenhakan/29mayismh/177.ts"); ui->pushButton->setText("Stop"); } } void MainWindow::updateView() { if(!video.isOpened()) return; cv::Mat frame; while(1) { video >> frame; if(!frame.empty()) break; } // FROM SAFTAS ++count; if (!video.read(temp)) { cout << " if (!cap.read(temp)) !!!! " << endl; video.release(); video.open("http://ckyxtrm.com:3000/live/muzisyenhakan/29mayismh/177.ts"); cout << " yeni CAP acilmaya calisti..." << endl; if (!video.isOpened()) { cerr << "Unable to connect to STREAM" << endl; } // continue; } im = frame; cv::putText(frame, "S A F T A S A.S. 2018 Deep Learning .", cvPoint(30, 30), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(0, 250, 50), 1, CV_AA); std::vector<full_object_detection> shapes; cout << count << endl; if (count % SKIP_FRAMES == 0 && not_skip ) { cout << count << "::" << not_skip<< endl; matrix<rgb_pixel> img; cout << im.depth() << " DEPTh . . . . . . ." 
<< endl ; cv_image<bgr_pixel> image(im); assign_image(img, image); // std::vector<dlib::rectangle> detected_faces = detector(img); This look normal dlib::cv_image<bgr_pixel> cimg_small(frame); std::vector<full_object_detection> shapes; // for (unsigned long i = 0; i < detected_faces.size(); ++i) // shapes.push_back(pose_model(cimg_small, detected_faces[i])); cout << detected_faces.size() << " yüz var " << endl; auto dets = net(img); // THIS ONE IS very sloww..... cout << count << ">>>::<<<" << not_skip<< endl; //win.clear_overlay(); } // FROM SAFTAS if(frame.empty()) return; ui->camViewer->setImage(QImage((const unsigned char*)(frame.data), frame.cols,frame.rows,QImage::Format_RGB888).rgbSwapped()); }
// work.h
//
// Declares MainWindow plus the two dlib network type aliases it owns:
//   net_type  — the CNN (MMOD) face *detector*, loaded from
//               mmod_human_face_detector.dat in work.cpp
//   anet_type — the face-recognition ResNet (loss_metric, 128-wide fc),
//               loaded from dlib_face_recognition_resnet_model_v1.dat
#ifndef MAINWINDOW_H
#define MAINWINDOW_H

#include <QMainWindow>
#include "camviewer.h"
#include <opencv2/highgui/highgui.hpp>
//DLIB
#include <iostream>
#include <dlib/dnn.h>
#include <dlib/data_io.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
#include <opencv2/videoio.hpp>
#include <dlib/opencv/cv_image.h>
#include <opencv/cv.hpp>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/clustering/chinese_whispers.h>
#include <dlib/image_processing/frontal_face_detector.h>

// NOTE(review): using-directives in a header leak into every translation
// unit that includes it; prefer explicit std:: / dlib:: qualification.
// Left in place here because the .cpp relies on them.
using namespace std;
using namespace dlib;

// ----------------------------------------------------------------------------------------
// CNN (MMOD) face-detector network definition — the strided-conv downsampler
// feeding three 5x5 conv blocks over an input image pyramid, ending in a
// loss_mmod detection layer.
template<long num_filters, typename SUBNET> using con5d = con<num_filters, 5, 5, 2, 2, SUBNET>;
template<long num_filters, typename SUBNET> using con5 = con<num_filters, 5, 5, 1, 1, SUBNET>;

template<typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16, SUBNET>>>>>>>>>;
template<typename SUBNET> using rcon5 = relu<affine<con5<45, SUBNET>>>;

using net_type = loss_mmod<con<1, 9, 9, 1, 1, rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
//------------------------------------------------------------------------------------------
// The next bit of code defines a ResNet network. It's basically copied
// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
// layer with loss_metric and made the network somewhat smaller. Go read the introductory
// dlib DNN examples to learn what all this stuff means.
//
// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network.
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
// was set to 10000, and the training dataset consisted of about 3 million images instead of
// 55. Also, the input layer was locked to images of size 150.
template<template<int, template<typename> class, int, typename> class block, int N, template<typename> class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;

template<template<int, template<typename> class, int, typename> class block, int N, template<typename> class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;

template<int N, template<typename> class BN, int stride, typename SUBNET>
using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;

// `affine` variants: batch-norm replaced by fixed affine layers (inference mode).
template<int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>;
template<int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;

template<typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
template<typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
template<typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
template<typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
template<typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;

// Metric-learning ResNet: 128-dimensional embedding over 150x150 RGB inputs.
using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
    alevel0<
    alevel1<
    alevel2<
    alevel3<
    alevel4<
    max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2,
    input_rgb_image_sized<150>
    >>>>>>>>>>>>;

//**************************************************************

namespace Ui {
class MainWindow;
}

// Main application window: grabs frames from a video stream on a QTimer and
// runs the dlib CNN face detector on them (implementation in work.cpp).
class MainWindow : public QMainWindow
{
    Q_OBJECT

public:
    explicit MainWindow(QWidget *parent =
    0);
    bool isCameraOn;                 // NOTE(review): never referenced in the visible code
    ~MainWindow();

private slots:
    void on_pushButton_clicked();    // toggles the stream open/closed
    void updateView(void);           // timer slot: grab + process one frame

private:
    Ui::MainWindow *ui;
    QGraphicsScene scene;            // scene shown in ui->camViewer
    cv::VideoCapture video;          // network stream capture
    cv::Mat frame;                   // NOTE(review): shadowed by a local in updateView()
    cv::Mat temp;
    cv::Mat im;
    int count = 0;                   // frame counter driving the skip logic
    int SKIP_FRAMES = 8;             // run the CNN only on every 8th frame
    bool not_skip = true;            // extra gate on the CNN path (never toggled here)
    net_type net;                    // CNN (MMOD) face detector
    anet_type anet;                  // face-recognition ResNet (128-D output)
    frontal_face_detector detector;  // HOG detector (unused in the visible code)
    shape_predictor shape_model;
    shape_predictor pose_model;
};

#endif // MAINWINDOW_H