OpenCV + Dlib with Qt Creator — CNN face detection very slow



  • I am using Dlib with C++ and decided to move to the Qt platform for a GUI requirement. However, I couldn't figure out why CNN face recognition is extremely slow (about 20 seconds per frame — the UI freezes).

    Same code and the same library (dlib); dlib is compiled with AVX2 and MKL. The relevant piece of code is below.

    The same code running without Qt is 100 times faster.

    Thanks

    // work.cpp
    
    #include "mainwindow.h"
    #include "ui_mainwindow.h"
    #include <QTimer>
    #include <QDebug>
    
    #define CAM_ID 0
    
    // Construct the main window: build the generated UI, start a 30 ms
    // timer that drives updateView() (frame grab + detection + display),
    // and attach the preview scene to the viewer widget.
    MainWindow::MainWindow(QWidget *parent) :
        QMainWindow(parent),
        ui(new Ui::MainWindow)
    {
        ui->setupUi(this);
    
        // Qt5 pointer-to-member connect: checked at compile time, unlike the
        // string-based SIGNAL()/SLOT() macros, which fail silently at runtime
        // on a typo.
        QTimer* timer = new QTimer(this);  // parented to `this`; Qt frees it
        connect(timer, &QTimer::timeout, this, &MainWindow::updateView);
        timer->start(30);  // ~33 ticks/s; the slot runs on the GUI thread
    
        ui->camViewer->setScene(&scene);
    }
    
    // Tear down the window: stop the capture stream and dispose of the UI.
    MainWindow::~MainWindow()
    {
        if (video.isOpened())
            video.release();  // cv::VideoCapture's dtor would also do this
        delete ui;
    }
    
    // Start/Stop button: toggles the video stream.
    // The dlib models are deserialized only once per process — the original
    // code reloaded both networks (hundreds of MB of weights) on every Start
    // click, freezing the UI for several seconds each time.
    void MainWindow::on_pushButton_clicked()
    {
        if (video.isOpened())
        {
            video.release();
            ui->pushButton->setText("Start");
        }
        else
        {
            // Lazy one-time load of the detector (net) and the face
            // embedding network (anet).
            static bool models_loaded = false;
            if (!models_loaded)
            {
                deserialize("/Users/alpullu/Projects/dlib/examples/models/dlib_face_recognition_resnet_model_v1.dat") >> anet;
                deserialize("/Users/alpullu/Projects/dlib/examples/models/mmod_human_face_detector.dat") >> net;
                models_loaded = true;
            }
    
            video.open("http://ckyxtrm.com:3000/live/muzisyenhakan/29mayismh/177.ts");
            ui->pushButton->setText("Stop");
        }
    }
    
    
    
    
    void MainWindow::updateView()
    {
        if(!video.isOpened()) return;
        cv::Mat frame;
        while(1)
        {
            video >> frame;
            if(!frame.empty()) break;
        }
    // FROM SAFTAS
    
                ++count;
                if (!video.read(temp)) {
                    cout << "        if (!cap.read(temp)) !!!! " <<  endl;
                    video.release();
                    video.open("http://ckyxtrm.com:3000/live/muzisyenhakan/29mayismh/177.ts");
                    cout << "   yeni CAP acilmaya calisti..." << endl;
                    if (!video.isOpened()) {
                        cerr << "Unable to connect to STREAM" << endl;
    
                    }
        //            continue;
                }
    
    
                im = frame;
                cv::putText(frame, "S A F T A S  A.S. 2018 Deep Learning .", cvPoint(30, 30),
                                    cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(0, 250, 50), 1, CV_AA);
    
                std::vector<full_object_detection> shapes;
                cout << count << endl;
                if (count % SKIP_FRAMES == 0 && not_skip ) {
    
                    cout << count << "::" << not_skip<< endl;
    
                    matrix<rgb_pixel> img;
                    cout << im.depth() << "  DEPTh . . . . . . ." << endl ;
                    cv_image<bgr_pixel> image(im);
                    assign_image(img, image);
    
    //                std::vector<dlib::rectangle> detected_faces = detector(img);   This look normal
    
                    dlib::cv_image<bgr_pixel> cimg_small(frame);
    
    
                    std::vector<full_object_detection> shapes;
    //                            for (unsigned long i = 0; i < detected_faces.size(); ++i)
    //                                shapes.push_back(pose_model(cimg_small, detected_faces[i]));
    
    
                    cout << detected_faces.size() << " yüz var " << endl;
    
                    auto dets = net(img);  // THIS ONE IS very sloww.....
    
                    cout << count << ">>>::<<<" << not_skip<< endl;
    
                    //win.clear_overlay();
    
    
                }
    
    
     // FROM SAFTAS
    
    
    
        if(frame.empty()) return;
        ui->camViewer->setImage(QImage((const unsigned char*)(frame.data), frame.cols,frame.rows,QImage::Format_RGB888).rgbSwapped());
    
    
    }
    
    // work.h
    #ifndef MAINWINDOW_H
    #define MAINWINDOW_H
    
    #include <QMainWindow>
    #include "camviewer.h"
    #include <opencv2/highgui/highgui.hpp>
    //DLIB
    
    #include <iostream>
    #include <dlib/dnn.h>
    #include <dlib/data_io.h>
    #include <dlib/image_processing.h>
    #include <dlib/gui_widgets.h>
    #include <opencv2/videoio.hpp>
    #include <dlib/opencv/cv_image.h>
    #include <opencv/cv.hpp>
    #include <dlib/image_processing/render_face_detections.h>
    #include <dlib/clustering/chinese_whispers.h>
    #include <dlib/image_processing/frontal_face_detector.h>
    
    
    using namespace std;
    using namespace dlib;
    
    // ----------------------------------------------------------------------------------------
    
    // --- MMOD CNN face-detector architecture --------------------------------
    // NOTE(review): mmod_human_face_detector.dat was serialized against this
    // exact layer stack (dlib's dnn_mmod_face_detection_ex.cpp); any
    // structural change here will make deserialize() fail, so the tokens
    // below must stay as-is.
    template<long num_filters, typename SUBNET> using con5d = con<num_filters, 5, 5, 2, 2, SUBNET>;
    template<long num_filters, typename SUBNET> using con5  = con<num_filters, 5, 5, 1, 1, SUBNET>;
    
    // Three stride-2 conv blocks: 8x spatial downsampling of the input pyramid.
    template<typename SUBNET> using downsampler  = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16, SUBNET>>>>>>>>>;
    template<typename SUBNET> using rcon5  = relu<affine<con5<45, SUBNET>>>;
    
    using net_type = loss_mmod<con<1, 9, 9, 1, 1, rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
    
    
    //------------------------------------------------------------------------------------------
    
    // The next bit of code defines a ResNet network.  It's basically copied
    // and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
    // layer with loss_metric and made the network somewhat smaller.  Go read the introductory
    // dlib DNN examples to learn what all this stuff means.
    //
    // Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network.
    // The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
    // essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
    // mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
    // was set to 10000, and the training dataset consisted of about 3 million images instead of
    // 55.  Also, the input layer was locked to images of size 150.
    // Residual block that preserves spatial size (identity skip via tag1/add_prev1).
    template<template<int, template<typename> class, int, typename> class block, int N,
            template<typename> class BN, typename SUBNET>
    using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;
    
    // Residual block that halves spatial size: stride-2 main path, 2x2
    // average-pooled skip path joined through tag2/skip1/add_prev2.
    template<template<int, template<typename> class, int, typename> class block, int N,
            template<typename> class BN, typename SUBNET>
    using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;
    
    // Basic two-conv building block; BN is affine here (inference-time batch norm).
    template<int N, template<typename> class BN, int stride, typename SUBNET>
    using block  = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;
    
    template<int N, typename SUBNET> using ares      = relu<residual<block, N, affine, SUBNET>>;
    template<int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;
    
    // Filter counts per level, deepest (256) first.
    template<typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
    template<typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
    template<typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
    template<typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
    template<typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;
    
    // NOTE(review): dlib_face_recognition_resnet_model_v1.dat was serialized
    // against this exact stack (input locked to 150x150, 128-D metric output);
    // do not alter the tokens below or deserialize() will fail.
    using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
            alevel0<
                    alevel1<
                            alevel2<
                                    alevel3<
                                            alevel4<
                                                    max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2,
                                                            input_rgb_image_sized<150>
                                                    >>>>>>>>>>>>;
    
    
    //**************************************************************
    
    namespace Ui {
    class MainWindow;
    }
    
    // Main application window: owns the capture stream, both dlib networks,
    // and the QGraphicsScene used to display frames in the viewer widget.
    class MainWindow : public QMainWindow
    {
        Q_OBJECT
    
    public:
        explicit MainWindow(QWidget *parent = nullptr);
        bool isCameraOn;   // NOTE(review): never read or written in the visible code
        ~MainWindow();
    
    private slots:
        void on_pushButton_clicked();  // Start/Stop button: toggles the stream
        void updateView(void);         // timer slot: grab + detect + display
    
    private:
        Ui::MainWindow *ui;
        QGraphicsScene scene;
        cv::VideoCapture video;        // IP stream / camera capture
        cv::Mat frame;                 // NOTE(review): shadowed by a local in updateView()
        cv::Mat temp;
        cv::Mat im;
        int count = 0;                 // frames grabbed since startup
        static constexpr int SKIP_FRAMES = 8;  // run the CNN on every 8th frame only
        bool not_skip = true;
        net_type net;                  // MMOD CNN face detector
        anet_type anet;                // ResNet face-embedding net (loaded but unused here)
    
        frontal_face_detector detector;
        shape_predictor shape_model;
        shape_predictor pose_model;
    };
    
    #endif // MAINWINDOW_H
    
    

Log in to reply
 

Looks like your connection to Qt Forum was lost, please wait while we try to reconnect.