OpenCv Dlib with QT CREATOR & CNN very SLOW
Unsolved
General and Desktop
-
I am using Dlib with C++, and I decided to move to the Qt platform for a GUI requirement. However, I couldn't find out why CNN face recognition is extremely slow (about 20 seconds per frame; the UI freezes).
Same code and same library (dlib). Dlib is compiled with AVX2 and MKL. The piece of code is below.
The same code runs about 100 times faster when built without Qt.
Thanks
// work.cpp #include "mainwindow.h" #include "ui_mainwindow.h" #include <QTimer> #include <QDebug> #define CAM_ID 0 MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent), ui(new Ui::MainWindow) { ui->setupUi(this); QTimer* timer = new QTimer(this); connect(timer, SIGNAL(timeout()), this, SLOT(updateView())); timer->start(30); ui->camViewer->setScene(&scene); } MainWindow::~MainWindow() { delete ui; if(video.isOpened()) { video.release(); } } void MainWindow::on_pushButton_clicked() { if(video.isOpened()) { video.release(); ui->pushButton->setText("Start"); } else { //SAFTAS deserialize("/Users/alpullu/Projects/dlib/examples/models/dlib_face_recognition_resnet_model_v1.dat") >> anet; deserialize("/Users/alpullu/Projects/dlib/examples/models/mmod_human_face_detector.dat") >> net; video.open("http://ckyxtrm.com:3000/live/muzisyenhakan/29mayismh/177.ts"); ui->pushButton->setText("Stop"); } } void MainWindow::updateView() { if(!video.isOpened()) return; cv::Mat frame; while(1) { video >> frame; if(!frame.empty()) break; } // FROM SAFTAS ++count; if (!video.read(temp)) { cout << " if (!cap.read(temp)) !!!! " << endl; video.release(); video.open("http://ckyxtrm.com:3000/live/muzisyenhakan/29mayismh/177.ts"); cout << " yeni CAP acilmaya calisti..." << endl; if (!video.isOpened()) { cerr << "Unable to connect to STREAM" << endl; } // continue; } im = frame; cv::putText(frame, "S A F T A S A.S. 2018 Deep Learning .", cvPoint(30, 30), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cvScalar(0, 250, 50), 1, CV_AA); std::vector<full_object_detection> shapes; cout << count << endl; if (count % SKIP_FRAMES == 0 && not_skip ) { cout << count << "::" << not_skip<< endl; matrix<rgb_pixel> img; cout << im.depth() << " DEPTh . . . . . . ." 
<< endl ; cv_image<bgr_pixel> image(im); assign_image(img, image); // std::vector<dlib::rectangle> detected_faces = detector(img); This look normal dlib::cv_image<bgr_pixel> cimg_small(frame); std::vector<full_object_detection> shapes; // for (unsigned long i = 0; i < detected_faces.size(); ++i) // shapes.push_back(pose_model(cimg_small, detected_faces[i])); cout << detected_faces.size() << " yüz var " << endl; auto dets = net(img); // THIS ONE IS very sloww..... cout << count << ">>>::<<<" << not_skip<< endl; //win.clear_overlay(); } // FROM SAFTAS if(frame.empty()) return; ui->camViewer->setImage(QImage((const unsigned char*)(frame.data), frame.cols,frame.rows,QImage::Format_RGB888).rgbSwapped()); }
// work.h
//
// Declares MainWindow plus the two dlib network type aliases it owns:
//   net_type  — the CNN (MMOD) face *detector*, loaded from
//               mmod_human_face_detector.dat in work.cpp
//   anet_type — the face-recognition ResNet (loss_metric, 128-wide fc),
//               loaded from dlib_face_recognition_resnet_model_v1.dat
#ifndef MAINWINDOW_H
#define MAINWINDOW_H

#include <QMainWindow>
#include "camviewer.h"
#include <opencv2/highgui/highgui.hpp>
//DLIB
#include <iostream>
#include <dlib/dnn.h>
#include <dlib/data_io.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
#include <opencv2/videoio.hpp>
#include <dlib/opencv/cv_image.h>
#include <opencv/cv.hpp>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/clustering/chinese_whispers.h>
#include <dlib/image_processing/frontal_face_detector.h>

// NOTE(review): using-directives in a header leak into every translation
// unit that includes it; prefer explicit std:: / dlib:: qualification.
// Left in place here because the .cpp relies on them.
using namespace std;
using namespace dlib;

// ----------------------------------------------------------------------------------------
// CNN (MMOD) face-detector network definition — the strided-conv downsampler
// feeding three 5x5 conv blocks over an input image pyramid, ending in a
// loss_mmod detection layer.
template<long num_filters, typename SUBNET> using con5d = con<num_filters, 5, 5, 2, 2, SUBNET>;
template<long num_filters, typename SUBNET> using con5 = con<num_filters, 5, 5, 1, 1, SUBNET>;

template<typename SUBNET> using downsampler = relu<affine<con5d<32, relu<affine<con5d<32, relu<affine<con5d<16, SUBNET>>>>>>>>>;
template<typename SUBNET> using rcon5 = relu<affine<con5<45, SUBNET>>>;

using net_type = loss_mmod<con<1, 9, 9, 1, 1, rcon5<rcon5<rcon5<downsampler<input_rgb_image_pyramid<pyramid_down<6>>>>>>>>;
//------------------------------------------------------------------------------------------
// The next bit of code defines a ResNet network. It's basically copied
// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
// layer with loss_metric and made the network somewhat smaller. Go read the introductory
// dlib DNN examples to learn what all this stuff means.
//
// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network.
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
// was set to 10000, and the training dataset consisted of about 3 million images instead of
// 55. Also, the input layer was locked to images of size 150.
template<template<int, template<typename> class, int, typename> class block, int N, template<typename> class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;

template<template<int, template<typename> class, int, typename> class block, int N, template<typename> class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;

template<int N, template<typename> class BN, int stride, typename SUBNET>
using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;

// `affine` variants: batch-norm replaced by fixed affine layers (inference mode).
template<int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>;
template<int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;

template<typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
template<typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
template<typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
template<typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
template<typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;

// Metric-learning ResNet: 128-dimensional embedding over 150x150 RGB inputs.
using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
    alevel0<
    alevel1<
    alevel2<
    alevel3<
    alevel4<
    max_pool<3, 3, 2, 2, relu<affine<con<32, 7, 7, 2, 2,
    input_rgb_image_sized<150>
    >>>>>>>>>>>>;

//**************************************************************

namespace Ui {
class MainWindow;
}

// Main application window: grabs frames from a video stream on a QTimer and
// runs the dlib CNN face detector on them (implementation in work.cpp).
class MainWindow : public QMainWindow
{
    Q_OBJECT

public:
    explicit MainWindow(QWidget *parent =
    0);
    bool isCameraOn;                 // NOTE(review): never referenced in the visible code
    ~MainWindow();

private slots:
    void on_pushButton_clicked();    // toggles the stream open/closed
    void updateView(void);           // timer slot: grab + process one frame

private:
    Ui::MainWindow *ui;
    QGraphicsScene scene;            // scene shown in ui->camViewer
    cv::VideoCapture video;          // network stream capture
    cv::Mat frame;                   // NOTE(review): shadowed by a local in updateView()
    cv::Mat temp;
    cv::Mat im;
    int count = 0;                   // frame counter driving the skip logic
    int SKIP_FRAMES = 8;             // run the CNN only on every 8th frame
    bool not_skip = true;            // extra gate on the CNN path (never toggled here)
    net_type net;                    // CNN (MMOD) face detector
    anet_type anet;                  // face-recognition ResNet (128-D output)
    frontal_face_detector detector;  // HOG detector (unused in the visible code)
    shape_predictor shape_model;
    shape_predictor pose_model;
};

#endif // MAINWINDOW_H