QWebPage toHtml() returns the wrong HTML



  • Sorry for my poor English. I want to get the HTML of a web page and save it to disk, using QWebEngine->page()->toHtml();
    but I only get " <html> <body></body> </html> ". The page uses AJAX to load its data.
    How can I get the HTML that includes that data?
    I tried 2 ways to get the HTML.

    1. Right after setting the URL, I call QWebEngine->page()->toHtml(), but I get " <html> <body></body> </html>".
    2. When loadFinished is emitted, I call QWebEngine->page()->toHtml(), but I get nothing.
      The test page is https://dangdang.tmall.com/search.htm?search=y&orderType=newOn_desc&pageNo=88

    Qt version: 5.7


  • Moderators

    @tkks
    please post some code.
    QWebPage::toHtml() should return the correct html as you are expecting it.



  • Well if nothing else works you can always pull it through javascript

    document.getElementsByTagName('html')[0].innerHTML
    

    Get the html of the javascript-rendered page (after interacting with it)
    If any IFRAMEs or other parts of the page are missing, you should pass the argument "--disable-web-security" to the application:
    QtWebEngine: “Not allowed to load local resource” for iframe, how to disable web security?



  • @raven-worx

    file :ca.h

    #ifndef CA_H
    #define CA_H
    
    #include <QObject>
    #include <string>
    #include <QtWebEngineWidgets/qwebengineview.h>
    #include <QtWebEngineWidgets/qwebenginesettings.h>
    
    /// Drives a QWebEngineView: spider() navigates it to a URL and finish()
    /// handles the view's load-finished notification.
    class CA : public QObject
    {
        Q_OBJECT

    public:
        CA();

        /// Navigates the view (created on the first call) to the URL in str.
        void spider(const std::string &str);

    public slots:
        /// Connected to the view's loadFinished(bool) signal by spider().
        void finish(bool is_ok);

    private:
        QWebEngineView *view; ///< Null until the first spider() call creates it.
    };
    
    #endif // CA_H
    
    

    file:ca.cpp

    #include "ca.h"
    #include <QString>
    
    /// Starts without a view; spider() creates it lazily on first use.
    CA::CA()
        : view(nullptr)
    {
    }
    
    /// Loads the page at the URL given in str.
    ///
    /// The first call creates the QWebEngineView, connects its loadFinished
    /// signal to finish(), turns off automatic image loading and sizes the view;
    /// later calls reuse that view and only navigate it.
    ///
    /// @param str URL to load, converted with QString::fromStdString.
    void CA::spider(const std::string& str)
    {
        qDebug("spider");
        if(view == nullptr)
        {
            qDebug("new QWebEngineView");
            view = new QWebEngineView;
            // Connect the signal and apply the settings before any load is
            // started, so the configuration is in place for the first request
            // and its finished notification cannot be missed.
            QObject::connect( view , &QWebEngineView::loadFinished , this , &CA::finish  );
            QWebEngineSettings *setting = view->page()->settings();
            setting->setAttribute( QWebEngineSettings::AutoLoadImages , false );
            view->resize(1024, 750);
        }
        // One conversion path for both the first and the later calls.
        view->setUrl(QUrl(QString::fromStdString(str)));
        //view->show();
    }
    
    /// Slot for the view's loadFinished signal: logs the outcome and, on
    /// success, requests the page HTML and prints it when it is delivered.
    ///
    /// @param is_ok true when the load succeeded.
    void CA::finish(bool is_ok)
    {
        if(!is_ok)
        {
            qDebug("load error!");
            return;
        }
        qDebug("load successed!");
        // toHtml() hands the markup to a callback instead of returning it.
        const auto printHtml = [](const QString &markup)
        {
            qDebug()<<markup;
        };
        view->page()->toHtml(printHtml);
    }
    
    

    file main.cpp

    #include <QCoreApplication>
    #include <QApplication>
    #include "ca.h"
    using namespace std;
    
    
    
    
    
    int main(int argc, char *argv[])
    {
        QApplication a(argc, argv);
    
        CA  ca;
        ca.spider("https://dangdang.tmall.com/search.htm?search=y&orderType=newOn_desc&pageNo=88");
    
        return a.exec();
    }
    
    

    When the slot function is called, nothing happens except that it prints:

    load successed!
    

Log in to reply
 

Looks like your connection to Qt Forum was lost, please wait while we try to reconnect.