Unsolved: QWebPage toHtml() returns the wrong HTML
-
Sorry, my English is poor. I want to get a web page's HTML and save it to disk. I use QWebEngine->page()->toHtml();
but I get " <html> <body></body> </html> ". The page uses AJAX to load its data.
How can I get the HTML that includes this data?
I tried two ways to get the HTML:
- Right after setting the URL, I call QWebEngine->page()->toHtml(), but I get " <html> <body></body> </html>".
- When loadFinished is emitted, I call QWebEngine->page()->toHtml(), and I get nothing.
The test page is https://dangdang.tmall.com/search.htm?search=y&orderType=newOn_desc&pageNo=88
Qt version: 5.7
-
@tkks
please post some code.
QWebPage::toHtml() should return the correct HTML, as you expect. -
Well, if nothing else works, you can always pull it out through JavaScript:
document.getElementsByTagName('html')[0].innerHTML
Get the html of the javascript-rendered page (after interacting with it)
And if there are any IFRAMEs or other parts of the page missing, you should pass the argument "--disable-web-security" to the application.
QtWebEngine: “Not allowed to load local resource” for iframe, how to disable web security? -
File: ca.h
#ifndef CA_H #define CA_H #include <QObject> #include <string> #include <QtWebEngineWidgets/qwebengineview.h> #include <QtWebEngineWidgets/qwebenginesettings.h> class CA:public QObject { Q_OBJECT public: CA(); void spider(const std::string& str); public slots: void finish(bool is_ok); private: QWebEngineView *view ; }; #endif // CA_H
File: ca.cpp
#include "ca.h"
#include <QString>
#include <QDebug>
#include <QUrl>

/// Starts with no view; it is created on the first spider() call.
CA::CA()
    : view(nullptr)
{
}

/// Loads the page at @p str in the web view.
///
/// On the first call the view is created and configured (image loading
/// disabled, 1024x750 size) and its loadFinished signal is connected to
/// finish(). Later calls reuse the same view and only change the URL.
void CA::spider(const std::string& str)
{
    qDebug("spider");

    // Convert once so both the first-call and reuse paths use the same URL.
    const QUrl url(QString::fromStdString(str));

    if (view == nullptr) {
        qDebug("new QWebEngineView");
        // No parent is passed, so Qt's parent/child cleanup will not delete this view.
        view = new QWebEngineView;

        // Configure the page and hook up the signal BEFORE starting the load,
        // so the settings apply to this request and the signal cannot be missed.
        QWebEngineSettings *setting = view->page()->settings();
        setting->setAttribute(QWebEngineSettings::AutoLoadImages, false);
        view->resize(1024, 750);
        QObject::connect(view, &QWebEngineView::loadFinished, this, &CA::finish);
    }

    view->setUrl(url);
    //view->show();
}

/// Slot for QWebEngineView::loadFinished.
///
/// On success, requests the page HTML and prints it with qDebug(); on failure,
/// only logs an error.
/// @param is_ok true when the page load succeeded.
void CA::finish(bool is_ok)
{
    if (!is_ok) {
        qDebug("load error!");
        return;
    }

    qDebug("load successed!");
    // toHtml() delivers its result through the callback rather than returning it.
    view->page()->toHtml([](const QString &html) {
        qDebug() << html;
    });
}
File: main.cpp
#include <QCoreApplication>
#include <QApplication>
#include "ca.h"

/// Entry point: creates the Qt application, starts loading the test page
/// through CA::spider(), and runs the event loop until the application quits.
int main(int argc, char *argv[])
{
    // QApplication (not QCoreApplication) is used because CA creates a QWebEngineView widget.
    QApplication a(argc, argv);

    // Declared after the application object, so it is destroyed before it.
    CA ca;
    ca.spider("https://dangdang.tmall.com/search.htm?search=y&orderType=newOn_desc&pageNo=88");

    return a.exec();
}
When the slot function runs, nothing happens; it just prints:
load successed!