QSyntaxHighlighter with parser
-
Hi,
As suggested on this forum, I tried to redo the QSyntaxHighlighter example with a text parser instead of regular expressions. The syntax highlighting is meant for PHP. A PHP file can contain several other languages; so far I have added highlighting for PHP and HTML. Before I move on to add support for CSS and JavaScript, I wanted to ask for feedback. Am I on the right track here, or am I doing this completely wrong? This is my first parser.
I know that the way the keywords are passed to the highlighter needs to be improved. I still need to come up with a good way to do that for different file types: if I use the highlighter with a CSS file, I only need and want the CSS highlighting.
I also have a second question. Besides the language-specific syntax highlighting, I would like to have language-specific auto-completion. How can I pass the current block state to the completer? Here is the modified code. I have already included the matching of parentheses.
highlighter.h#ifndef HIGHLIGHTER_H #define HIGHLIGHTER_H #include <QSyntaxHighlighter> #include <QVector> #include <QStringListModel> #include <QRegularExpression> class QString; class QTextDocument; struct ParenthesisInfo { char character; int position; }; class TextBlockData : public QTextBlockUserData { public: TextBlockData(); QVector<ParenthesisInfo *> parentheses(); void insert(ParenthesisInfo *info); private: QVector<ParenthesisInfo *> m_parentheses; }; class Highlighter : public QSyntaxHighlighter { Q_OBJECT public: Highlighter(QTextDocument *document, QStringListModel *keyWords, QStringListModel *funcNames); protected: void highlightBlock(const QString &text); private: QMap<QString, QTextCharFormat> phpKeyWordsRules; QMap<QString, QTextCharFormat> htmlKeyWordsRules; QTextCharFormat currentTagFormat; QTextCharFormat phpKeywordFormat; QTextCharFormat phpTagFormat; QTextCharFormat phpCommentFormat; QTextCharFormat phpQuoteFormat; QTextCharFormat phpFunctionFormat; QTextCharFormat phpVariableFormat; QTextCharFormat phpNumberFormat; QTextCharFormat phpBoolFormat; QTextCharFormat htmlTagFormat; QTextCharFormat htmlGenTagFormat; QTextCharFormat htmlTableFormat; QTextCharFormat htmlFormFormat; QTextCharFormat htmlQuoteFormat; QTextCharFormat htmlCommentFormat; QRegularExpressionMatchIterator parser; QString currentLine; void closeTag(QString tag, int returnState, int startPos = 0); }; #endif
highlighter.cpp
#include <QtGui>
#include "highlighter.h"

namespace {

// Values stored with setCurrentBlockState(). Everything below StateHtml means
// "inside PHP"; the quote/comment states mark a construct that is still open
// at the end of a block and continues in the next one.
enum BlockState {
    StatePhp = 0,
    StatePhpDoubleQuote = 1,
    StatePhpSingleQuote = 2,
    StatePhpBlockComment = 3,
    StateHtml = 10
};

// Registers every word of `words` under its lower-case spelling. Lookups are
// done with lower-cased tokens, so the keys have to be lower-case as well
// (otherwise an entry such as "DOCTYPE" could never match).
void addRules(QMap<QString, QTextCharFormat> &rules, const QStringList &words,
              const QTextCharFormat &format)
{
    for (int i = 0; i < words.size(); ++i)
        rules.insert(words.at(i).toLower(), format);
}

// Same as addRules() for the strings of a model; a null model adds nothing.
void addModelRules(QMap<QString, QTextCharFormat> &rules, const QStringListModel *model,
                   const QTextCharFormat &format)
{
    if (model)
        addRules(rules, model->stringList(), format);
}

} // namespace

TextBlockData::TextBlockData()
{
    // Nothing to do
}

// Returns the parentheses of this block, ordered by position.
QVector<ParenthesisInfo *> TextBlockData::parentheses()
{
    return m_parentheses;
}

// Inserts `info` so that the list stays sorted by ascending position.
void TextBlockData::insert(ParenthesisInfo *info)
{
    if (!info)
        return;

    int i = 0;
    while (i < m_parentheses.size() && info->position > m_parentheses.at(i)->position)
        ++i;
    m_parentheses.insert(i, info);
}

// Sets up the character formats and fills the PHP and HTML lookup tables.
// `keyWords` and `funcNames` provide the PHP keywords and function names;
// either may be null, in which case no entries are added for it.
Highlighter::Highlighter(QTextDocument *document, QStringListModel *keyWords,
                         QStringListModel *funcNames)
    : QSyntaxHighlighter(document)
{
    // PHP formatting
    phpTagFormat.setForeground(Qt::red);
    phpVariableFormat.setForeground(Qt::blue);
    phpCommentFormat.setForeground(Qt::gray);
    phpQuoteFormat.setForeground(Qt::darkGreen);
    phpBoolFormat.setForeground(QColor(0, 100, 0));
    phpNumberFormat.setForeground(Qt::darkRed);
    phpKeywordFormat.setForeground(QColor(128, 128, 128));
    phpKeywordFormat.setFontWeight(QFont::Normal);
    phpKeywordFormat.setFontItalic(true);
    phpFunctionFormat.setForeground(Qt::darkBlue);

    // Insertion order matters: a later insert overrides an earlier one for
    // the same word.
    addModelRules(phpKeyWordsRules, keyWords, phpKeywordFormat);
    phpKeyWordsRules.insert("true", phpBoolFormat);
    phpKeyWordsRules.insert("false", phpBoolFormat);
    addModelRules(phpKeyWordsRules, funcNames, phpFunctionFormat);

    // HTML formatting
    htmlGenTagFormat.setForeground(Qt::blue);
    htmlTableFormat.setForeground(Qt::cyan);
    htmlFormFormat.setForeground(QColor(255, 153, 0));
    htmlQuoteFormat.setForeground(Qt::darkGreen);
    htmlCommentFormat.setForeground(Qt::gray);

    QStringList htmlList;
    htmlList << "DOCTYPE" << "a" << "abbr" << "address" << "area" << "article" << "aside"
             << "audio" << "b" << "base" << "bdi" << "bdo" << "blockquote" << "body" << "br"
             << "button" << "canvas" << "caption" << "cite" << "code" << "col" << "colgroup"
             << "command" << "datalist" << "dd" << "del" << "details" << "dfn" << "dir"
             << "div" << "dl" << "dt" << "em" << "embed" << "fieldset" << "figcaption"
             << "figure" << "footer" << "h" << "h1" << "h2" << "h3" << "h4" << "h5" << "h6"
             << "head" << "header" << "hgroup" << "hr" << "html" << "i" << "iframe" << "img"
             << "ins" << "kbd" << "keygen" << "label" << "legend" << "li" << "link" << "map"
             << "mark" << "menu" << "meta" << "meter" << "nav" << "noscript" << "object"
             << "ol" << "optgroup" << "option" << "output" << "p" << "param" << "pre"
             << "progress" << "q" << "rp" << "rt" << "ruby" << "s" << "samp" << "script"
             << "section" << "small" << "source" << "span" << "strong" << "style" << "sub"
             << "summary" << "sup" << "textarea" << "tfoot" << "time" << "title" << "track"
             << "u" << "ul" << "var" << "video" << "wbr";
    addRules(htmlKeyWordsRules, htmlList, htmlGenTagFormat);

    QStringList htmlTableList;
    htmlTableList << "table" << "tr" << "th" << "td" << "tbody" << "thead";
    addRules(htmlKeyWordsRules, htmlTableList, htmlTableFormat);

    QStringList htmlFormList;
    htmlFormList << "form" << "input" << "select";
    addRules(htmlKeyWordsRules, htmlFormList, htmlFormFormat);
}

// Highlights one block. First records the positions of all parentheses in
// the block's user data, then tokenises the text and colours it as PHP or
// HTML depending on the state inherited from the previous block and on the
// <? / ?> markers found in the text.
void Highlighter::highlightBlock(const QString &text)
{
    currentLine = text;

    // Record the parentheses for matching. A single left-to-right pass
    // yields them already sorted by position.
    TextBlockData *data = new TextBlockData;
    for (int pos = 0; pos < text.length(); ++pos) {
        const QChar ch = text.at(pos);
        if (ch == QLatin1Char('(') || ch == QLatin1Char(')')) {
            ParenthesisInfo *info = new ParenthesisInfo;
            info->character = ch.toLatin1();
            info->position = pos;
            data->insert(info);
        }
    }
    setCurrentBlockUserData(data);

    // Nothing to parse: just hand the inherited state on to the next block.
    if (text.trimmed().isEmpty()) {
        setCurrentBlockState(previousBlockState());
        return;
    }

    // Split the line by language markers (<? and ?>), then by block-comment
    // markers, then by words, then by any other non-escaped character.
    // Built once, because the pattern never changes.
    static const QRegularExpression splitter(
        "(<\\?)|(\\?>)|(\\/\\*)|(\\*\\/)|(\\w+)|((?<!\\\\)([^\\s\\\\]))");
    parser = splitter.globalMatch(text);

    // Start in the language the previous block ended in; without a previous
    // state, assume HTML.
    setCurrentBlockState(previousBlockState() != -1 ? previousBlockState() : StateHtml);

    // Continue a string or comment that was left open by the previous block.
    switch (currentBlockState()) {
    case StatePhpDoubleQuote:
        currentTagFormat = phpQuoteFormat;
        closeTag("\"", StatePhp);
        break;
    case StatePhpSingleQuote:
        currentTagFormat = phpQuoteFormat;
        closeTag("'", StatePhp);
        break;
    case StatePhpBlockComment:
        currentTagFormat = phpCommentFormat;
        closeTag("*/", StatePhp);
        break;
    default:
        break;
    }

    while (parser.hasNext()) {
        QRegularExpressionMatch word = parser.next();

        if (word.captured() == "<?") {
            // From here on this is PHP. The opening tag may be "<?", "<?php"
            // or "<?=".
            setCurrentBlockState(StatePhp);
            const int start = word.capturedStart();
            if (parser.hasNext()) {
                const QString following = parser.peekNext().captured();
                if (following == "php" || following == "=")
                    word = parser.next();
            }
            // setFormat() takes a character count, not an end position.
            setFormat(start, word.capturedEnd() - start, phpTagFormat);
            if (!parser.hasNext())
                break; // the opening tag was the last token of the line
            word = parser.next();
        }

        const QString token = word.captured();

        if (currentBlockState() < StateHtml) {
            // *** PHP highlighting ***
            if (token == "$") {
                // A variable; "$$name" (variable variable) is coloured as a
                // whole. next() must not be called past the last token.
                const int startExp = word.capturedStart();
                if (parser.hasNext())
                    word = parser.next();
                if (word.captured() == "$" && parser.hasNext())
                    word = parser.next();
                setFormat(startExp, word.capturedEnd() - startExp, phpVariableFormat);
            } else if (phpKeyWordsRules.contains(token.toLower())) {
                // Look up with the same lower-cased key that was tested, and
                // use value() so that the map is never modified here.
                setFormat(word.capturedStart(), word.capturedLength(),
                          phpKeyWordsRules.value(token.toLower()));
            } else if (token == "/" && parser.hasNext()
                       && parser.peekNext().captured() == "/"
                       && parser.peekNext().capturedStart() == word.capturedEnd()) {
                // Line comment: it runs to the end of the line or up to a
                // closing ?>. Swallow the tokens inside it so that quotes or
                // keywords in the comment are not interpreted as code.
                const int commentStart = word.capturedStart();
                int commentEnd = text.length();
                while (parser.hasNext()) {
                    if (parser.peekNext().captured() == "?>") {
                        commentEnd = parser.peekNext().capturedStart();
                        break;
                    }
                    parser.next();
                }
                setFormat(commentStart, commentEnd - commentStart, phpCommentFormat);
            } else if (token == "?>") {
                // TODO: must be changed once returning to CSS or JS is supported.
                setCurrentBlockState(StateHtml);
                setFormat(word.capturedStart(), word.capturedLength(), phpTagFormat);
            } else if (!token.isEmpty() && token.at(0).isDigit()) {
                // A number literal. Only tokens that start with a digit
                // count, so identifiers such as "foo1" are left alone. A
                // directly following "." is included; the fraction digits
                // are handled as the next number token.
                const int startExp = word.capturedStart();
                if (parser.hasNext() && parser.peekNext().captured() == ".")
                    word = parser.next();
                setFormat(startExp, word.capturedEnd() - startExp, phpNumberFormat);
            } else if (token == "\"") {
                currentTagFormat = phpQuoteFormat;
                setCurrentBlockState(StatePhpDoubleQuote);
                closeTag("\"", StatePhp, word.capturedStart());
            } else if (token == "'") {
                currentTagFormat = phpQuoteFormat;
                setCurrentBlockState(StatePhpSingleQuote);
                closeTag("'", StatePhp, word.capturedStart());
            } else if (token == "/*") {
                currentTagFormat = phpCommentFormat;
                setCurrentBlockState(StatePhpBlockComment);
                closeTag("*/", StatePhp, word.capturedStart());
            }
        } else if (token == "<") {
            // *** HTML highlighting ***
            const int startExp = word.capturedStart();

            // Skip a prefix such as "/" or "!" between "<" and the tag name.
            // Digits belong to tag names (h1..h6), so they are not a prefix.
            static const QRegularExpression nonTagName("[^a-z0-9]");
            if (parser.hasNext()
                && parser.peekNext().captured().toLower().contains(nonTagName))
                word = parser.next();

            if (parser.hasNext()) {
                // Tags that are not in the table get the general tag format
                // instead of inheriting whatever the previous tag used.
                const QString htmlTag = parser.peekNext().captured().toLower();
                htmlTagFormat = htmlKeyWordsRules.value(htmlTag, htmlGenTagFormat);
                setFormat(startExp, word.capturedEnd() - startExp, htmlTagFormat);
            }

            // Colour the rest of the tag up to and including ">"; quoted
            // attribute values get the quote format.
            bool tagAtEnd = false;
            while (parser.hasNext() && !tagAtEnd) {
                word = parser.next();
                if (word.captured() == "\"") {
                    currentTagFormat = htmlQuoteFormat;
                    closeTag("\"", StateHtml, word.capturedStart());
                } else {
                    setFormat(word.capturedStart(), word.capturedLength(), htmlTagFormat);
                    if (word.captured() == ">")
                        tagAtEnd = true;
                }
            }
        }
    }
}

// Consumes tokens until `tag` is found and applies currentTagFormat from
// `startPos` up to and including the terminator. When the terminator is
// found the block state becomes `returnState`; otherwise the state is left
// untouched and the format runs to the end of the line, so the construct
// continues in the next block.
void Highlighter::closeTag(QString tag, int returnState, int startPos)
{
    int endPos = currentLine.length();
    while (parser.hasNext()) {
        const QRegularExpressionMatch word = parser.next();
        if (word.captured() == tag) {
            setCurrentBlockState(returnState);
            endPos = word.capturedEnd();
            break;
        }
    }
    setFormat(startPos, endPos - startPos, currentTagFormat);
}