QSyntaxHighlighter with parser



  • Hi,

    As suggested on this forum, I tried to redo the QSyntaxHighlighter example with a text parser instead of regular expressions. The syntax highlighting is meant for PHP. A PHP file can contain several other languages; so far I have added highlighting for PHP and HTML. Before I move on to add support for CSS and JavaScript, I wanted to ask for feedback. Am I on the right track here, or am I doing this completely wrong? This is my first parser.
    I know that passing the keywords to the highlighter needs to be improved. I still need to come up with a good way of doing that for different file types: if I use the highlighter with a CSS file, I only need and want the CSS highlighting.
    I also have a second question. Besides language-specific syntax highlighting, I would like to have language-specific auto-completion. How can I pass the current block state to the completer?

    Here is the modified code. It already includes the parenthesis matching.
    highlighter.h

    #ifndef HIGHLIGHTER_H
    #define HIGHLIGHTER_H
    
    #include <QSyntaxHighlighter>
    #include <QVector>
    #include <QStringListModel>
    #include <QRegularExpression>
    
    class QString;
    class QTextDocument;
    
    /// Describes one parenthesis found in a text block.
    /// Both members are default-initialised so that an instance created with
    /// `new ParenthesisInfo` or as a plain local never holds indeterminate values.
    struct ParenthesisInfo {
        char character = '\0';  ///< The parenthesis character, '(' or ')'.
        int position = 0;       ///< Zero-based index of the character within its block.
    };
    
    class TextBlockData : public QTextBlockUserData {
    public:
        TextBlockData();
    
        QVector<ParenthesisInfo *> parentheses();
        void insert(ParenthesisInfo *info);
    
    private:
        QVector<ParenthesisInfo *> m_parentheses;
    };
    
    class Highlighter : public QSyntaxHighlighter {
        Q_OBJECT
    
    public:
        Highlighter(QTextDocument *document, QStringListModel *keyWords, QStringListModel *funcNames);
    
    protected:
        void highlightBlock(const QString &text);
    
    private:
        QMap<QString, QTextCharFormat> phpKeyWordsRules;
        QMap<QString, QTextCharFormat> htmlKeyWordsRules;
    
        QTextCharFormat currentTagFormat;
    
        QTextCharFormat phpKeywordFormat;
        QTextCharFormat phpTagFormat;
        QTextCharFormat phpCommentFormat;
        QTextCharFormat phpQuoteFormat;
        QTextCharFormat phpFunctionFormat;
        QTextCharFormat phpVariableFormat;
        QTextCharFormat phpNumberFormat;
        QTextCharFormat phpBoolFormat;
    
        QTextCharFormat htmlTagFormat;
        QTextCharFormat htmlGenTagFormat;
        QTextCharFormat htmlTableFormat;
        QTextCharFormat htmlFormFormat;
        QTextCharFormat htmlQuoteFormat;
        QTextCharFormat htmlCommentFormat;
    
        QRegularExpressionMatchIterator parser;
        QString currentLine;
    
        void closeTag(QString tag, int returnState, int startPos = 0);
    };
    
    #endif
    

    highlighter.cpp

    #include <QtGui>
    
    #include "highlighter.h"
    
    // Creates block data with an empty parenthesis list.
    TextBlockData::TextBlockData()
        : QTextBlockUserData()
        , m_parentheses()
    {
    }
    
    // Hands out a copy of the pointer list (ordered by position).
    QVector<ParenthesisInfo *> TextBlockData::parentheses()
    {
        QVector<ParenthesisInfo *> entries = m_parentheses;
        return entries;
    }
    
    
    // Inserts info so that the list stays ordered by ascending position.
    // A new entry is placed in front of existing entries with the same position.
    void TextBlockData::insert(ParenthesisInfo *info)
    {
        int slot = 0;
        for (; slot < m_parentheses.size(); ++slot) {
            // First entry that is not located before the new one marks the slot.
            if (m_parentheses.at(slot)->position >= info->position)
                break;
        }

        m_parentheses.insert(slot, info);
    }
    
    Highlighter::Highlighter(QTextDocument *document, QStringListModel *keyWords, QStringListModel *funcNames): QSyntaxHighlighter(document){
    // PHP formatting
        phpTagFormat.setForeground(Qt::red);
        phpVariableFormat.setForeground(Qt::blue);
        phpCommentFormat.setForeground(Qt::gray);
        phpQuoteFormat.setForeground(Qt::darkGreen);
        phpBoolFormat.setForeground(QColor(0,100,0));
        phpNumberFormat.setForeground(Qt::darkRed);
    
        phpKeywordFormat.setForeground(QColor(128, 128, 128));
        phpKeywordFormat.setFontWeight(QFont::Normal);
        phpKeywordFormat.setFontItalic(true);
        int rows = keyWords->rowCount();
        for (int j = 0; j < rows; j++) {
           phpKeyWordsRules.insert(keyWords->data(keyWords->index(j), Qt::DisplayRole).toString(), phpKeywordFormat);
        }
        phpKeyWordsRules.insert("true", phpBoolFormat);
        phpKeyWordsRules.insert("false", phpBoolFormat);
    
        phpFunctionFormat.setForeground(Qt::darkBlue);
        rows = funcNames->rowCount();
        for (int k = 0; k < rows; k++) {
            phpKeyWordsRules.insert(funcNames->data(funcNames->index(k), Qt::DisplayRole).toString(), phpFunctionFormat);
        }
    
    // HTML formatting
        htmlGenTagFormat.setForeground(Qt::blue);
        htmlTableFormat.setForeground(Qt::cyan);
        htmlFormFormat.setForeground(QColor(255, 153, 0));
        htmlQuoteFormat.setForeground(Qt::darkGreen);
        htmlCommentFormat.setForeground(Qt::gray);
    
        QStringList htmlList;
        htmlList << "DOCTYPE" << "a" << "abbr" << "address" << "area" << "article" << "aside" << "audio" << "b" << "base" << "bdi" << "bdo" << "blockquote" << "body" << "br" << "button" << "canvas" << "caption" << "cite" << "code" << "col" << "colgroup" << "command" << "datalist" << "dd" << "del" << "details" << "dfn" << "dir" << "div" << "dl" << "dt" << "em" << "embed" << "fieldset" << "figcaption" << "figure" << "footer" << "h" << "head" << "header" << "hgroup" << "hr" << "html" << "i" << "iframe" << "img" << "ins" << "kbd" << "keygen" << "label" << "legend" << "li" << "link" << "map" << "mark" << "menu" << "meta" << "meter" << "nav" << "noscript" << "object" << "ol" << "optgroup" << "option" << "output" << "p" << "param" << "pre" << "progress" << "q" << "rp" << "rt" << "ruby" << "s" << "samp" << "script" << "section" << "small" << "source" << "span" << "strong" << "style" << "sub" << "summary" << "sup" << "textarea" << "tfoot" << "time" << "title" << "track" << "u" << "ul" << "var" << "video" << "wbr";
        foreach (QString tag, htmlList) {
            htmlKeyWordsRules.insert(tag, htmlGenTagFormat);
        }
    
        QStringList htmlTableList;
        htmlTableList << "table" << "tr" << "th" << "td" << "tbody" << "thead";
        foreach (QString tag, htmlTableList) {
            htmlKeyWordsRules.insert(tag, htmlTableFormat);
        }
    
        QStringList htmlFormList;
        htmlFormList << "form" << "input" << "select";
        foreach (QString tag, htmlFormList) {
            htmlKeyWordsRules.insert(tag, htmlFormFormat);
        }
    }
    
    void Highlighter::highlightBlock(const QString &text){
        currentLine = text;
        // highlight matching parenthesis
        TextBlockData *data = new TextBlockData;
    
        int leftPos = text.indexOf('(');
        while (leftPos != -1) {
            ParenthesisInfo *info = new ParenthesisInfo;
            info->character = '(';
            info->position = leftPos;
    
            data->insert(info);
            leftPos = text.indexOf('(', leftPos + 1);
        }
    
        int rightPos = text.indexOf(')');
        while (rightPos != -1) {
            ParenthesisInfo *info = new ParenthesisInfo;
            info->character = ')';
            info->position = rightPos;
    
            data->insert(info);
    
            rightPos = text.indexOf(')', rightPos +1);
        }
    
        setCurrentBlockUserData(data);
    
    //- Text parser -
        // check if there is something to parse
        if(text.simplified().trimmed().isEmpty()){
            setCurrentBlockState(previousBlockState());
            return;
        }
        // Split line by coding language identifiers, here <? and ?>, then by comment tags, then by (key)words and then by any other char that is not escaped.
        QRegularExpression splitter = QRegularExpression("(<\\?)|(\\?>)|(\\/\\*)|(\\*\\/)|(\\w+)|((?<!\\\\)([^\\s\\\]))");
        parser = splitter.globalMatch(text);
    
    //    qDebug() <<"Starting with" << currentBlockState() << "last state was" << previousBlockState() << "looking at" << text;
        // check if the last block can tell us what language we should start in
    
        if(previousBlockState() != -1){
            setCurrentBlockState(previousBlockState());
        }else{
            // Assuming it should be html then
            setCurrentBlockState(10);
        }
    
        // Close any open comments/strings if necessary
        if(currentBlockState() == 1){
            currentTagFormat = phpQuoteFormat;
            closeTag("\"", 0);
        }else if(currentBlockState() == 2){
            currentTagFormat = phpQuoteFormat;
            closeTag("'", 0);
        }else if(currentBlockState() == 3){
            currentTagFormat = phpCommentFormat;
            closeTag("*/", 0);
        }
    
        while(parser.hasNext()){
            QRegularExpressionMatch word = parser.next();
    
            if(word.captured() == "<?"){
                // from now on this is PHP
                setCurrentBlockState(0);
                int start = word.capturedStart();
                if(parser.hasNext() && parser.peekNext().captured() == "php"){
                    word = parser.next();
                }else if(parser.hasNext() && parser.peekNext().captured() == "="){
                    word = parser.next();
                }
    
                setFormat(start, word.capturedEnd(), phpTagFormat);
                if(parser.hasNext())
                    word = parser.next();
            }
    
            // Start highlighting according to coding language
            if(currentBlockState() < 10){
                // *** PHP highlighting ***
                int startExp = 0;
    
               if(word.captured() == "$"){
                   // we found a variable
                   startExp = word.capturedStart();
    
                   word = parser.next();
                   if(word.captured() == "$")
                       word = parser.next();
                   setFormat(startExp,word.capturedEnd()-startExp, phpVariableFormat);
               }else if(phpKeyWordsRules.contains(word.captured().toLower())){
                   setFormat(word.capturedStart(), word.capturedLength(), phpKeyWordsRules[word.captured()]);
               }else if(word.captured() == "/" && parser.hasNext() && parser.peekNext().captured() == "/"){
                   setFormat(word.capturedStart(), text.length()-word.capturedStart(), phpCommentFormat);
               }else if(word.captured() == "?>"){
                   setCurrentBlockState(10); // muss geändert werden falls zurück zu css oder js
                   setFormat(word.capturedStart(), word.capturedLength(), phpTagFormat);
               }else if(word.captured().contains(QRegExp("\\d+"))){
                   startExp = word.capturedStart();
                   if(parser.hasNext() && parser.peekNext().captured() == ".")
                       word = parser.next();
                   setFormat(startExp, word.capturedEnd()-startExp, phpNumberFormat);
               }else if(word.captured() == "\""){
                   currentTagFormat = phpQuoteFormat;
                   setCurrentBlockState(1);
                   closeTag("\"", 0, word.capturedStart());
               }else if(word.captured() == "'"){
                   currentTagFormat = phpQuoteFormat;
                   setCurrentBlockState(2);
                   closeTag("'", 0, word.capturedStart());
               }else if(word.captured() == "/*"){
                   currentTagFormat = phpCommentFormat;
                   setCurrentBlockState(3);
                   closeTag("*/", 0, word.capturedStart());
               }
            }else{
                // HTML highlighting
                if(word.captured() == "<"){
                    int startExp = word.capturedStart();
                    if(parser.hasNext() && parser.peekNext().captured().toLower().contains(QRegExp("[^a-z]")))
                        word = parser.next();
    
                    if(parser.hasNext()){
                        QString htmlTag = parser.peekNext().captured().toLower();
                        if(htmlKeyWordsRules.contains(htmlTag))
                            htmlTagFormat = htmlKeyWordsRules[htmlTag];
    
                        setFormat(startExp, word.capturedEnd()-startExp, htmlTagFormat);
                    }
    
                    bool tagAtEnd = false;
                    while(parser.hasNext() && !tagAtEnd){
                        word = parser.next();
                        if(word.captured() == "\""){
                            currentTagFormat = htmlQuoteFormat;
                            closeTag("\"", 10, word.capturedStart());
                        }else{
                            setFormat(word.capturedStart(), word.capturedLength(), htmlTagFormat);
                            if(word.captured() == ">")
                                tagAtEnd = true;
                        }
                    }
                }
            }
        }
    //    qDebug() << "exeting line" << text << "with status" << currentBlockState();
    }
    
    // Consumes tokens from the shared parser until one equals tag.
    //
    // tag         - terminator to look for (closing quote or comment end).
    // returnState - block state to switch to once the terminator is found.
    // startPos    - position in the line where the formatted span begins.
    //
    // The span from startPos up to and including the terminator is formatted
    // with currentTagFormat. When the line holds no terminator, the block state
    // is left untouched and the line length is used as the span length.
    void Highlighter::closeTag(QString tag, int returnState, int startPos){
        int span = 0;

        for(;;){
            if(!parser.hasNext())
                break;

            const QRegularExpressionMatch candidate = parser.next();
            if(candidate.captured() != tag)
                continue;

            // Terminator reached: leave the string/comment state.
            setCurrentBlockState(returnState);
            span = candidate.capturedEnd() - startPos;
            break;
        }

        // A span of zero means the construct continues on the next line.
        const int count = (span == 0) ? currentLine.length() : span;
        setFormat(startPos, count, currentTagFormat);
    }
    

Log in to reply
 

Looks like your connection to Qt Forum was lost, please wait while we try to reconnect.