LCOV - code coverage report
Current view: top level - vala - valamarkupreader.vala (source / functions) Coverage Total Hit
Test: vala 0.57.0.298-a8cae1 Lines: 71.9 % 178 128
Test Date: 2024-04-25 11:34:36 Functions: - 0 0

            Line data    Source code
       1              : /* valamarkupreader.vala
       2              :  *
       3              :  * Copyright (C) 2008-2009  Jürg Billeter
       4              :  *
       5              :  * This library is free software; you can redistribute it and/or
       6              :  * modify it under the terms of the GNU Lesser General Public
       7              :  * License as published by the Free Software Foundation; either
       8              :  * version 2.1 of the License, or (at your option) any later version.
       9              : 
      10              :  * This library is distributed in the hope that it will be useful,
      11              :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      13              :  * Lesser General Public License for more details.
      14              : 
      15              :  * You should have received a copy of the GNU Lesser General Public
      16              :  * License along with this library; if not, write to the Free Software
      17              :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
      18              :  *
      19              :  * Author:
      20              :  *      Jürg Billeter <j@bitron.ch>
      21              :  */
      22              : 
      23              : using GLib;
      24              : 
      25              : /**
      26              :  * Simple reader for a subset of XML.
      27              :  */
      28          612 : public class Vala.MarkupReader {
      29          558 :         public string filename { get; private set; }
      30              : 
      31        41252 :         public string name { get; private set; }
      32              : 
      33        13832 :         public string content { get; private set; }
      34              : 
      35          186 :         MappedFile mapped_file;
      36              : 
      37              :         char* begin;
      38              :         char* current;
      39              :         char* end;
      40              : 
      41              :         int line;
      42              :         int column;
      43              : 
      44          372 :         Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);
      45              :         bool empty_element;
      46              : 
      47          372 :         public MarkupReader (string filename) {
      48          186 :                 this.filename = filename;
      49              : 
      50          186 :                 try {
      51          186 :                         mapped_file = new MappedFile (filename, false);
      52          186 :                         begin = mapped_file.get_contents ();
      53          186 :                         end = begin + mapped_file.get_length ();
      54              : 
      55          186 :                         current = begin;
      56              : 
      57          186 :                         line = 1;
      58          186 :                         column = 1;
      59              :                 } catch (FileError e) {
      60            0 :                         Report.error (null, "Unable to map file `%s': %s", filename, e.message);
      61              :                 }
      62              :         }
      63              : 
      64            0 :         public MarkupReader.from_string (string filename, string content) {
      65            0 :                 this.filename = filename;
      66              : 
      67            0 :                 begin = content;
      68            0 :                 end = begin + content.length;
      69              : 
      70            0 :                 current = begin;
      71              : 
      72            0 :                 line = 1;
      73            0 :                 column = 1;
      74              :         }
      75              : 
      76          242 :         public bool has_attribute (string attr) {
      77          242 :                 return attributes.contains (attr);
      78              :         }
      79              : 
      80        12767 :         public string? get_attribute (string attr) {
      81        12767 :                 return attributes[attr];
      82              :         }
      83              : 
      84              :         /*
      85              :          * Returns a copy of the current attributes.
      86              :          *
      87              :          * @return map of current attributes
      88              :          */
      89          501 :         public Map<string,string> get_attributes () {
      90          501 :                 var result = new HashMap<string,string> (str_hash, str_equal);
      91         2464 :                 foreach (var key in attributes.get_keys ()) {
      92         1462 :                         result.set (key, attributes.get (key));
      93              :                 }
      94              :                 return result;
      95              :         }
      96              : 
      97        12734 :         string read_name () {
      98        12734 :                 char* begin = current;
      99       112910 :                 while (current < end) {
     100       112910 :                         if (current[0] == ' ' || current[0] == '\t' || current[0] == '>'
     101              :                             || current[0] == '/' || current[0] == '=' || current[0] == '\n') {
     102              :                                 break;
     103              :                         }
     104       100176 :                         unichar u = ((string) current).get_char_validated ((long) (end - current));
     105       100176 :                         if (u != (unichar) (-1)) {
     106       100176 :                                 current += u.to_utf8 (null);
     107              :                         } else {
     108            0 :                                 Report.error (null, "invalid UTF-8 character");
     109              :                         }
     110              :                 }
     111        12734 :                 if (current == begin) {
     112              :                         // syntax error: invalid name
     113              :                 }
     114        12734 :                 return ((string) begin).substring (0, (int) (current - begin));
     115              :         }
     116              : 
     117         7839 :         public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
     118         7839 :                 attributes.clear ();
     119              : 
     120         7839 :                 if (empty_element) {
     121         1401 :                         empty_element = false;
     122         1401 :                         token_begin = SourceLocation (begin, line, column);
     123         1401 :                         token_end = SourceLocation (begin, line, column);
     124         1401 :                         return MarkupTokenType.END_ELEMENT;
     125              :                 }
     126              : 
     127         6438 :                 content = null;
     128         6438 :                 name = null;
     129              : 
     130         6438 :                 space ();
     131              : 
     132         6438 :                 MarkupTokenType type = MarkupTokenType.NONE;
     133         6438 :                 char* begin = current;
     134         6438 :                 token_begin = SourceLocation (begin, line, column);
     135              : 
     136         6438 :                 if (current >= end) {
     137              :                         type = MarkupTokenType.EOF;
     138         6376 :                 } else if (current[0] == '<') {
     139         5996 :                         current++;
     140         5996 :                         if (current >= end) {
     141              :                                 // error
     142         5996 :                         } else if (current[0] == '?') {
     143              :                                 // processing instruction
     144         5810 :                         } else if (current[0] == '!') {
     145              :                                 // comment or doctype
     146          133 :                                 current++;
     147          133 :                                 if (current < end - 1 && current[0] == '-' && current[1] == '-') {
     148              :                                         // comment
     149          133 :                                         current += 2;
     150        21745 :                                         while (current < end - 2) {
     151        21745 :                                                 if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
     152              :                                                         // end of comment
     153          133 :                                                         current += 3;
     154          133 :                                                         break;
     155        21612 :                                                 } else if (current[0] == '\n') {
     156          248 :                                                         line++;
     157          248 :                                                         column = 0;
     158              :                                                 }
     159        21612 :                                                 current++;
     160              :                                         }
     161              : 
     162              :                                         // ignore comment, read next token
     163          133 :                                         return read_token (out token_begin, out token_end);
     164              :                                 }
     165         5677 :                         } else if (current[0] == '/') {
     166         2014 :                                 type = MarkupTokenType.END_ELEMENT;
     167         2014 :                                 current++;
     168         2014 :                                 name = read_name ();
     169         2014 :                                 if (current >= end || current[0] != '>') {
     170              :                                         // error
     171              :                                 }
     172         2014 :                                 current++;
     173              :                         } else {
     174         3663 :                                 type = MarkupTokenType.START_ELEMENT;
     175         3663 :                                 name = read_name ();
     176         3663 :                                 space ();
     177        10720 :                                 while (current < end && current[0] != '>' && current[0] != '/') {
     178         7057 :                                         string attr_name = read_name ();
     179         7057 :                                         space ();
     180         7057 :                                         if (current >= end || current[0] != '=') {
     181              :                                                 // error
     182              :                                         }
     183         7057 :                                         current++;
     184         7057 :                                         space ();
     185         7057 :                                         if (current >= end || current[0] != '"' || current[0] != '\'') {
     186              :                                                 // error
     187              :                                         }
     188         7057 :                                         char quote = current[0];
     189         7057 :                                         current++;
     190              : 
     191         7057 :                                         string attr_value = text (quote, false);
     192              : 
     193         7057 :                                         if (current >= end || current[0] != quote) {
     194              :                                                 // error
     195              :                                         }
     196         7057 :                                         current++;
     197         7057 :                                         attributes.set (attr_name, attr_value);
     198         7057 :                                         space ();
     199              :                                 }
     200         3663 :                                 if (current[0] == '/') {
     201         1525 :                                         empty_element = true;
     202         1525 :                                         current++;
     203         1525 :                                         space ();
     204              :                                 } else {
     205         2138 :                                         empty_element = false;
     206              :                                 }
     207         3663 :                                 if (current >= end || current[0] != '>') {
     208              :                                         // error
     209              :                                 }
     210         3663 :                                 current++;
     211              :                         }
     212              :                 } else {
     213          380 :                         space ();
     214              : 
     215          380 :                         if (current[0] != '<') {
     216          380 :                                 content = text ('<', true);
     217              :                         } else {
     218              :                                 // no text
     219              :                                 // read next token
     220            0 :                                 return read_token (out token_begin, out token_end);
     221              :                         }
     222              : 
     223          380 :                         type = MarkupTokenType.TEXT;
     224              :                 }
     225              : 
     226         6305 :                 token_end = SourceLocation (current, line, column - 1);
     227              : 
     228         6305 :                 return type;
     229              :         }
     230              : 
     231         7437 :         string text (char end_char, bool rm_trailing_whitespace) {
     232         7437 :                 StringBuilder content = new StringBuilder ();
     233         7437 :                 char* text_begin = current;
     234         7437 :                 char* last_linebreak = current;
     235              : 
     236        90555 :                 while (current < end && current[0] != end_char) {
     237        83118 :                         unichar u = ((string) current).get_char_validated ((long) (end - current));
     238        83118 :                         if (u == (unichar) (-1)) {
     239            0 :                                 Report.error (null, "invalid UTF-8 character");
     240        83118 :                         } else if (u == '&') {
     241            0 :                                 char* next_pos = current + u.to_utf8 (null);
     242              :                                 char buffer[16];
     243            0 :                                 Memory.copy (buffer, next_pos, (end - next_pos >= buffer.length ? buffer.length - 1 : end - next_pos));
     244            0 :                                 if (((string) buffer).has_prefix ("amp;")) {
     245            0 :                                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     246            0 :                                         content.append_c ('&');
     247            0 :                                         current += 5;
     248            0 :                                         text_begin = current;
     249            0 :                                 } else if (((string) buffer).has_prefix ("quot;")) {
     250            0 :                                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     251            0 :                                         content.append_c ('"');
     252            0 :                                         current += 6;
     253            0 :                                         text_begin = current;
     254            0 :                                 } else if (((string) buffer).has_prefix ("apos;")) {
     255            0 :                                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     256            0 :                                         content.append_c ('\'');
     257            0 :                                         current += 6;
     258            0 :                                         text_begin = current;
     259            0 :                                 } else if (((string) buffer).has_prefix ("lt;")) {
     260            0 :                                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     261            0 :                                         content.append_c ('<');
     262            0 :                                         current += 4;
     263            0 :                                         text_begin = current;
     264            0 :                                 } else if (((string) buffer).has_prefix ("gt;")) {
     265            0 :                                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     266            0 :                                         content.append_c ('>');
     267            0 :                                         current += 4;
     268            0 :                                         text_begin = current;
     269            0 :                                 } else if (((string) buffer).has_prefix ("percnt;")) {
     270            0 :                                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     271            0 :                                         content.append_c ('%');
     272            0 :                                         current += 8;
     273            0 :                                         text_begin = current;
     274              :                                 } else {
     275            0 :                                         current += u.to_utf8 (null);
     276              :                                 }
     277              :                         } else {
     278        83118 :                                 if (u == '\n') {
     279          186 :                                         line++;
     280          186 :                                         column = 0;
     281          186 :                                         last_linebreak = current;
     282              :                                 }
     283              : 
     284        83118 :                                 current += u.to_utf8 (null);
     285        83118 :                                 column++;
     286              :                         }
     287              :                 }
     288              : 
     289         7437 :                 if (text_begin != current) {
     290        14872 :                         content.append (((string) text_begin).substring (0, (int) (current - text_begin)));
     291              :                 }
     292              : 
     293         7437 :                 column += (int) (current - last_linebreak);
     294              : 
     295              :                 // Removes trailing whitespace
     296         7437 :                 if (rm_trailing_whitespace) {
     297          380 :                         char* str_pos = ((char*)content.str) + content.len;
     298          566 :                         for (str_pos--; str_pos > ((char*)content.str) && str_pos[0].isspace(); str_pos--);
     299          380 :                         content.erase ((ssize_t) (str_pos-((char*) content.str) + 1), -1);
     300              :                 }
     301              : 
     302        14874 :                 return content.str;
     303              :         }
     304              : 
     305        33177 :         void space () {
     306        71511 :                 while (current < end && current[0].isspace ()) {
     307        38334 :                         if (current[0] == '\n') {
     308         6045 :                                 line++;
     309         6045 :                                 column = 0;
     310              :                         }
     311        38334 :                         current++;
     312        38334 :                         column++;
     313              :                 }
     314              :         }
     315              : }
     316              : 
     317              : public enum Vala.MarkupTokenType {
     318              :         NONE,
     319              :         START_ELEMENT,
     320              :         END_ELEMENT,
     321              :         TEXT,
     322              :         EOF;
     323              : 
     324              :         public unowned string to_string () {
     325            0 :                 switch (this) {
     326            0 :                 case START_ELEMENT: return "start element";
     327            0 :                 case END_ELEMENT: return "end element";
     328            0 :                 case TEXT: return "text";
     329            0 :                 case EOF: return "end of file";
     330            0 :                 default: return "unknown token type";
     331              :                 }
     332              :         }
     333              : }
     334              : 
        

Generated by: LCOV version 2.0-1