1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
//! Glue between the libxml2 API and our xml parser module.
//!
//! This file provides functions to create a libxml2 xmlParserCtxtPtr, configured
//! to read from a gio::InputStream, and to maintain its loading data in an XmlState.

use gio::prelude::*;
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::ptr;
use std::rc::Rc;
use std::slice;
use std::str;
use std::sync::Once;

use glib::translate::*;
use markup5ever::{namespace_url, ns, LocalName, Namespace, Prefix, QualName};

use crate::error::LoadingError;
use crate::util::{cstr, opt_utf8_cstr, utf8_cstr, utf8_cstr_len};

use super::xml2::*;
use super::Attributes;
use super::XmlState;

/// Builds the table of SAX callbacks that gets handed to libxml2.
///
/// Only the namespace-aware element callbacks (`startElementNs` /
/// `endElementNs`) are installed; the `startElement` / `endElement` slots are
/// left empty.  CDATA blocks go through the same callback as ordinary
/// character data.
#[rustfmt::skip]
fn get_xml2_sax_handler() -> xmlSAXHandler {
    xmlSAXHandler {
        initialized:           XML_SAX2_MAGIC,
        _private:              ptr::null_mut(),

        // entity declaration and lookup
        getEntity:             Some(sax_get_entity_cb),
        getParameterEntity:    Some(sax_get_parameter_entity_cb),
        entityDecl:            Some(sax_entity_decl_cb),
        unparsedEntityDecl:    Some(sax_unparsed_entity_decl_cb),

        // element boundaries
        startElementNs:        Some(sax_start_element_ns_cb),
        endElementNs:          Some(sax_end_element_ns_cb),
        startElement:          None,
        endElement:            None,

        // text content and processing instructions
        characters:            Some(sax_characters_cb),
        cdataBlock:            Some(sax_characters_cb),
        processingInstruction: Some(sax_processing_instruction_cb),

        // structured error reporting
        serror:                Some(rsvg_sax_serror_cb),

        // everything below is unused
        internalSubset:        None,
        isStandalone:          None,
        hasInternalSubset:     None,
        hasExternalSubset:     None,
        resolveEntity:         None,
        notationDecl:          None,
        attributeDecl:         None,
        elementDecl:           None,
        setDocumentLocator:    None,
        startDocument:         None,
        endDocument:           None,
        reference:             None,
        ignorableWhitespace:   None,
        comment:               None,
        warning:               None,
        error:                 None,
        fatalError:            None,
        externalSubset:        None,
    }
}

unsafe extern "C" fn rsvg_sax_serror_cb(user_data: *mut libc::c_void, error: xmlErrorPtr) {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
    let error = error.as_ref().unwrap();

    let level_name = match error.level {
        1 => "warning",
        2 => "error",
        3 => "fatal error",
        _ => "unknown error",
    };

    // "int2" is the column number
    let column = if error.int2 > 0 {
        Cow::Owned(format!(":{}", error.int2))
    } else {
        Cow::Borrowed("")
    };

    let full_error_message = format!(
        "{} code={} ({}) in {}:{}{}: {}",
        level_name,
        error.code,
        error.domain,
        cstr(error.file),
        error.line,
        column,
        cstr(error.message)
    );
    xml2_parser
        .state
        .error(LoadingError::XmlParseError(full_error_message));
}

/// Releases a libxml2 parser context together with the document it holds.
///
/// libxml2 doesn't free the ctxt and its ctxt->myDoc together, so the document
/// is released (and its pointer cleared) before the context itself is freed.
/// http://xmlsoft.org/html/libxml-parser.html#xmlFreeParserCtxt
///
/// Passing a null `parser` does nothing.
fn free_xml_parser_and_doc(parser: xmlParserCtxtPtr) {
    if parser.is_null() {
        return;
    }

    unsafe {
        let doc = (*parser).myDoc;

        if !doc.is_null() {
            xmlFreeDoc(doc);
            (*parser).myDoc = ptr::null_mut();
        }

        xmlFreeParserCtxt(parser);
    }
}

/// SAX `getEntity` callback: looks up a previously declared entity by name.
///
/// Returns the entity stored in the `XmlState`, or a null pointer when no
/// entity with that name is known.  Panics if `name` is null.
unsafe extern "C" fn sax_get_entity_cb(
    user_data: *mut libc::c_void,
    name: *const libc::c_char,
) -> xmlEntityPtr {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);

    assert!(!name.is_null());
    let entity_name = utf8_cstr(name);

    match xml2_parser.state.entity_lookup(entity_name) {
        Some(entity) => entity,
        None => ptr::null_mut(),
    }
}

/// SAX `entityDecl` callback: records an internal general entity.
///
/// For `XML_INTERNAL_GENERAL_ENTITY` declarations, a new libxml2 entity is
/// created from `name` and `content` and stored in the `XmlState` under its
/// name.  Every other entity type is ignored.  The public and system
/// identifiers are never used.
unsafe extern "C" fn sax_entity_decl_cb(
    user_data: *mut libc::c_void,
    name: *const libc::c_char,
    type_: libc::c_int,
    _public_id: *const libc::c_char,
    _system_id: *const libc::c_char,
    content: *const libc::c_char,
) {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);

    assert!(!name.is_null());

    // We don't allow loading external entities; we don't support
    // defining parameter entities in the DTD, and libxml2 should
    // handle internal predefined entities by itself (e.g. "&amp;").
    // So only internal general entities get past this check.
    if type_ == XML_INTERNAL_GENERAL_ENTITY {
        let no_doc = ptr::null_mut();
        let no_external_id = ptr::null();
        let no_system_id = ptr::null();

        let entity = xmlNewEntity(no_doc, name, type_, no_external_id, no_system_id, content);
        assert!(!entity.is_null());

        xml2_parser.state.entity_insert(utf8_cstr(name), entity);
    }
}

/// SAX `unparsedEntityDecl` callback.
///
/// Forwards to `sax_entity_decl_cb`, declaring the entity as an internal
/// general entity with null content.  The notation name is ignored.
unsafe extern "C" fn sax_unparsed_entity_decl_cb(
    user_data: *mut libc::c_void,
    name: *const libc::c_char,
    public_id: *const libc::c_char,
    system_id: *const libc::c_char,
    _notation_name: *const libc::c_char,
) {
    // There is no replacement text to pass along for this kind of declaration.
    let no_content: *const libc::c_char = ptr::null();
    let entity_type = XML_INTERNAL_GENERAL_ENTITY;

    sax_entity_decl_cb(user_data, name, entity_type, public_id, system_id, no_content);
}

/// Assembles a `QualName` from the pieces libxml2 reports for an element.
///
/// `prefix` and `uri` are optional; `localname` is always required.
fn make_qual_name(prefix: Option<&str>, uri: Option<&str>, localname: &str) -> QualName {
    // FIXME: If the element doesn't have a namespace URI, we are falling back
    // to the SVG namespace.  In reality we need to take namespace scoping into account,
    // i.e. handle the "default namespace" active at that point in the XML stack.
    let element_ns = match uri {
        Some(uri) => Namespace::from(uri),
        None => ns!(svg),
    };

    let prefix = prefix.map(Prefix::from);
    let local = LocalName::from(localname);

    QualName::new(prefix, element_ns, local)
}

/// SAX `startElementNs` callback: reports an opening tag to the `XmlState`.
///
/// Builds the element's qualified name and its attributes, then calls
/// `XmlState::start_element`.  If the attributes cannot be built, the error is
/// recorded in the state and the parser is stopped; the parser is also stopped
/// when `start_element` itself fails.  The namespace declarations and the
/// count of defaulted attributes are not used.
unsafe extern "C" fn sax_start_element_ns_cb(
    user_data: *mut libc::c_void,
    localname: *mut libc::c_char,
    prefix: *mut libc::c_char,
    uri: *mut libc::c_char,
    _nb_namespaces: libc::c_int,
    _namespaces: *mut *mut libc::c_char,
    nb_attributes: libc::c_int,
    _nb_defaulted: libc::c_int,
    attributes: *mut *mut libc::c_char,
) {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);

    assert!(!localname.is_null());

    let qual_name = {
        let prefix = opt_utf8_cstr(prefix);
        let uri = opt_utf8_cstr(uri);

        make_qual_name(prefix, uri, utf8_cstr(localname))
    };

    let attrs_result =
        Attributes::new_from_xml2_attributes(nb_attributes as usize, attributes as *const *const _);

    let attrs = match attrs_result {
        Err(e) => {
            // Bad attributes: record why, and make libxml2 give up on the document.
            xml2_parser.state.error(e);
            xmlStopParser(xml2_parser.parser.get());
            return;
        }

        Ok(attrs) => attrs,
    };

    // This clippy::let_unit_value is for the "let _: () = e" guard below.
    #[allow(clippy::let_unit_value)]
    if let Err(e) = xml2_parser.state.start_element(qual_name, attrs) {
        let _: () = e; // guard in case we change the error type later

        xmlStopParser(xml2_parser.parser.get());
    }
}

/// SAX `endElementNs` callback: reports a closing tag to the `XmlState`.
///
/// The qualified name is rebuilt from `prefix`, `uri` and `localname` in the
/// same way as for the opening tag.  Panics if `localname` is null.
unsafe extern "C" fn sax_end_element_ns_cb(
    user_data: *mut libc::c_void,
    localname: *mut libc::c_char,
    prefix: *mut libc::c_char,
    uri: *mut libc::c_char,
) {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);

    assert!(!localname.is_null());

    let closing_name = make_qual_name(
        opt_utf8_cstr(prefix),
        opt_utf8_cstr(uri),
        utf8_cstr(localname),
    );

    xml2_parser.state.end_element(closing_name);
}

/// SAX `characters` / `cdataBlock` callback: forwards a run of text.
///
/// libxml2 passes the text as a pointer plus a length; the text is not
/// nul-terminated.  Panics if the pointer is null or the length is negative.
unsafe extern "C" fn sax_characters_cb(
    user_data: *mut libc::c_void,
    unterminated_text: *const libc::c_char,
    len: libc::c_int,
) {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);

    assert!(!unterminated_text.is_null());
    assert!(len >= 0);

    let byte_count = len as usize;
    let text = utf8_cstr_len(unterminated_text, byte_count);

    xml2_parser.state.characters(text);
}

/// SAX `processingInstruction` callback.
///
/// Passes the instruction's target and data on to the `XmlState`.  Null data
/// is passed on as an empty string.  Panics if `target` is null.
unsafe extern "C" fn sax_processing_instruction_cb(
    user_data: *mut libc::c_void,
    target: *const libc::c_char,
    data: *const libc::c_char,
) {
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);

    assert!(!target.is_null());
    let pi_target = utf8_cstr(target);

    let pi_data = if !data.is_null() {
        utf8_cstr(data)
    } else {
        ""
    };

    xml2_parser.state.processing_instruction(pi_target, pi_data);
}

/// SAX `getParameterEntity` callback.
///
/// Parameter entities are resolved through exactly the same lookup as general
/// entities: this simply delegates to `sax_get_entity_cb`.
unsafe extern "C" fn sax_get_parameter_entity_cb(
    user_data: *mut libc::c_void,
    name: *const libc::c_char,
) -> xmlEntityPtr {
    sax_get_entity_cb(user_data, name)
}

/// Configures a freshly created parser context.
///
/// `XML_PARSE_NONET` and `XML_PARSE_BIG_LINES` are always enabled;
/// `XML_PARSE_HUGE` is added only when `unlimited_size` is true.  Entity
/// replacement is switched on unconditionally.
fn set_xml_parse_options(parser: xmlParserCtxtPtr, unlimited_size: bool) {
    let base_options: libc::c_int = XML_PARSE_NONET | XML_PARSE_BIG_LINES;

    let options = if unlimited_size {
        base_options | XML_PARSE_HUGE
    } else {
        base_options
    };

    unsafe {
        xmlCtxtUseOptions(parser, options);

        // If replaceEntities is false, external entities work, but internal
        // ones don't.  If it is true, internal entities work, but external
        // ones don't.  Favor internal entities, in order to not cause a
        // regression.
        (*parser).replaceEntities = 1;
    }
}

// Struct used as closure data for xmlCreateIOParserCtxt().  In conjunction
// with stream_ctx_read() and stream_ctx_close(), this struct provides the
// I/O callbacks and their context for libxml2.
//
// We call I/O methods on the stream, and as soon as we get an error
// we store it in the gio_error field.  Libxml2 just allows us to
// return -1 from the I/O callbacks in that case; it doesn't actually
// see the error code.
//
// The gio_error field comes from the place that constructs the
// StreamCtx.  That place is later responsible for seeing if the error
// is set; if it is, it means that there was an I/O error.  Otherwise,
// there were no I/O errors but the caller must then ask libxml2 for
// XML parsing errors.
struct StreamCtx {
    // Stream that stream_ctx_read() reads from and stream_ctx_close() closes.
    stream: gio::InputStream,

    // Optional cancellable passed to every read and close call on the stream.
    cancellable: Option<gio::Cancellable>,

    // Slot for the first I/O error; shared with whoever created this struct.
    gio_error: Rc<RefCell<Option<glib::Error>>>,
}

// read() callback from xmlCreateIOParserCtxt()
//
// Reads up to `len` bytes from the StreamCtx's stream into `buffer` and
// returns the number of bytes read, or -1 on failure.  The first I/O error is
// stored in the StreamCtx's gio_error field; once an error is stored, every
// later call fails right away without touching the stream again.
unsafe extern "C" fn stream_ctx_read(
    context: *mut libc::c_void,
    buffer: *mut libc::c_char,
    len: libc::c_int,
) -> libc::c_int {
    // Refuse arguments that cannot describe a valid buffer: a negative `len`
    // would wrap around to an enormous usize in the cast below, and
    // slice::from_raw_parts_mut() requires a non-null pointer.
    if buffer.is_null() || len < 0 {
        return -1;
    }

    let ctx = &mut *(context as *mut StreamCtx);

    let mut err_ref = ctx.gio_error.borrow_mut();

    // has the error been set already?
    if err_ref.is_some() {
        return -1;
    }

    // Convert from libc::c_char to u8.  libc::c_char is of different
    // signedness depending on the architecture (u8 on aarch64, i8 on x86_64),
    // so where it already is u8 this pointer cast changes nothing and triggers
    // a trivial_casts warning; hence the allow.
    #[allow(trivial_casts)]
    let u8_buffer = buffer as *mut u8;
    let buf = slice::from_raw_parts_mut(u8_buffer, len as usize);

    match ctx.stream.read(buf, ctx.cancellable.as_ref()) {
        // A read cannot return more bytes than the buffer holds, and the
        // buffer holds `len` bytes, so the count fits back into a c_int.
        Ok(size) => size as libc::c_int,

        Err(e) => {
            // Just store the first I/O error we get; ignore subsequent ones.
            *err_ref = Some(e);
            -1
        }
    }
}

// close() callback from xmlCreateIOParserCtxt()
unsafe extern "C" fn stream_ctx_close(context: *mut libc::c_void) -> libc::c_int {
    let ctx = &mut *(context as *mut StreamCtx);

    let ret = match ctx.stream.close(ctx.cancellable.as_ref()) {
        Ok(()) => 0,

        Err(e) => {
            let mut err_ref = ctx.gio_error.borrow_mut();

            // don't overwrite a previous error
            if err_ref.is_none() {
                *err_ref = Some(e);
            }

            -1
        }
    };

    drop(Box::from_raw(ctx));

    ret
}

/// Runs libxml2's `xmlInitParser()` the first time this is called; later
/// calls do nothing.
fn init_libxml2() {
    static LIBXML2_INIT: Once = Once::new();

    LIBXML2_INIT.call_once(|| {
        unsafe { xmlInitParser() };
    });
}

/// A libxml2 parser context that reads from a `gio::InputStream` and reports
/// what it parses to an `XmlState`.
///
/// Create one with [`Xml2Parser::from_stream`] and run it with
/// [`Xml2Parser::parse`].  The libxml2 context is freed when this is dropped.
pub struct Xml2Parser<'a> {
    /// The libxml2 context; null until `from_stream` has created it, and
    /// reset to null when the parser is dropped.
    parser: Cell<xmlParserCtxtPtr>,

    /// Receiver of the SAX events and of the errors reported by libxml2.
    state: &'a XmlState,

    /// First I/O error hit by the stream callbacks, if any; this slot is
    /// shared with the `StreamCtx` given to libxml2.
    gio_error: Rc<RefCell<Option<glib::Error>>>,
}

impl<'a> Xml2Parser<'a> {
    /// Creates a parser that reads XML from `stream` and feeds it to `state`.
    ///
    /// `unlimited_size` lifts libxml2's size limits (see
    /// `set_xml_parse_options`).  `cancellable`, if given, is used for every
    /// read and for closing the stream.
    ///
    /// # Errors
    ///
    /// Returns `LoadingError::OutOfMemory` if libxml2 could not create the
    /// parser context.
    pub fn from_stream(
        state: &'a XmlState,
        unlimited_size: bool,
        stream: &gio::InputStream,
        cancellable: Option<&gio::Cancellable>,
    ) -> Result<Box<Xml2Parser<'a>>, LoadingError> {
        init_libxml2();

        // The Xml2Parser we end up creating, if
        // xmlCreateIOParserCtxt() is successful, needs to hold a
        // location to place a GError from within the I/O callbacks
        // stream_ctx_read() and stream_ctx_close().  We put this
        // location in an Rc so that it can outlive the call to
        // xmlCreateIOParserCtxt() in case that fails, since on
        // failure that function frees the StreamCtx.
        let gio_error = Rc::new(RefCell::new(None));

        let stream_ctx = Box::new(StreamCtx {
            stream: stream.clone(),
            cancellable: cancellable.cloned(),
            gio_error: Rc::clone(&gio_error),
        });

        let mut sax_handler = get_xml2_sax_handler();

        // Boxed so that the address given to libxml2 as callback user data
        // stays the same when the parser is returned to the caller.
        let mut xml2_parser = Box::new(Xml2Parser {
            parser: Cell::new(ptr::null_mut()),
            state,
            gio_error,
        });

        let parser = unsafe {
            let user_data: *mut Xml2Parser<'a> = xml2_parser.as_mut();

            xmlCreateIOParserCtxt(
                &mut sax_handler,
                user_data as *mut _,
                Some(stream_ctx_read),
                Some(stream_ctx_close),
                Box::into_raw(stream_ctx) as *mut _,
                XML_CHAR_ENCODING_NONE,
            )
        };

        if parser.is_null() {
            // on error, xmlCreateIOParserCtxt() frees our ctx via the
            // stream_ctx_close function
            return Err(LoadingError::OutOfMemory(String::from(
                "could not create XML parser",
            )));
        }

        xml2_parser.parser.set(parser);
        set_xml_parse_options(parser, unlimited_size);

        Ok(xml2_parser)
    }

    /// Parses the whole document.
    ///
    /// # Errors
    ///
    /// An I/O error stored by the stream callbacks takes precedence and is
    /// returned converted into a `LoadingError`.  Otherwise, if libxml2
    /// reported failure, its last error is returned as a
    /// `LoadingError::XmlParseError`.
    pub fn parse(&self) -> Result<(), LoadingError> {
        let parser = self.parser.get();

        let xml_parse_success = unsafe { xmlParseDocument(parser) } == 0;

        // Taking the error also leaves the shared slot empty again.
        if let Some(io_error) = self.gio_error.borrow_mut().take() {
            return Err(LoadingError::from(io_error));
        }

        if xml_parse_success {
            return Ok(());
        }

        let msg = unsafe { xml2_error_to_string(xmlCtxtGetLastError(parser as *mut _)) };

        Err(LoadingError::XmlParseError(msg))
    }
}

impl<'a> Drop for Xml2Parser<'a> {
    /// Frees the libxml2 context and its document, leaving a null pointer in
    /// the `parser` field.
    fn drop(&mut self) {
        let parser = self.parser.replace(ptr::null_mut());
        free_xml_parser_and_doc(parser);
    }
}

/// Renders a libxml2 error as a human-readable message.
///
/// A missing file name is shown as "data" and a missing message as "-".
/// When `xerr` itself is null, a generic message is returned instead.
fn xml2_error_to_string(xerr: xmlErrorPtr) -> String {
    // The error is not set?  Return a generic message :(
    if xerr.is_null() {
        return "Error parsing XML data".to_string();
    }

    let (domain, code, line, column, file, message) = unsafe {
        let xerr = &*xerr;

        let file: String = if !xerr.file.is_null() {
            from_glib_none(xerr.file)
        } else {
            "data".to_string()
        };

        let message: String = if !xerr.message.is_null() {
            from_glib_none(xerr.message)
        } else {
            "-".to_string()
        };

        // "int2" is what gets reported as the column.
        (xerr.domain, xerr.code, xerr.line, xerr.int2, file, message)
    };

    format!(
        "Error domain {} code {} on line {} column {} of {}: {}",
        domain, code, line, column, file, message
    )
}