//! Glue between the libxml2 API and our xml parser module.
//!
//! This file provides functions to create a libxml2 xmlParserCtxtPtr, configured
//! to read from a gio::InputStream, and to maintain its loading data in an XmlState.
5

            
6
use gio::prelude::*;
7
use std::borrow::Cow;
8
use std::cell::{Cell, RefCell};
9
use std::ptr;
10
use std::rc::Rc;
11
use std::slice;
12
use std::str;
13
use std::sync::Once;
14

            
15
use glib::translate::*;
16
use markup5ever::{namespace_url, ns, LocalName, Namespace, Prefix, QualName};
17

            
18
use crate::error::LoadingError;
19
use crate::util::{cstr, opt_utf8_cstr, utf8_cstr, utf8_cstr_len};
20

            
21
use super::xml2::*;
22
use super::Attributes;
23
use super::XmlState;
24

            
25
#[rustfmt::skip]
26
1107
fn get_xml2_sax_handler() -> xmlSAXHandler {
27
1107
    xmlSAXHandler {
28
        // first the unused callbacks
29
1107
        internalSubset:        None,
30
1107
        isStandalone:          None,
31
1107
        hasInternalSubset:     None,
32
1107
        hasExternalSubset:     None,
33
1107
        resolveEntity:         None,
34
1107
        notationDecl:          None,
35
1107
        attributeDecl:         None,
36
1107
        elementDecl:           None,
37
1107
        setDocumentLocator:    None,
38
1107
        startDocument:         None,
39
1107
        endDocument:           None,
40
1107
        reference:             None,
41
1107
        ignorableWhitespace:   None,
42
1107
        comment:               None,
43
1107
        warning:               None,
44
1107
        error:                 None,
45
1107
        fatalError:            None,
46
1107
        externalSubset:        None,
47

            
48
1107
        _private:              ptr::null_mut(),
49

            
50
        // then the used callbacks
51
1107
        getEntity:             Some(sax_get_entity_cb),
52
1107
        entityDecl:            Some(sax_entity_decl_cb),
53
1107
        unparsedEntityDecl:    Some(sax_unparsed_entity_decl_cb),
54
1107
        getParameterEntity:    Some(sax_get_parameter_entity_cb),
55
1107
        characters:            Some(sax_characters_cb),
56
1107
        cdataBlock:            Some(sax_characters_cb),
57
1107
        startElement:          None,
58
1107
        endElement:            None,
59
1107
        processingInstruction: Some(sax_processing_instruction_cb),
60
1107
        startElementNs:        Some(sax_start_element_ns_cb),
61
1107
        endElementNs:          Some(sax_end_element_ns_cb),
62
1107
        serror:                Some(rsvg_sax_serror_cb),
63

            
64
        initialized:           XML_SAX2_MAGIC,
65
    }
66
1107
}
67

            
68
5
unsafe extern "C" fn rsvg_sax_serror_cb(user_data: *mut libc::c_void, error: xmlErrorPtr) {
69
5
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
70
5
    let error = error.as_ref().unwrap();
71

            
72
5
    let level_name = match error.level {
73
        1 => "warning",
74
2
        2 => "error",
75
3
        3 => "fatal error",
76
        _ => "unknown error",
77
    };
78

            
79
    // "int2" is the column number
80
5
    let column = if error.int2 > 0 {
81
5
        Cow::Owned(format!(":{}", error.int2))
82
    } else {
83
        Cow::Borrowed("")
84
    };
85

            
86
5
    let full_error_message = format!(
87
        "{} code={} ({}) in {}:{}{}: {}",
88
        level_name,
89
        error.code,
90
        error.domain,
91
8
        cstr(error.file),
92
        error.line,
93
        column,
94
6
        cstr(error.message)
95
    );
96
12
    xml2_parser
97
        .state
98
6
        .error(LoadingError::XmlParseError(full_error_message));
99
3
}
100

            
101
1153
fn free_xml_parser_and_doc(parser: xmlParserCtxtPtr) {
102
    // Free the ctxt and its ctxt->myDoc - libxml2 doesn't free them together
103
    // http://xmlsoft.org/html/libxml-parser.html#xmlFreeParserCtxt
104
    unsafe {
105
1153
        if !parser.is_null() {
106
1151
            let rparser = &mut *parser;
107

            
108
1151
            if !rparser.myDoc.is_null() {
109
                xmlFreeDoc(rparser.myDoc);
110
                rparser.myDoc = ptr::null_mut();
111
            }
112

            
113
1151
            xmlFreeParserCtxt(parser);
114
        }
115
    }
116
1153
}
117

            
118
62
unsafe extern "C" fn sax_get_entity_cb(
119
    user_data: *mut libc::c_void,
120
    name: *const libc::c_char,
121
) -> xmlEntityPtr {
122
62
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
123

            
124
62
    assert!(!name.is_null());
125
62
    let name = utf8_cstr(name);
126

            
127
124
    xml2_parser
128
        .state
129
        .entity_lookup(name)
130
62
        .unwrap_or(ptr::null_mut())
131
62
}
132

            
133
7
unsafe extern "C" fn sax_entity_decl_cb(
134
    user_data: *mut libc::c_void,
135
    name: *const libc::c_char,
136
    type_: libc::c_int,
137
    _public_id: *const libc::c_char,
138
    _system_id: *const libc::c_char,
139
    content: *const libc::c_char,
140
) {
141
7
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
142

            
143
7
    assert!(!name.is_null());
144

            
145
7
    if type_ != XML_INTERNAL_GENERAL_ENTITY {
146
        // We don't allow loading external entities; we don't support
147
        // defining parameter entities in the DTD, and libxml2 should
148
        // handle internal predefined entities by itself (e.g. "&amp;").
149
        return;
150
    }
151

            
152
7
    let entity = xmlNewEntity(
153
7
        ptr::null_mut(),
154
        name,
155
        type_,
156
7
        ptr::null(),
157
7
        ptr::null(),
158
        content,
159
    );
160
7
    assert!(!entity.is_null());
161

            
162
7
    let name = utf8_cstr(name);
163
7
    xml2_parser.state.entity_insert(name, entity);
164
7
}
165

            
166
unsafe extern "C" fn sax_unparsed_entity_decl_cb(
167
    user_data: *mut libc::c_void,
168
    name: *const libc::c_char,
169
    public_id: *const libc::c_char,
170
    system_id: *const libc::c_char,
171
    _notation_name: *const libc::c_char,
172
) {
173
    sax_entity_decl_cb(
174
        user_data,
175
        name,
176
        XML_INTERNAL_GENERAL_ENTITY,
177
        public_id,
178
        system_id,
179
        ptr::null(),
180
    );
181
}
182

            
183
2044608
fn make_qual_name(prefix: Option<&str>, uri: Option<&str>, localname: &str) -> QualName {
184
    // FIXME: If the element doesn't have a namespace URI, we are falling back
185
    // to the SVG namespace.  In reality we need to take namespace scoping into account,
186
    // i.e. handle the "default namespace" active at that point in the XML stack.
187
2045097
    let element_ns = uri.map_or_else(|| ns!(svg), Namespace::from);
188

            
189
2044646
    QualName::new(
190
2044608
        prefix.map(Prefix::from),
191
2044684
        element_ns,
192
2044684
        LocalName::from(localname),
193
2044608
    )
194
2044608
}
195

            
196
1022316
unsafe extern "C" fn sax_start_element_ns_cb(
197
    user_data: *mut libc::c_void,
198
    localname: *mut libc::c_char,
199
    prefix: *mut libc::c_char,
200
    uri: *mut libc::c_char,
201
    _nb_namespaces: libc::c_int,
202
    _namespaces: *mut *mut libc::c_char,
203
    nb_attributes: libc::c_int,
204
    _nb_defaulted: libc::c_int,
205
    attributes: *mut *mut libc::c_char,
206
) {
207
1022316
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
208

            
209
1022316
    assert!(!localname.is_null());
210

            
211
1022316
    let prefix = opt_utf8_cstr(prefix);
212
1022316
    let uri = opt_utf8_cstr(uri);
213
1022316
    let localname = utf8_cstr(localname);
214

            
215
1022316
    let qual_name = make_qual_name(prefix, uri, localname);
216

            
217
1022316
    let nb_attributes = nb_attributes as usize;
218
    let attrs =
219
1022316
        match Attributes::new_from_xml2_attributes(nb_attributes, attributes as *const *const _) {
220
1022316
            Ok(attrs) => attrs,
221
            Err(e) => {
222
                xml2_parser.state.error(e);
223
                let parser = xml2_parser.parser.get();
224
                xmlStopParser(parser);
225
                return;
226
            }
227
        };
228

            
229
    // This clippy::let_unit_value is for the "let _: () = e" guard below.
230
    #[allow(clippy::let_unit_value)]
231
2044632
    if let Err(e) = xml2_parser.state.start_element(qual_name, attrs) {
232
        let _: () = e; // guard in case we change the error type later
233

            
234
4
        let parser = xml2_parser.parser.get();
235
4
        xmlStopParser(parser);
236
    }
237
1022316
}
238

            
239
1022324
unsafe extern "C" fn sax_end_element_ns_cb(
240
    user_data: *mut libc::c_void,
241
    localname: *mut libc::c_char,
242
    prefix: *mut libc::c_char,
243
    uri: *mut libc::c_char,
244
) {
245
1022324
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
246

            
247
1022324
    assert!(!localname.is_null());
248

            
249
1022324
    let prefix = opt_utf8_cstr(prefix);
250
1022324
    let uri = opt_utf8_cstr(uri);
251
1022324
    let localname = utf8_cstr(localname);
252

            
253
1022324
    let qual_name = make_qual_name(prefix, uri, localname);
254

            
255
1022324
    xml2_parser.state.end_element(qual_name);
256
1022324
}
257

            
258
1034240
unsafe extern "C" fn sax_characters_cb(
259
    user_data: *mut libc::c_void,
260
    unterminated_text: *const libc::c_char,
261
    len: libc::c_int,
262
) {
263
1034240
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
264

            
265
1034240
    assert!(!unterminated_text.is_null());
266
1034240
    assert!(len >= 0);
267

            
268
1034240
    let utf8 = utf8_cstr_len(unterminated_text, len as usize);
269
1034240
    xml2_parser.state.characters(utf8);
270
1034240
}
271

            
272
3
unsafe extern "C" fn sax_processing_instruction_cb(
273
    user_data: *mut libc::c_void,
274
    target: *const libc::c_char,
275
    data: *const libc::c_char,
276
) {
277
3
    let xml2_parser = &*(user_data as *mut Xml2Parser<'_>);
278

            
279
3
    assert!(!target.is_null());
280
3
    let target = utf8_cstr(target);
281

            
282
3
    let data = if data.is_null() { "" } else { utf8_cstr(data) };
283

            
284
3
    xml2_parser.state.processing_instruction(target, data);
285
3
}
286

            
287
unsafe extern "C" fn sax_get_parameter_entity_cb(
288
    user_data: *mut libc::c_void,
289
    name: *const libc::c_char,
290
) -> xmlEntityPtr {
291
    sax_get_entity_cb(user_data, name)
292
}
293

            
294
1123
fn set_xml_parse_options(parser: xmlParserCtxtPtr, unlimited_size: bool) {
295
1123
    let mut options: libc::c_int = XML_PARSE_NONET | XML_PARSE_BIG_LINES;
296

            
297
1123
    if unlimited_size {
298
3
        options |= XML_PARSE_HUGE;
299
    }
300

            
301
    unsafe {
302
1123
        xmlCtxtUseOptions(parser, options);
303

            
304
        // If false, external entities work, but internal ones don't. if
305
        // true, internal entities work, but external ones don't. favor
306
        // internal entities, in order to not cause a regression
307
1123
        (*parser).replaceEntities = 1;
308
    }
309
1123
}
310

            
311
// Struct used as closure data for xmlCreateIOParserCtxt().  In conjunction
312
// with stream_ctx_read() and stream_ctx_close(), this struct provides the
313
// I/O callbacks and their context for libxml2.
314
//
315
// We call I/O methods on the stream, and as soon as we get an error
316
// we store it in the gio_error field.  Libxml2 just allows us to
317
// return -1 from the I/O callbacks in that case; it doesn't actually
318
// see the error code.
319
//
320
// The gio_error field comes from the place that constructs the
321
// StreamCtx.  That place is later responsible for seeing if the error
322
// is set; if it is, it means that there was an I/O error.  Otherwise,
323
// there were no I/O errors but the caller must then ask libxml2 for
324
// XML parsing errors.
325
struct StreamCtx {
326
    stream: gio::InputStream,
327
    cancellable: Option<gio::Cancellable>,
328
    gio_error: Rc<RefCell<Option<glib::Error>>>,
329
}
330

            
331
// read() callback from xmlCreateIOParserCtxt()
332
5522
unsafe extern "C" fn stream_ctx_read(
333
    context: *mut libc::c_void,
334
    buffer: *mut libc::c_char,
335
    len: libc::c_int,
336
) -> libc::c_int {
337
5522
    let ctx = &mut *(context as *mut StreamCtx);
338

            
339
5522
    let mut err_ref = ctx.gio_error.borrow_mut();
340

            
341
    // has the error been set already?
342
5522
    if err_ref.is_some() {
343
        return -1;
344
    }
345

            
346
    // Convert from libc::c_char to u8.  Why transmute?  Because libc::c_char
347
    // is of different signedness depending on the architecture (u8 on aarch64,
348
    // i8 on x86_64).  If one just uses "start as *const u8", it triggers a
349
    // trivial_casts warning.
350
    #[allow(trivial_casts)]
351
5517
    let u8_buffer = buffer as *mut u8;
352
5495
    let buf = slice::from_raw_parts_mut(u8_buffer, len as usize);
353

            
354
5506
    match ctx.stream.read(buf, ctx.cancellable.as_ref()) {
355
5504
        Ok(size) => size as libc::c_int,
356

            
357
        Err(e) => {
358
            // Just store the first I/O error we get; ignore subsequent ones.
359
            *err_ref = Some(e);
360
            -1
361
        }
362
    }
363
5504
}
364

            
365
// close() callback from xmlCreateIOParserCtxt()
366
1153
unsafe extern "C" fn stream_ctx_close(context: *mut libc::c_void) -> libc::c_int {
367
1153
    let ctx = &mut *(context as *mut StreamCtx);
368

            
369
1153
    let ret = match ctx.stream.close(ctx.cancellable.as_ref()) {
370
1153
        Ok(()) => 0,
371

            
372
        Err(e) => {
373
            let mut err_ref = ctx.gio_error.borrow_mut();
374

            
375
            // don't overwrite a previous error
376
            if err_ref.is_none() {
377
                *err_ref = Some(e);
378
            }
379

            
380
            -1
381
        }
382
    };
383

            
384
1153
    drop(Box::from_raw(ctx));
385

            
386
1153
    ret
387
1153
}
388

            
389
1148
fn init_libxml2() {
390
    static ONCE: Once = Once::new();
391

            
392
1260
    ONCE.call_once(|| unsafe {
393
112
        xmlInitParser();
394
112
    });
395
1148
}
396

            
397
pub struct Xml2Parser<'a> {
398
    parser: Cell<xmlParserCtxtPtr>,
399
    state: &'a XmlState,
400
    gio_error: Rc<RefCell<Option<glib::Error>>>,
401
}
402

            
403
impl<'a> Xml2Parser<'a> {
404
1149
    pub fn from_stream(
405
        state: &'a XmlState,
406
        unlimited_size: bool,
407
        stream: &gio::InputStream,
408
        cancellable: Option<&gio::Cancellable>,
409
    ) -> Result<Box<Xml2Parser<'a>>, LoadingError> {
410
1151
        init_libxml2();
411

            
412
        // The Xml2Parser we end up creating, if
413
        // xmlCreateIOParserCtxt() is successful, needs to hold a
414
        // location to place a GError from within the I/O callbacks
415
        // stream_ctx_read() and stream_ctx_close().  We put this
416
        // location in an Rc so that it can outlive the call to
417
        // xmlCreateIOParserCtxt() in case that fails, since on
418
        // failure that function frees the StreamCtx.
419
1151
        let gio_error = Rc::new(RefCell::new(None));
420

            
421
1139
        let ctx = Box::new(StreamCtx {
422
1149
            stream: stream.clone(),
423
1139
            cancellable: cancellable.cloned(),
424
1139
            gio_error: gio_error.clone(),
425
1127
        });
426

            
427
1151
        let mut sax_handler = get_xml2_sax_handler();
428

            
429
1131
        let mut xml2_parser = Box::new(Xml2Parser {
430
1139
            parser: Cell::new(ptr::null_mut()),
431
            state,
432
1205
            gio_error,
433
1168
        });
434

            
435
        unsafe {
436
1168
            let xml2_parser_ptr: *mut Xml2Parser<'a> = xml2_parser.as_mut();
437
1139
            let parser = xmlCreateIOParserCtxt(
438
                &mut sax_handler,
439
                xml2_parser_ptr as *mut _,
440
1144
                Some(stream_ctx_read),
441
1144
                Some(stream_ctx_close),
442
1144
                Box::into_raw(ctx) as *mut _,
443
                XML_CHAR_ENCODING_NONE,
444
            );
445

            
446
2265
            if parser.is_null() {
447
                // on error, xmlCreateIOParserCtxt() frees our ctx via the
448
                // stream_ctx_close function
449
                Err(LoadingError::OutOfMemory(String::from(
450
                    "could not create XML parser",
451
                )))
452
            } else {
453
1134
                xml2_parser.parser.set(parser);
454

            
455
1138
                set_xml_parse_options(parser, unlimited_size);
456

            
457
1130
                Ok(xml2_parser)
458
            }
459
        }
460
1130
    }
461

            
462
1126
    pub fn parse(&self) -> Result<(), LoadingError> {
463
        unsafe {
464
1180
            let parser = self.parser.get();
465

            
466
1180
            let xml_parse_success = xmlParseDocument(parser) == 0;
467

            
468
1180
            let mut err_ref = self.gio_error.borrow_mut();
469

            
470
1126
            let io_error = err_ref.take();
471

            
472
1153
            if let Some(io_error) = io_error {
473
                Err(LoadingError::from(io_error))
474
1156
            } else if !xml_parse_success {
475
3
                let xerr = xmlCtxtGetLastError(parser as *mut _);
476
3
                let msg = xml2_error_to_string(xerr);
477
3
                Err(LoadingError::XmlParseError(msg))
478
            } else {
479
1150
                Ok(())
480
            }
481
1153
        }
482
1180
    }
483
}
484

            
485
impl<'a> Drop for Xml2Parser<'a> {
486
1152
    fn drop(&mut self) {
487
1152
        let parser = self.parser.get();
488
1152
        free_xml_parser_and_doc(parser);
489
1152
        self.parser.set(ptr::null_mut());
490
1152
    }
491
}
492

            
493
3
fn xml2_error_to_string(xerr: xmlErrorPtr) -> String {
494
    unsafe {
495
3
        if !xerr.is_null() {
496
3
            let xerr = &*xerr;
497

            
498
3
            let file = if xerr.file.is_null() {
499
3
                "data".to_string()
500
            } else {
501
                from_glib_none(xerr.file)
502
            };
503

            
504
3
            let message = if xerr.message.is_null() {
505
                "-".to_string()
506
            } else {
507
3
                from_glib_none(xerr.message)
508
            };
509

            
510
3
            format!(
511
                "Error domain {} code {} on line {} column {} of {}: {}",
512
                xerr.domain, xerr.code, xerr.line, xerr.int2, file, message
513
            )
514
3
        } else {
515
            // The error is not set?  Return a generic message :(
516
            "Error parsing XML data".to_string()
517
        }
518
    }
519
3
}