1
//! Parser for an Accept-Language HTTP header.
2

            
3
use language_tags::{LanguageTag, ParseError};
4
use locale_config::{LanguageRange, Locale};
5

            
6
use std::error;
7
use std::fmt;
8
use std::str::FromStr;
9

            
10
#[cfg(doc)]
11
use crate::api::CairoRenderer;
12

            
13
/// Used to set the language for rendering.
14
///
15
/// SVG documents can use the `<switch>` element, whose children have a `systemLanguage`
16
/// attribute; only the first child which has a `systemLanguage` that matches the
17
/// preferred languages will be rendered.
18
///
19
/// This enum, used with [`CairoRenderer::with_language`], configures how to obtain the
20
/// user's prefererred languages.
21
pub enum Language {
22
    /// Use the Unix environment variables `LANGUAGE`, `LC_ALL`, `LC_MESSAGES` and `LANG` to obtain the
23
    /// user's language.
24
    ///
25
    /// This uses [`g_get_language_names()`][ggln] underneath.
26
    ///
27
    /// [ggln]: https://docs.gtk.org/glib/func.get_language_names.html
28
    FromEnvironment,
29

            
30
    /// Use a list of languages in the form of an HTTP Accept-Language header, like `es, en;q=0.8`.
31
    ///
32
    /// This is convenient when you want to select an explicit set of languages, instead of
33
    /// assuming that the Unix environment has the language you want.
34
    AcceptLanguage(AcceptLanguage),
35
}
36

            
37
/// `Language` but with the environment's locale converted to something we can use.
38
#[derive(Clone)]
39
pub enum UserLanguage {
40
    LanguageTags(LanguageTags),
41
    AcceptLanguage(AcceptLanguage),
42
}
43

            
44
/// Weight (`q=` value) attached to an item of an Accept-Language list.
///
/// `None` means that the item carried no explicit weight.
#[derive(Clone, Debug, PartialEq)]
struct Weight(Option<f32>);

impl Weight {
    /// Returns the weight as a plain number; a missing weight counts as `1.0`.
    fn numeric(&self) -> f32 {
        match self.0 {
            Some(q) => q,
            None => 1.0,
        }
    }
}
52

            
53
#[derive(Clone, Debug, PartialEq)]
54
struct Item {
55
    tag: LanguageTag,
56
    weight: Weight,
57
}
58

            
59
/// Stores a parsed version of an HTTP Accept-Language header.
60
///
61
/// RFC 7231: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
62
#[derive(Clone, Debug, PartialEq)]
63
pub struct AcceptLanguage(Box<[Item]>);
64

            
65
/// Errors when parsing an `AcceptLanguage`.
66
#[derive(Debug, PartialEq)]
67
enum AcceptLanguageError {
68
    NoElements,
69
    InvalidCharacters,
70
    InvalidLanguageTag(ParseError),
71
    InvalidWeight,
72
}
73

            
74
impl error::Error for AcceptLanguageError {}
75

            
76
impl fmt::Display for AcceptLanguageError {
77
20
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78
20
        match self {
79
            Self::NoElements => write!(f, "no language tags in list"),
80
            Self::InvalidCharacters => write!(f, "invalid characters in language list"),
81
20
            Self::InvalidLanguageTag(e) => write!(f, "invalid language tag: {e}"),
82
            Self::InvalidWeight => write!(f, "invalid q= weight"),
83
        }
84
20
    }
85
}
86

            
87
/// Optional whitespace (OWS) per RFC 7230: a space (`0x20`) or a horizontal tab (`0x09`).
///
/// RFC 7230: <https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.3>
const OWS: [char; 2] = [' ', '\t'];
91

            
92
impl AcceptLanguage {
93
    /// Parses the payload of an HTTP Accept-Language header.
94
    ///
95
    /// For example, a valid header looks like `es, en;q=0.8`, and means, "I prefer Spanish,
96
    /// but will also accept English".
97
    ///
98
    /// Use this function to construct a [`Language::AcceptLanguage`]
99
    /// variant to pass to the [`CairoRenderer::with_language`] function.
100
    ///
101
    /// See RFC 7231 for details: <https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.5>
102
80
    pub fn parse(s: &str) -> Result<AcceptLanguage, String> {
103
80
        AcceptLanguage::parse_internal(s).map_err(|e| format!("{}", e))
104
80
    }
105

            
106
    /// Internal constructor.  We don't expose [`AcceptLanguageError`] in the public API;
107
    /// there we just use a [`String`].
108
128
    fn parse_internal(s: &str) -> Result<AcceptLanguage, AcceptLanguageError> {
109
128
        if !s.is_ascii() {
110
2
            return Err(AcceptLanguageError::InvalidCharacters);
111
126
        }
112

            
113
126
        let mut items = Vec::new();
114

            
115
150
        for val in s.split(',') {
116
150
            let trimmed = val.trim_matches(&OWS[..]);
117
150
            if trimmed.is_empty() {
118
24
                continue;
119
126
            }
120

            
121
126
            items.push(Item::parse(trimmed)?);
122
        }
123

            
124
88
        if items.is_empty() {
125
6
            Err(AcceptLanguageError::NoElements)
126
        } else {
127
82
            Ok(AcceptLanguage(items.into_boxed_slice()))
128
        }
129
128
    }
130

            
131
102
    fn iter(&self) -> impl Iterator<Item = (&LanguageTag, f32)> {
132
104
        self.0.iter().map(|item| (&item.tag, item.weight.numeric()))
133
102
    }
134

            
135
100
    fn any_matches(&self, tag: &LanguageTag) -> bool {
136
100
        self.iter().any(|(self_tag, _weight)| tag.matches(self_tag))
137
100
    }
138
}
139

            
140
impl Item {
141
126
    fn parse(s: &str) -> Result<Item, AcceptLanguageError> {
142
126
        let semicolon_pos = s.find(';');
143

            
144
126
        let (before_semicolon, after_semicolon) = if let Some(semi) = semicolon_pos {
145
38
            (&s[..semi], Some(&s[semi + 1..]))
146
        } else {
147
88
            (s, None)
148
        };
149

            
150
126
        let tag = LanguageTag::parse(before_semicolon)
151
126
            .map_err(AcceptLanguageError::InvalidLanguageTag)?;
152

            
153
104
        let weight = if let Some(quality) = after_semicolon {
154
38
            let quality = quality.trim_start_matches(&OWS[..]);
155

            
156
38
            let number = if let Some(qvalue) = quality.strip_prefix("q=") {
157
34
                if qvalue.starts_with(&['0', '1'][..]) {
158
30
                    let first_digit = qvalue.chars().next().unwrap();
159

            
160
30
                    if let Some(decimals) = qvalue[1..].strip_prefix('.') {
161
26
                        if (first_digit == '0'
162
10
                            && decimals.len() <= 3
163
10
                            && decimals.chars().all(|c| c.is_ascii_digit()))
164
18
                            || (first_digit == '1'
165
16
                                && decimals.len() <= 3
166
24
                                && decimals.chars().all(|c| c == '0'))
167
                        {
168
18
                            qvalue
169
                        } else {
170
8
                            return Err(AcceptLanguageError::InvalidWeight);
171
                        }
172
                    } else {
173
4
                        qvalue
174
                    }
175
                } else {
176
4
                    return Err(AcceptLanguageError::InvalidWeight);
177
                }
178
            } else {
179
4
                return Err(AcceptLanguageError::InvalidWeight);
180
            };
181

            
182
            Weight(Some(
183
22
                f32::from_str(number).map_err(|_| AcceptLanguageError::InvalidWeight)?,
184
            ))
185
        } else {
186
66
            Weight(None)
187
        };
188

            
189
88
        Ok(Item { tag, weight })
190
126
    }
191
}
192

            
193
/// A list of BCP47 language tags.
194
///
195
/// RFC 5664: <https://www.rfc-editor.org/info/rfc5664>
196
#[derive(Debug, Clone, PartialEq)]
197
pub struct LanguageTags(Box<[LanguageTag]>);
198

            
199
impl LanguageTags {
200
2
    pub fn empty() -> Self {
201
2
        LanguageTags(Box::new([]))
202
2
    }
203

            
204
    /// Converts a `Locale` to a set of language tags.
205
23742
    pub fn from_locale(locale: &Locale) -> Result<LanguageTags, String> {
206
23742
        let mut tags = Vec::new();
207

            
208
68744
        for locale_range in locale.tags_for("messages") {
209
68744
            if locale_range == LanguageRange::invariant() {
210
23740
                continue;
211
45004
            }
212

            
213
45004
            let str_locale_range = locale_range.as_ref();
214

            
215
45004
            let locale_tag = LanguageTag::from_str(str_locale_range).map_err(|e| {
216
                format!("invalid language tag \"{str_locale_range}\" in locale: {e}")
217
            })?;
218

            
219
45004
            if !locale_tag.is_language_range() {
220
                return Err(format!(
221
                    "language tag \"{locale_tag}\" is not a language range"
222
                ));
223
45004
            }
224

            
225
45004
            tags.push(locale_tag);
226
        }
227

            
228
23742
        Ok(LanguageTags(Box::from(tags)))
229
23742
    }
230

            
231
256
    pub fn from(tags: Vec<LanguageTag>) -> LanguageTags {
232
256
        LanguageTags(Box::from(tags))
233
256
    }
234

            
235
216
    pub fn iter(&self) -> impl Iterator<Item = &LanguageTag> {
236
216
        self.0.iter()
237
216
    }
238

            
239
118
    pub fn any_matches(&self, language_tag: &LanguageTag) -> bool {
240
288
        self.0.iter().any(|tag| tag.matches(language_tag))
241
118
    }
242
}
243

            
244
impl UserLanguage {
245
216
    pub fn any_matches(&self, tags: &LanguageTags) -> bool {
246
216
        match *self {
247
116
            UserLanguage::LanguageTags(ref language_tags) => {
248
118
                tags.iter().any(|tag| language_tags.any_matches(tag))
249
            }
250
100
            UserLanguage::AcceptLanguage(ref accept_language) => {
251
100
                tags.iter().any(|tag| accept_language.any_matches(tag))
252
            }
253
        }
254
216
    }
255
}
256

            
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Item` expected for `tag` with the given optional weight.
    fn item(tag: &str, weight: Option<f32>) -> Item {
        Item {
            tag: LanguageTag::parse(tag).unwrap(),
            weight: Weight(weight),
        }
    }

    /// Asserts that `input` parses into exactly the `expected` items, in order.
    #[track_caller]
    fn assert_parses_to(input: &str, expected: Vec<Item>) {
        assert_eq!(
            AcceptLanguage::parse_internal(input).unwrap(),
            AcceptLanguage(expected.into_boxed_slice())
        );
    }

    #[test]
    fn parses_accept_language() {
        // plain tag
        assert_parses_to("es-MX", vec![item("es-MX", None)]);

        // with quality
        assert_parses_to("es-MX;q=1", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=0", vec![item("es-MX", Some(0.0))]);

        // zero decimals are allowed
        assert_parses_to("es-MX;q=0.", vec![item("es-MX", Some(0.0))]);
        assert_parses_to("es-MX;q=1.", vec![item("es-MX", Some(1.0))]);

        // one, two and three decimals
        assert_parses_to("es-MX;q=1.0", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=1.00", vec![item("es-MX", Some(1.0))]);
        assert_parses_to("es-MX;q=1.000", vec![item("es-MX", Some(1.0))]);

        // multiple elements
        assert_parses_to(
            "es-MX, en; q=0.5",
            vec![item("es-MX", None), item("en", Some(0.5))],
        );

        // superfluous whitespace
        assert_parses_to(
            ",es-MX;q=1.000  , en; q=0.125  ,  ,",
            vec![item("es-MX", Some(1.0)), item("en", Some(0.125))],
        );
    }

    #[test]
    fn empty_lists() {
        for input in ["", ",", ", , ,,,"] {
            assert!(
                matches!(
                    AcceptLanguage::parse_internal(input),
                    Err(AcceptLanguageError::NoElements)
                ),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn ascii_only() {
        assert!(matches!(
            AcceptLanguage::parse_internal("ës"),
            Err(AcceptLanguageError::InvalidCharacters)
        ));
    }

    #[test]
    fn invalid_tag() {
        assert!(matches!(
            AcceptLanguage::parse_internal("no_underscores"),
            Err(AcceptLanguageError::InvalidLanguageTag(_))
        ));
    }

    #[test]
    fn invalid_weight() {
        let inputs = [
            "es;",
            "es;q",
            "es;q=",
            "es;q=2",
            "es;q=1.1",
            "es;q=1.12",
            "es;q=1.123",
            // Up to three decimals allowed per RFC 7231
            "es;q=0.1234",
        ];

        for input in inputs {
            assert!(
                matches!(
                    AcceptLanguage::parse_internal(input),
                    Err(AcceptLanguageError::InvalidWeight)
                ),
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn iter() {
        let accept_language = AcceptLanguage::parse_internal("es-MX, en; q=0.5").unwrap();
        let mut pairs = accept_language.iter();

        let (first_tag, first_weight) = pairs.next().unwrap();
        assert_eq!(*first_tag, LanguageTag::parse("es-MX").unwrap());
        assert_eq!(first_weight, 1.0);

        let (second_tag, second_weight) = pairs.next().unwrap();
        assert_eq!(*second_tag, LanguageTag::parse("en").unwrap());
        assert_eq!(second_weight, 0.5);

        assert!(pairs.next().is_none());
    }
}