//! Processing of the `xml:space` attribute.

use itertools::Itertools;

/// Edge-handling flags for `xml:space="default"` normalization.
///
/// Each flag controls whether whitespace at the corresponding end of the
/// character data is kept: when a flag is `false`, whitespace at that end is
/// stripped; when it is `true`, it is kept and collapsed like interior
/// whitespace.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NormalizeDefault {
    /// `true` keeps (collapsed) leading whitespace; `false` strips it.
    pub has_element_before: bool,
    /// `true` keeps (collapsed) trailing whitespace; `false` strips it.
    pub has_element_after: bool,
}

/// Which `xml:space` normalization to apply to a run of character data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XmlSpaceNormalize {
    /// `xml:space="default"`: drop newlines, turn tabs into spaces, collapse
    /// runs of spaces, and strip the edges as directed by the flags.
    Default(NormalizeDefault),
    /// `xml:space="preserve"`: turn newlines and tabs into spaces and keep
    /// everything else untouched.
    Preserve,
}

/// Applies SVG `xml:space` whitespace handling to character data.
///
/// `s` is the text as delivered by the XML parser's character-data handler;
/// `mode` selects between the `default` and `preserve` rules described in
/// <https://www.w3.org/TR/SVG/text.html#WhiteSpace>.  A newly allocated,
/// normalized `String` is returned; `s` itself is not modified.
pub fn xml_space_normalize(mode: XmlSpaceNormalize, s: &str) -> String {
    match mode {
        XmlSpaceNormalize::Preserve => normalize_preserve(s),
        XmlSpaceNormalize::Default(edge_flags) => normalize_default(edge_flags, s),
    }
}

// From https://www.w3.org/TR/SVG/text.html#WhiteSpace
//
// When xml:space="default", the SVG user agent will do the following
// using a copy of the original character data content. First, it will
// remove all newline characters. Then it will convert all tab
// characters into space characters. Then, it will strip off all
// leading and trailing space characters. Then, all contiguous space
// characters will be consolidated.
fn normalize_default(elements: NormalizeDefault, mut s: &str) -> String {
    if !elements.has_element_before {
        s = s.trim_start();
    }

    if !elements.has_element_after {
        s = s.trim_end();
    }

    s.chars()
        .filter(|ch| *ch != '\n')
        .map(|ch| match ch {
            '\t' => ' ',
            c => c,
        })
        .coalesce(|current, next| match (current, next) {
            (' ', ' ') => Ok(' '),
            (_, _) => Err((current, next)),
        })
        .collect::<String>()
}

// From https://www.w3.org/TR/SVG/text.html#WhiteSpace
//
// With xml:space="preserve" the user agent works on a copy of the
// character data and turns every newline and every tab into a space.
// Nothing is stripped or merged afterwards: leading, trailing and
// repeated spaces are all drawn.  So "a   b" (three spaces) is rendered
// with a wider gap between "a" and "b" than "a b" (one space).
//
// Returns a newly allocated string with the same number of characters as `s`.
fn normalize_preserve(s: &str) -> String {
    let mut preserved = String::with_capacity(s.len());
    for ch in s.chars() {
        let replacement = if ch == '\n' || ch == '\t' { ' ' } else { ch };
        preserved.push(replacement);
    }
    preserved
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Normalizes `input` with `xml:space="default"` and the given edge flags.
    fn default_with(has_element_before: bool, has_element_after: bool, input: &str) -> String {
        let mode = XmlSpaceNormalize::Default(NormalizeDefault {
            has_element_before,
            has_element_after,
        });
        xml_space_normalize(mode, input)
    }

    /// Normalizes `input` with `xml:space="preserve"`.
    fn preserve(input: &str) -> String {
        xml_space_normalize(XmlSpaceNormalize::Preserve, input)
    }

    #[test]
    fn xml_space_default() {
        // (input, expected) pairs; no neighbouring elements on either side.
        let cases = [
            (
                "\n    WS example\n    indented lines\n  ",
                "WS example indented lines",
            ),
            (
                "\n  \t  \tWS \t\t\texample\n  \t  indented lines\t\t  \n  ",
                "WS example indented lines",
            ),
            (
                "\n  \t  \tWS \t\t\texample\n  \t  duplicate letters\t\t  \n  ",
                "WS example duplicate letters",
            ),
            (
                "\nWS example\nnon-indented lines\n  ",
                "WS examplenon-indented lines",
            ),
            (
                "\nWS example\tnon-indented lines\n  ",
                "WS example non-indented lines",
            ),
        ];

        for &(input, expected) in &cases {
            assert_eq!(default_with(false, false, input), expected);
        }
    }

    #[test]
    fn xml_space_default_with_elements() {
        // An element before the text keeps the (collapsed) leading space.
        assert_eq!(default_with(true, false, " foo \n\t  bar "), " foo bar");

        // An element after the text keeps the (collapsed) trailing space.
        assert_eq!(default_with(false, true, " foo   \nbar "), "foo bar ");
    }

    #[test]
    fn xml_space_preserve() {
        // (input, expected) pairs; every newline/tab becomes exactly one space.
        let cases = [
            (
                "\n    WS example\n    indented lines\n  ",
                "     WS example     indented lines   ",
            ),
            (
                "\n  \t  \tWS \t\t\texample\n  \t  indented lines\t\t  \n  ",
                "       WS    example      indented lines       ",
            ),
            (
                "\n  \t  \tWS \t\t\texample\n  \t  duplicate letters\t\t  \n  ",
                "       WS    example      duplicate letters       ",
            ),
        ];

        for &(input, expected) in &cases {
            assert_eq!(preserve(input), expected);
        }
    }
}