//
// Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/url
//
9  

9  

10  
#ifndef BOOST_URL_IMPL_ENCODE_HPP
10  
#ifndef BOOST_URL_IMPL_ENCODE_HPP
11  
#define BOOST_URL_IMPL_ENCODE_HPP
11  
#define BOOST_URL_IMPL_ENCODE_HPP
12  

12  

13  
#include <boost/url/grammar/token_rule.hpp>
13  
#include <boost/url/grammar/token_rule.hpp>
14  
#include <boost/assert.hpp>
14  
#include <boost/assert.hpp>
15  
#include <boost/core/detail/static_assert.hpp>
15  
#include <boost/core/detail/static_assert.hpp>
16  
#include <boost/url/detail/encode.hpp>
16  
#include <boost/url/detail/encode.hpp>
17  
#include <boost/url/detail/except.hpp>
17  
#include <boost/url/detail/except.hpp>
18  
#include <boost/url/encoding_opts.hpp>
18  
#include <boost/url/encoding_opts.hpp>
19  
#include <boost/url/grammar/charset.hpp>
19  
#include <boost/url/grammar/charset.hpp>
20  
#include <boost/url/grammar/hexdig_chars.hpp>
20  
#include <boost/url/grammar/hexdig_chars.hpp>
21  
#include <boost/url/grammar/string_token.hpp>
21  
#include <boost/url/grammar/string_token.hpp>
22  
#include <boost/url/grammar/type_traits.hpp>
22  
#include <boost/url/grammar/type_traits.hpp>
23  

23  

24  
namespace boost {
24  
namespace boost {
25  
namespace urls {
25  
namespace urls {
26  

26  

27  
//------------------------------------------------
27  
//------------------------------------------------
28  

28  

29  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
29  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
30  
std::size_t
30  
std::size_t
31  
encoded_size(
31  
encoded_size(
32  
    core::string_view s,
32  
    core::string_view s,
33  
    CS const& allowed,
33  
    CS const& allowed,
34  
    encoding_opts opt) noexcept
34  
    encoding_opts opt) noexcept
35  
{
35  
{
36  
    /*
36  
    /*
37  
        If you get a compilation error here, it
37  
        If you get a compilation error here, it
38  
        means that the value you passed does
38  
        means that the value you passed does
39  
        not meet the requirements stated in
39  
        not meet the requirements stated in
40  
        the documentation.
40  
        the documentation.
41  
    */
41  
    */
42  
    BOOST_CORE_STATIC_ASSERT(
42  
    BOOST_CORE_STATIC_ASSERT(
43  
        grammar::is_charset<CS>::value);
43  
        grammar::is_charset<CS>::value);
44  

44  

45  
    std::size_t n = 0;
45  
    std::size_t n = 0;
46  
    auto it = s.data();
46  
    auto it = s.data();
47  
    auto const last = it + s.size();
47  
    auto const last = it + s.size();
48  

48  

49  
    if (!opt.space_as_plus)
49  
    if (!opt.space_as_plus)
50  
    {
50  
    {
51  
        while (it != last)
51  
        while (it != last)
52  
        {
52  
        {
53  
            char const c = *it;
53  
            char const c = *it;
54  
            if (allowed(c))
54  
            if (allowed(c))
55  
            {
55  
            {
56  
                ++n;
56  
                ++n;
57  
            }
57  
            }
58  
            else
58  
            else
59  
            {
59  
            {
60  
                n += 3;
60  
                n += 3;
61  
            }
61  
            }
62  
            ++it;
62  
            ++it;
63  
        }
63  
        }
64  
    }
64  
    }
65  
    else
65  
    else
66  
    {
66  
    {
67  
        // '+' is always encoded (thus
67  
        // '+' is always encoded (thus
68  
        // spending 3 chars) even if
68  
        // spending 3 chars) even if
69  
        // allowed because "%2B" and
69  
        // allowed because "%2B" and
70  
        // "+" have different meanings
70  
        // "+" have different meanings
71  
        // when space as plus is enabled
71  
        // when space as plus is enabled
72  
        using FNT = bool (*)(CS const& allowed, char);
72  
        using FNT = bool (*)(CS const& allowed, char);
73  
        FNT takes_one_char =
73  
        FNT takes_one_char =
74  
            allowed('+') ?
74  
            allowed('+') ?
75  
                (allowed(' ') ?
75  
                (allowed(' ') ?
76  
                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
76  
                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
77  
                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
77  
                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
78  
                (allowed(' ') ?
78  
                (allowed(' ') ?
79  
                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
79  
                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
80  
                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
80  
                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
81  
        while (it != last)
81  
        while (it != last)
82  
        {
82  
        {
83  
            char const c = *it;
83  
            char const c = *it;
84  
            if (takes_one_char(allowed, c))
84  
            if (takes_one_char(allowed, c))
85  
            {
85  
            {
86  
                ++n;
86  
                ++n;
87  
            }
87  
            }
88  
            else
88  
            else
89  
            {
89  
            {
90  
                n += 3;
90  
                n += 3;
91  
            }
91  
            }
92  
            ++it;
92  
            ++it;
93  
        }
93  
        }
94  
    }
94  
    }
95  
    return n;
95  
    return n;
96  
}
96  
}
97  

97  

98  
//------------------------------------------------
98  
//------------------------------------------------
99  

99  

100  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
100  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
101  
std::size_t
101  
std::size_t
102  
encode(
102  
encode(
103  
    char* dest,
103  
    char* dest,
104  
    std::size_t size,
104  
    std::size_t size,
105  
    core::string_view s,
105  
    core::string_view s,
106  
    CS const& allowed,
106  
    CS const& allowed,
107  
    encoding_opts opt)
107  
    encoding_opts opt)
108  
{
108  
{
109  
/*  If you get a compilation error here, it
109  
/*  If you get a compilation error here, it
110  
    means that the value you passed does
110  
    means that the value you passed does
111  
    not meet the requirements stated in
111  
    not meet the requirements stated in
112  
    the documentation.
112  
    the documentation.
113  
*/
113  
*/
114  
    BOOST_CORE_STATIC_ASSERT(
114  
    BOOST_CORE_STATIC_ASSERT(
115  
        grammar::is_charset<CS>::value);
115  
        grammar::is_charset<CS>::value);
116  

116  

117  
    // '%' must be reserved
117  
    // '%' must be reserved
118  
    BOOST_ASSERT(!allowed('%'));
118  
    BOOST_ASSERT(!allowed('%'));
119  

119  

120  
    char const* const hex =
120  
    char const* const hex =
121  
        detail::hexdigs[opt.lower_case];
121  
        detail::hexdigs[opt.lower_case];
122  
    auto const encode = [hex](
122  
    auto const encode = [hex](
123  
        char*& dest,
123  
        char*& dest,
124  
        unsigned char c) noexcept
124  
        unsigned char c) noexcept
125  
    {
125  
    {
126  
        *dest++ = '%';
126  
        *dest++ = '%';
127  
        *dest++ = hex[c>>4];
127  
        *dest++ = hex[c>>4];
128  
        *dest++ = hex[c&0xf];
128  
        *dest++ = hex[c&0xf];
129  
    };
129  
    };
130  

130  

131  
    auto it = s.data();
131  
    auto it = s.data();
132  
    auto const end = dest + size;
132  
    auto const end = dest + size;
133  
    auto const last = it + s.size();
133  
    auto const last = it + s.size();
134 -
    auto const end3 = end - 3;
 
135  
    auto const dest0 = dest;
134  
    auto const dest0 = dest;
136  

135  

137  
    if (!opt.space_as_plus)
136  
    if (!opt.space_as_plus)
138  
    {
137  
    {
139  
        while(it != last)
138  
        while(it != last)
140  
        {
139  
        {
141  
            char const c = *it;
140  
            char const c = *it;
142  
            if (allowed(c))
141  
            if (allowed(c))
143  
            {
142  
            {
144  
                if(dest == end)
143  
                if(dest == end)
145  
                    return dest - dest0;
144  
                    return dest - dest0;
146  
                *dest++ = c;
145  
                *dest++ = c;
147  
                ++it;
146  
                ++it;
148  
                continue;
147  
                continue;
149  
            }
148  
            }
150 -
            if (dest > end3)
149 +
            if (end - dest < 3)
151  
                return dest - dest0;
150  
                return dest - dest0;
152  
            encode(dest, c);
151  
            encode(dest, c);
153  
            ++it;
152  
            ++it;
154  
        }
153  
        }
155  
        return dest - dest0;
154  
        return dest - dest0;
156  
    }
155  
    }
157  
    else
156  
    else
158  
    {
157  
    {
159  
        while (it != last)
158  
        while (it != last)
160  
        {
159  
        {
161  
            char const c = *it;
160  
            char const c = *it;
162  
            if (c == ' ')
161  
            if (c == ' ')
163  
            {
162  
            {
164  
                if(dest == end)
163  
                if(dest == end)
165  
                    return dest - dest0;
164  
                    return dest - dest0;
166  
                *dest++ = '+';
165  
                *dest++ = '+';
167  
                ++it;
166  
                ++it;
168  
                continue;
167  
                continue;
169  
            }
168  
            }
170  
            else if (
169  
            else if (
171  
                allowed(c) &&
170  
                allowed(c) &&
172  
                c != '+')
171  
                c != '+')
173  
            {
172  
            {
174  
                if(dest == end)
173  
                if(dest == end)
175  
                    return dest - dest0;
174  
                    return dest - dest0;
176  
                *dest++ = c;
175  
                *dest++ = c;
177  
                ++it;
176  
                ++it;
178  
                continue;
177  
                continue;
179  
            }
178  
            }
180 -
            if(dest > end3)
179 +
            if(end - dest < 3)
181  
                return dest - dest0;
180  
                return dest - dest0;
182  
            encode(dest, c);
181  
            encode(dest, c);
183  
            ++it;
182  
            ++it;
184  
        }
183  
        }
185  
    }
184  
    }
186  
    return dest - dest0;
185  
    return dest - dest0;
187  
}
186  
}
188  

187  

189  
//------------------------------------------------
188  
//------------------------------------------------
190  

189  

// encode_unsafe performs no bounds checks:
// it only asserts on the output buffer
//
194  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
193  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
195  
std::size_t
194  
std::size_t
196  
encode_unsafe(
195  
encode_unsafe(
197  
    char* dest,
196  
    char* dest,
198  
    std::size_t size,
197  
    std::size_t size,
199  
    core::string_view s,
198  
    core::string_view s,
200  
    CS const& allowed,
199  
    CS const& allowed,
201  
    encoding_opts opt)
200  
    encoding_opts opt)
202  
{
201  
{
203  
    BOOST_CORE_STATIC_ASSERT(
202  
    BOOST_CORE_STATIC_ASSERT(
204  
        grammar::is_charset<CS>::value);
203  
        grammar::is_charset<CS>::value);
205  

204  

206  
    // '%' must be reserved
205  
    // '%' must be reserved
207  
    BOOST_ASSERT(!allowed('%'));
206  
    BOOST_ASSERT(!allowed('%'));
208  

207  

209  
    auto it = s.data();
208  
    auto it = s.data();
210  
    auto const last = it + s.size();
209  
    auto const last = it + s.size();
211  
    auto const end = dest + size;
210  
    auto const end = dest + size;
212  
    ignore_unused(end);
211  
    ignore_unused(end);
213  

212  

214  
    char const* const hex =
213  
    char const* const hex =
215  
        detail::hexdigs[opt.lower_case];
214  
        detail::hexdigs[opt.lower_case];
216  
    auto const encode = [end, hex](
215  
    auto const encode = [end, hex](
217  
        char*& dest,
216  
        char*& dest,
218  
        unsigned char c) noexcept
217  
        unsigned char c) noexcept
219  
    {
218  
    {
220  
        ignore_unused(end);
219  
        ignore_unused(end);
221  
        *dest++ = '%';
220  
        *dest++ = '%';
222  
        BOOST_ASSERT(dest != end);
221  
        BOOST_ASSERT(dest != end);
223  
        *dest++ = hex[c>>4];
222  
        *dest++ = hex[c>>4];
224  
        BOOST_ASSERT(dest != end);
223  
        BOOST_ASSERT(dest != end);
225  
        *dest++ = hex[c&0xf];
224  
        *dest++ = hex[c&0xf];
226  
    };
225  
    };
227  

226  

228  
    auto const dest0 = dest;
227  
    auto const dest0 = dest;
229  
    if (!opt.space_as_plus)
228  
    if (!opt.space_as_plus)
230  
    {
229  
    {
231  
        while(it != last)
230  
        while(it != last)
232  
        {
231  
        {
233  
            BOOST_ASSERT(dest != end);
232  
            BOOST_ASSERT(dest != end);
234  
            char const c = *it;
233  
            char const c = *it;
235  
            if(allowed(c))
234  
            if(allowed(c))
236  
            {
235  
            {
237  
                *dest++ = c;
236  
                *dest++ = c;
238  
            }
237  
            }
239  
            else
238  
            else
240  
            {
239  
            {
241  
                encode(dest, c);
240  
                encode(dest, c);
242  
            }
241  
            }
243  
            ++it;
242  
            ++it;
244  
        }
243  
        }
245  
    }
244  
    }
246  
    else
245  
    else
247  
    {
246  
    {
248  
        while(it != last)
247  
        while(it != last)
249  
        {
248  
        {
250  
            BOOST_ASSERT(dest != end);
249  
            BOOST_ASSERT(dest != end);
251  
            char const c = *it;
250  
            char const c = *it;
252  
            if (c == ' ')
251  
            if (c == ' ')
253  
            {
252  
            {
254  
                *dest++ = '+';
253  
                *dest++ = '+';
255  
            }
254  
            }
256  
            else if (
255  
            else if (
257  
                allowed(c) &&
256  
                allowed(c) &&
258  
                c != '+')
257  
                c != '+')
259  
            {
258  
            {
260  
                *dest++ = c;
259  
                *dest++ = c;
261  
            }
260  
            }
262  
            else
261  
            else
263  
            {
262  
            {
264  
                encode(dest, c);
263  
                encode(dest, c);
265  
            }
264  
            }
266  
            ++it;
265  
            ++it;
267  
        }
266  
        }
268  
    }
267  
    }
269  
    return dest - dest0;
268  
    return dest - dest0;
270  
}
269  
}
271  

270  

272  
//------------------------------------------------
271  
//------------------------------------------------
273  

272  

274  
template<
273  
template<
275  
    BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
274  
    BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
276  
    BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
275  
    BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
277  
BOOST_URL_STRTOK_RETURN
276  
BOOST_URL_STRTOK_RETURN
278  
encode(
277  
encode(
279  
    core::string_view s,
278  
    core::string_view s,
280  
    CS const& allowed,
279  
    CS const& allowed,
281  
    encoding_opts opt,
280  
    encoding_opts opt,
282 -
    StringToken&& token) noexcept
281 +
    StringToken&& token)
283  
{
282  
{
284  
    BOOST_CORE_STATIC_ASSERT(
283  
    BOOST_CORE_STATIC_ASSERT(
285  
        grammar::is_charset<CS>::value);
284  
        grammar::is_charset<CS>::value);
286  

285  

287  
    auto const n = encoded_size(
286  
    auto const n = encoded_size(
288  
        s, allowed, opt);
287  
        s, allowed, opt);
289  
    auto p = token.prepare(n);
288  
    auto p = token.prepare(n);
290  
    if(n > 0)
289  
    if(n > 0)
291  
        encode_unsafe(
290  
        encode_unsafe(
292  
            p, n, s, allowed, opt);
291  
            p, n, s, allowed, opt);
293  
    return token.result();
292  
    return token.result();
294  
}
293  
}
295  

294  

296  
} // urls
295  
} // urls
297  
} // boost
296  
} // boost
298  

297  

299  
#endif
298  
#endif