8 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 9 #define BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 11 #include <boost/nowide/detail/utf.hpp> 13 #include <boost/cstdint.hpp> 14 #include <boost/static_assert.hpp> 23 BOOST_STATIC_ASSERT(
sizeof(std::mbstate_t) >= 2);
/// Copies exactly two bytes from @p src to @p dst.
///
/// The copy goes through `unsigned char` pointers, so it is valid for any
/// object type and needs no extra header.
///
/// @param dst Destination; must point to at least two writable bytes.
/// @param src Source; must point to at least two readable bytes.
inline void copy_uint16_t(void* dst, const void* src)
{
    unsigned char* cdst = static_cast<unsigned char*>(dst);
    const unsigned char* csrc = static_cast<const unsigned char*>(src);
    cdst[0] = csrc[0];
    cdst[1] = csrc[1];
}
33 inline boost::uint16_t read_state(
const std::mbstate_t& src)
36 copy_uint16_t(&dst, &src);
39 inline void write_state(std::mbstate_t& dst,
const boost::uint16_t src)
41 copy_uint16_t(&dst, &src);
#if defined _MSC_VER && _MSC_VER < 1700
// Selects the alternative do_length() variant used further down in this file
// (presumably a const mbstate_t parameter on old MSVC -- confirm against the
// do_length declarations).
#define BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
#endif

/// Primary template of the UTF-8 codecvt facet.
///
/// Only declared here; the partial specializations for CharSize 2 and 4
/// later in this file provide the implementations.
///
/// @tparam CharType Wide character type of the facet.
/// @tparam CharSize Selects the specialization; defaults to sizeof(CharType).
template<typename CharType, int CharSize = sizeof(CharType)>
class utf8_codecvt;
// NOTE(review): this text is a source listing with embedded line numbers; braces and
// a number of statements are not visible. The comments below describe only what can
// be read here and flag the gaps.
//
// Partial specialization utf8_codecvt<CharType, 2>, derived from
// std::codecvt<CharType, char, std::mbstate_t>. The static-assert message and the
// surrogate-range constants used below indicate that the wide side is UTF-16.
59 template<
typename CharType>
60 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 2> :
public std::codecvt<CharType, char, std::mbstate_t>
// Compile-time check that CharType is at least two bytes wide.
63 BOOST_STATIC_ASSERT_MSG(
sizeof(CharType) >= 2,
"CharType must be able to store UTF16 code point");
// Constructor: forwards the reference count `refs` to the std::codecvt base.
65 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
// Alias used for the wide-character pointers in do_in/do_out.
69 typedef CharType uchar;
// do_unshift: returns `error` when the 16-bit state read from `s` is non-zero,
// otherwise `ok`. Any assignment to `next` is not visible in this listing.
71 virtual std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const 73 if(detail::read_state(s) != 0)
74 return std::codecvt_base::error;
76 return std::codecvt_base::ok;
// do_encoding / do_max_length / do_always_noconv: only the declarations are visible;
// the returned values are not shown here.
78 virtual int do_encoding()
const throw()
82 virtual int do_max_length()
const throw()
86 virtual bool do_always_noconv()
const throw()
// do_length: reads the 16-bit state from `std_state`, then decodes UTF-8 sequences
// from `from` while `max > 0` and input remains. The parameter list is only partly
// visible; it depends on BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST.
91 virtual int do_length(std::mbstate_t
92 #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
100 boost::uint16_t state = detail::read_state(std_state);
// Start values remembered for the two return forms below.
101 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 102 const char* save_from = from;
104 size_t save_max = max;
106 while(max > 0 && from < from_end)
// Position before decoding the current sequence.
108 const char* prev_from = from;
109 boost::uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
// The `illegal` and `incomplete` decode results are distinguished; how each is
// handled is not visible in this listing.
110 if(ch == detail::utf::illegal)
113 }
else if(ch == detail::utf::incomplete)
// Without the macro: the state is written back and the number of consumed bytes
// (from - save_from) is returned. The other return (save_max - max) is presumably
// the #else branch -- confirm against the full file.
131 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 132 detail::write_state(std_state, state);
133 return static_cast<int>(from - save_from);
135 return static_cast<int>(save_max - max);
// do_in: UTF-8 -> wide characters. Several parameters (from, to, to_end) are used
// below but their declarations are not visible in this listing.
139 virtual std::codecvt_base::result do_in(std::mbstate_t& std_state,
141 const char* from_end,
142 const char*& from_next,
145 uchar*& to_next)
const 147 std::codecvt_base::result r = std::codecvt_base::ok;
// 16-bit state carried between calls; it is written back before returning.
154 boost::uint16_t state = detail::read_state(std_state);
155 while(to < to_end && from < from_end)
// Position before decoding the current sequence.
157 const char* from_saved = from;
159 uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
161 if(ch == detail::utf::illegal)
164 }
else if(ch == detail::utf::incomplete)
// An incomplete trailing sequence yields `partial`.
167 r = std::codecvt_base::partial;
// Direct store of the decoded value as one wide character (the guarding condition
// is not visible here).
173 *to++ = static_cast<CharType>(ch);
// Surrogate pair computation: w1 = 0xD800 + (ch >> 10), w2 = 0xDC00 + (ch & 0x3FF).
// Presumably `ch` has been reduced by 0x10000 beforehand -- not visible, confirm.
186 boost::uint16_t vh = static_cast<boost::uint16_t>(ch >> 10);
187 boost::uint16_t vl = ch & 0x3FF;
188 boost::uint16_t w1 = vh + 0xD800;
189 boost::uint16_t w2 = vl + 0xDC00;
// Each surrogate is emitted by its own store; the state logic that selects between
// them is not visible in this listing.
193 *to++ = static_cast<CharType>(w1);
197 *to++ = static_cast<CharType>(w2);
// `ok` is downgraded to `partial` when input remains or the state is non-zero.
204 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
205 r = std::codecvt_base::partial;
206 detail::write_state(std_state, state);
// do_out: wide characters -> UTF-8. As with do_in, some parameter declarations are
// not visible in this listing.
210 virtual std::codecvt_base::result do_out(std::mbstate_t& std_state,
212 const uchar* from_end,
213 const uchar*& from_next,
216 char*& to_next)
const 218 std::codecvt_base::result r = std::codecvt_base::ok;
// 16-bit state carried between calls; below it is used as the first surrogate (w1).
226 boost::uint16_t state = detail::read_state(std_state);
227 while(to < to_end && from < from_end)
229 boost::uint32_t ch = 0;
// Pair the stored state (w1) with the current input unit (w2).
235 boost::uint16_t w1 = state;
236 boost::uint16_t w2 = *from;
// When w2 lies in 0xDC00..0xDFFF the pair is combined into one code point:
// ((w1 - 0xD800) << 10 | (w2 - 0xDC00)) + 0x10000.
239 if(0xDC00 <= w2 && w2 <= 0xDFFF)
241 boost::uint16_t vh = w1 - 0xD800;
242 boost::uint16_t vl = w2 - 0xDC00;
243 ch = ((uint32_t(vh) << 10) | vl) + 0x10000;
// A unit in 0xD800..0xDBFF is stored into the state.
251 if(0xD800 <= ch && ch <= 0xDBFF)
257 state = static_cast<boost::uint16_t>(ch);
260 }
// A unit in 0xDC00..0xDFFF is tested separately; its handling is not visible here.
else if(0xDC00 <= ch && ch <= 0xDFFF)
// Invalid code points yield `error`.
268 if(!detail::utf::is_valid_codepoint(ch))
270 r = std::codecvt_base::error;
// Not enough room left in the output for the encoded width yields `partial`.
273 int len = detail::utf::utf_traits<char>::width(ch);
274 if(to_end - to < len)
276 r = std::codecvt_base::partial;
279 to = detail::utf::utf_traits<char>::encode(ch, to);
// `ok` is downgraded to `partial` when input remains or the state is non-zero.
285 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
286 r = std::codecvt_base::partial;
287 detail::write_state(std_state, state);
// NOTE(review): this text is a source listing with embedded line numbers; braces and
// a number of statements are not visible. The comments below describe only what can
// be read here and flag the gaps.
//
// Partial specialization utf8_codecvt<CharType, 4>, derived from
// std::codecvt<CharType, char, std::mbstate_t>. None of the visible code reads or
// writes the mbstate_t argument; it is left unnamed in do_unshift, do_in and do_out.
292 template<
typename CharType>
293 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 4> :
public std::codecvt<CharType, char, std::mbstate_t>
// Constructor: forwards the reference count `refs` to the std::codecvt base.
296 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
// Alias used for the wide-character pointers in do_in/do_out.
300 typedef CharType uchar;
// do_unshift: the visible code returns `ok`. Any assignment to `next` is not visible
// in this listing.
302 virtual std::codecvt_base::result do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const 305 return std::codecvt_base::ok;
// do_encoding / do_max_length / do_always_noconv: only the declarations are visible;
// the returned values are not shown here.
307 virtual int do_encoding()
const throw()
311 virtual int do_max_length()
const throw()
315 virtual bool do_always_noconv()
const throw()
// do_length: decodes UTF-8 sequences from `from` while `max > 0` and input remains.
// The parameter list is only partly visible; it depends on
// BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST.
320 virtual int do_length(std::mbstate_t
321 #ifdef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST
326 const char* from_end,
// Start values remembered for the two return forms below.
329 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 330 const char* start_from = from;
332 size_t save_max = max;
335 while(max > 0 && from < from_end)
// Position before decoding the current sequence.
337 const char* save_from = from;
338 boost::uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
// The `incomplete` and `illegal` decode results are distinguished; how each is
// handled is not visible in this listing.
339 if(ch == detail::utf::incomplete)
343 }
else if(ch == detail::utf::illegal)
// Without the macro the number of consumed bytes (from - start_from) is returned.
// The other return (save_max - max) is presumably the #else branch -- confirm
// against the full file.
349 #ifndef BOOST_NOWIDE_DO_LENGTH_MBSTATE_CONST 350 return from - start_from;
352 return save_max - max;
// do_in: UTF-8 -> wide characters. Several parameters (from, to, to_end) are used
// below but their declarations are not visible in this listing.
356 virtual std::codecvt_base::result do_in(std::mbstate_t& ,
358 const char* from_end,
359 const char*& from_next,
362 uchar*& to_next)
const 364 std::codecvt_base::result r = std::codecvt_base::ok;
366 while(to < to_end && from < from_end)
// Position before decoding the current sequence.
368 const char* from_saved = from;
370 uint32_t ch = detail::utf::utf_traits<char>::decode(from, from_end);
372 if(ch == detail::utf::illegal)
375 }
else if(ch == detail::utf::incomplete)
// An incomplete trailing sequence yields `partial`.
377 r = std::codecvt_base::partial;
// `ok` is downgraded to `partial` when input remains unconsumed.
385 if(r == std::codecvt_base::ok && from != from_end)
386 r = std::codecvt_base::partial;
// do_out: wide characters -> UTF-8. As with do_in, some parameter declarations are
// not visible in this listing.
390 virtual std::codecvt_base::result do_out(std::mbstate_t& ,
392 const uchar* from_end,
393 const uchar*& from_next,
396 char*& to_next)
const 398 std::codecvt_base::result r = std::codecvt_base::ok;
399 while(to < to_end && from < from_end)
// `ch` starts at 0; the statement that loads it from the input is not visible here.
401 boost::uint32_t ch = 0;
// Validity check on the code point; the action taken on failure is not visible.
403 if(!detail::utf::is_valid_codepoint(ch))
// Not enough room left in the output for the encoded width yields `partial`.
407 int len = detail::utf::utf_traits<char>::width(ch);
408 if(to_end - to < len)
410 r = std::codecvt_base::partial;
413 to = detail::utf::utf_traits<char>::encode(ch, to);
// `ok` is downgraded to `partial` when input remains unconsumed.
418 if(r == std::codecvt_base::ok && from != from_end)
419 r = std::codecvt_base::partial;
Definition: utf8_codecvt.hpp:57
#define BOOST_NOWIDE_REPLACEMENT_CHARACTER
Definition: replacement.hpp:16