1 #ifndef MISC_UTF8ITER_HPP
2 #define MISC_UTF8ITER_HPP
4 #include <boost/tuple/tuple.hpp>
11 typedef unsigned char const *
Point;
53 if ((*cur & 0x80) == 0)
57 return std::make_pair (chr, cur);
66 return std::make_pair (
sBadChar(), cur);
68 Point eoc = cur + octets;
71 return std::make_pair (
sBadChar(), cur);
75 if ((*cur & 0xC0) != 0x80)
76 return std::make_pair (
sBadChar(), cur);
81 return std::make_pair (chr, cur);
86 static std::pair <int, UnicodeChar>
octet_count (
unsigned char octet)
90 unsigned char mark = 0xC0;
91 unsigned char mask = 0xE0;
93 for (octets = 1; octets <= 5; ++octets)
95 if ((octet & mask) == mark)
98 mark = (mark >> 1) | 0x80;
99 mask = (mask >> 1) | 0x80;
102 return std::make_pair (octets, octet & ~mask);
unsigned char const * Point
Definition: utf8stream.hpp:11
Utf8Stream(Point begin, Point end)
Definition: utf8stream.hpp:16
bool eof() const
Definition: utf8stream.hpp:26
UnicodeChar consume()
Definition: utf8stream.hpp:43
Point end
Definition: utf8stream.hpp:112
static UnicodeChar sBadChar()
Definition: utf8stream.hpp:14
Definition: utf8stream.hpp:6
Utf8Stream(std::pair< Point, Point > range)
Definition: utf8stream.hpp:21
static std::pair< int, UnicodeChar > octet_count(unsigned char octet)
Definition: utf8stream.hpp:86
uint32_t UnicodeChar
Definition: utf8stream.hpp:10
static std::pair< UnicodeChar, Point > decode(Point cur, Point end)
Definition: utf8stream.hpp:51
Point current() const
Definition: utf8stream.hpp:31
Point nxt
Definition: utf8stream.hpp:111
void next()
Definition: utf8stream.hpp:105
UnicodeChar peek()
Definition: utf8stream.hpp:36
Point cur
Definition: utf8stream.hpp:110
UnicodeChar val
Definition: utf8stream.hpp:113