7#ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
8#define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
10#include <boost/locale/boundary/boundary_point.hpp>
11#include <boost/locale/boundary/facets.hpp>
12#include <boost/locale/boundary/segment.hpp>
13#include <boost/locale/boundary/types.hpp>
14#include <boost/iterator/iterator_facade.hpp>
27# pragma warning(disable : 4275 4251 4231 4660)
30namespace boost {
namespace locale {
namespace boundary {
// Returns the boundary_indexing<Char> facet stored in locale `l`.
// Throws std::runtime_error when std::has_facet reports that the locale does
// not carry this facet; otherwise the reference comes from std::use_facet.
43 template<
typename Char>
44 const boundary_indexing<Char>& get_boundary_indexing(
const std::locale& l)
46 using facet_type = boundary_indexing<Char>;
// Checked up front so a missing facet produces the descriptive message below
// rather than the std::bad_cast that std::use_facet would raise on its own.
47 if(!std::has_facet<facet_type>(l))
48 throw std::runtime_error(
"Locale was generated without segmentation support!");
49 return std::use_facet<facet_type>(l);
// Primary template: builds the boundary index for the range [b, e) of an
// arbitrary iterator type. CategoryType defaults to the iterator's own
// iterator_category and is not used inside this primary template.
52 template<
typename IteratorType,
53 typename CategoryType =
typename std::iterator_traits<IteratorType>::iterator_category>
54 struct mapping_traits {
// Character type is taken from the iterator's value_type.
55 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
// Maps boundaries of kind `t` over [b, e) using the facet found in locale `l`.
56 static index_type map(boundary_type t, IteratorType b, IteratorType e,
const std::locale& l)
// The facet's map() is called with a pair of character pointers, so the range
// is first copied into a std::basic_string to obtain a contiguous buffer.
58 std::basic_string<char_type> str(b, e);
59 return get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
// Compile-time predicate over an iterator type. is_linear is true exactly when
// SomeIteratorType is one of: CharType*, const CharType*, or the iterator /
// const_iterator of std::basic_string<CharType> or std::vector<CharType>.
// Any other iterator type yields false.
63 template<
typename CharType,
typename SomeIteratorType>
64 struct linear_iterator_traits {
65 static constexpr bool is_linear =
66 std::is_same<SomeIteratorType, CharType*>::value || std::is_same<SomeIteratorType, const CharType*>::value
67 || std::is_same<SomeIteratorType, typename std::basic_string<CharType>::iterator>::value
68 || std::is_same<SomeIteratorType, typename std::basic_string<CharType>::const_iterator>::value
69 || std::is_same<SomeIteratorType, typename std::vector<CharType>::iterator>::value
70 || std::is_same<SomeIteratorType, typename std::vector<CharType>::const_iterator>::value;
// Specialization of mapping_traits selected for random-access iterators.
// NOTE(review): in this listing the branch separator and the statements that
// consume `tmp` are not visible; the comments below cover the visible lines only.
73 template<
typename IteratorType>
74 struct mapping_traits<IteratorType, std::random_access_iterator_tag> {
75 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
// Maps boundaries of kind `t` over [b, e) using the facet found in locale `l`.
77 static index_type map(boundary_type t, IteratorType b, IteratorType e,
const std::locale& l)
// Direct path: taken only for iterator types flagged by linear_iterator_traits
// and only for a non-empty range, since `b` is dereferenced on the next line.
86 if(linear_iterator_traits<char_type, IteratorType>::is_linear && b != e) {
// The address of the first element is used as the start of the character
// buffer; the end pointer is derived from the iterator distance (e - b).
87 const char_type* begin = &*b;
88 const char_type* end = begin + (e - b);
89 index_type tmp = get_boundary_indexing<char_type>(l).map(t, begin, end);
// Copying path (presumably the else branch -- confirm against the full file):
// the range is copied into a std::basic_string and mapped through its buffer.
92 std::basic_string<char_type> str(b, e);
93 index_type tmp = get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
// `mapping`: keeps the boundary index computed for a text range together with
// copies of the range's begin/end iterators.
// NOTE(review): the class header line is not visible in this listing.
100 template<
typename BaseIterator>
103 typedef BaseIterator base_iterator;
104 typedef typename std::iterator_traits<base_iterator>::value_type char_type;
// Builds the mapping for [begin, end): index_ starts as a freshly allocated,
// empty index_type and the two iterators are stored as given.
106 mapping(boundary_type type, base_iterator begin, base_iterator end,
const std::locale& loc) :
107 index_(new
index_type()), begin_(begin), end_(end)
// The index itself is produced by mapping_traits for this iterator type.
// NOTE(review): the statement that transfers `idx` into *index_ is not visible
// in this listing -- confirm against the full file.
109 index_type idx = detail::mapping_traits<base_iterator>::map(type, begin, end, loc);
// Read-only access to the stored index.
115 const index_type& index()
const {
return *index_; }
// Start of the mapped text range, as passed to the constructor.
117 base_iterator begin()
const {
return begin_; }
// End of the mapped text range, as passed to the constructor.
119 base_iterator end()
const {
return end_; }
// The index is held through a shared_ptr, so copies of a mapping refer to the
// same index_type object.
122 std::shared_ptr<index_type> index_;
123 base_iterator begin_, end_;
// Iterator over segments of a mapped text range, built on
// boost::iterator_facade with bidirectional traversal; dereferencing yields a
// const segment<BaseIterator>&.
// State: current_ is a pair of positions into the mapping's index (segment
// start, segment end), value_ is the segment exposed to the caller, mask_ is
// the rule mask used by valid_offset().
// NOTE(review): several method headers, loop bodies and branch lines are not
// visible in this listing; comments describe only the visible statements.
126 template<
typename BaseIterator>
127 class segment_index_iterator :
public boost::iterator_facade<segment_index_iterator<BaseIterator>,
128 segment<BaseIterator>,
129 boost::bidirectional_traversal_tag,
130 const segment<BaseIterator>&> {
132 typedef BaseIterator base_iterator;
133 typedef mapping<base_iterator> mapping_type;
134 typedef segment<base_iterator> segment_type;
// Default state: no mapping attached, empty mask, positions (0, 0).
136 segment_index_iterator() : current_(0, 0), map_(nullptr), mask_(0), full_select_(false) {}
// Positions the iterator relative to text position `p` (body not visible here;
// presumably delegates to set(p) -- confirm).
138 segment_index_iterator(base_iterator p,
const mapping_type* map, rule_type mask,
bool full_select) :
139 map_(map), mask_(mask), full_select_(full_select)
// Begin/end constructor selected by `is_begin` (body not visible here).
143 segment_index_iterator(
bool is_begin,
const mapping_type* map, rule_type mask,
bool full_select) :
144 map_(map), mask_(mask), full_select_(full_select)
// iterator_facade hook: exposes the cached segment.
152 const segment_type& dereference()
const {
return value_; }
// iterator_facade hook: two iterators are equal when they share the same
// mapping and the same segment-end position; current_.first is not compared.
154 bool equal(
const segment_index_iterator& other)
const
156 return map_ == other.map_ && current_.second == other.current_.second;
// Forward step (function header not visible; presumably increment()).
// Two search variants are visible below; the branch choosing between them is
// not shown (presumably full_select_ -- confirm). Both scan next.second
// towards size() looking for a position accepted by valid_offset().
161 std::pair<size_t, size_t> next = current_;
163 next.first = next.second;
164 while(next.second < size()) {
166 if(valid_offset(next.second))
// Reaching size() switches to the (size() - 1, size()) pair, the same pair
// that the end-state code further down assigns.
169 if(next.second == size())
170 next.first = next.second - 1;
172 while(next.second < size()) {
173 next.first = next.second;
175 if(valid_offset(next.second))
// Commit the new positions and move value_ accordingly.
179 update_current(next);
// Backward step (function header not visible; presumably decrement()).
// Scans positions downwards, again using valid_offset() as the acceptance test.
184 std::pair<size_t, size_t> next = current_;
186 while(next.second > 1) {
188 if(valid_offset(next.second))
191 next.first = next.second;
192 while(next.first > 0) {
194 if(valid_offset(next.first))
198 while(next.second > 1) {
200 if(valid_offset(next.second))
203 next.first = next.second - 1;
205 update_current(next);
// End state (function header not visible): positions (size() - 1, size()) and
// an empty segment located at the end of the mapped text with rule 0.
211 current_.first = size() - 1;
212 current_.second = size();
213 value_ = segment_type(map_->end(), map_->end(), 0);
// Begin state (function header not visible): positions (0, 0) and an empty
// segment located at the start of the mapped text with rule 0.
217 current_.first = current_.second = 0;
218 value_ = segment_type(map_->begin(), map_->begin(), 0);
// Positions the iterator relative to text position `p`.
222 void set(base_iterator p)
224 const auto b = map_->index().begin(), e = map_->index().end();
// Binary search over the index for the character distance of `p` from the
// start of the text (assumes break_info compares by offset -- TODO confirm).
225 auto boundary_point = std::upper_bound(b, e, break_info(std::distance(map_->begin(), p)));
// Entries whose rule does not intersect mask_ are passed over (loop body not
// visible in this listing).
226 while(boundary_point != e && (boundary_point->rule & mask_) == 0)
229 current_.first = current_.second = boundary_point - b;
// Walk the start position backwards towards a position accepted by
// valid_offset() (loop body only partly visible).
232 while(current_.first > 0) {
234 if(valid_offset(current_.first))
238 if(current_.first > 0)
// Rebuild value_ from scratch: start iterator from the text start plus the
// start offset, end iterator from the start iterator plus the segment length.
241 value_.first = map_->begin();
242 std::advance(value_.first, get_offset(current_.first));
243 value_.second = value_.first;
244 std::advance(value_.second, get_offset(current_.second) - get_offset(current_.first));
// Moves value_ from the current positions to `pos` by advancing both of its
// iterators by the offset difference (which may be negative).
249 void update_current(std::pair<size_t, size_t> pos)
251 std::ptrdiff_t first_diff = get_offset(pos.first) - get_offset(current_.first);
252 std::ptrdiff_t second_diff = get_offset(pos.second) - get_offset(current_.second);
253 std::advance(value_.first, first_diff);
254 std::advance(value_.second, second_diff);
// The segment's rule is read from the index entry at the end position; the
// size() check keeps the subscript inside the index.
261 if(current_.second != size())
262 value_.rule(index()[current_.second].rule);
// Character offset for index position `ind`.
// NOTE(review): the condition guarding the first return is not visible here
// (presumably ind == size()) -- confirm.
264 size_t get_offset(
size_t ind)
const
267 return index().back().offset;
268 return index()[ind].offset;
// Despite its name, `offset` is a position in index(), not a character offset.
// Positions 0 and size() are always accepted; any other position is accepted
// only when its rule intersects mask_.
271 bool valid_offset(
size_t offset)
const
273 return offset == 0 || offset == size()
274 || (index()[offset].rule & mask_) != 0;
// Number of entries in the mapping's index.
277 size_t size()
const {
return index().size(); }
// Shortcut to the mapping's index; requires map_ to be non-null.
279 const index_type& index()
const {
return map_->index(); }
282 std::pair<size_t, size_t> current_;
283 const mapping_type* map_;
// Iterator over boundary points of a mapped text range, built on
// boost::iterator_facade with bidirectional traversal; dereferencing yields a
// const boundary_point<BaseIterator>&.
// State: current_ is a position into the mapping's index, value_ is the
// boundary point exposed to the caller, mask_ is the rule mask used by
// valid_offset().
// NOTE(review): several method headers, loop bodies and branch lines are not
// visible in this listing; comments describe only the visible statements.
288 template<
typename BaseIterator>
289 class boundary_point_index_iterator :
public boost::iterator_facade<boundary_point_index_iterator<BaseIterator>,
290 boundary_point<BaseIterator>,
291 boost::bidirectional_traversal_tag,
292 const boundary_point<BaseIterator>&> {
294 typedef BaseIterator base_iterator;
295 typedef mapping<base_iterator> mapping_type;
296 typedef boundary_point<base_iterator> boundary_point_type;
// Default state: no mapping attached, empty mask, position 0.
298 boundary_point_index_iterator() : current_(0), map_(nullptr), mask_(0) {}
// Begin/end constructor selected by `is_begin` (body not visible here).
300 boundary_point_index_iterator(
bool is_begin,
const mapping_type* map, rule_type mask) :
301 map_(map), mask_(mask)
// Positions the iterator relative to text position `p` (body not visible here;
// presumably delegates to set(p) -- confirm).
308 boundary_point_index_iterator(base_iterator p,
const mapping_type* map, rule_type mask) :
309 map_(map), mask_(mask)
// iterator_facade hook: exposes the cached boundary point.
314 const boundary_point_type& dereference()
const {
return value_; }
// iterator_facade hook: equal when both the mapping and the index position match.
316 bool equal(
const boundary_point_index_iterator& other)
const
318 return map_ == other.map_ && current_ == other.current_;
// Forward step (function header not visible; presumably increment()): scans
// from current_ towards size() for a position accepted by valid_offset(),
// then commits it through update_current().
323 size_t next = current_;
324 while(next < size()) {
326 if(valid_offset(next))
329 update_current(next);
// Backward step (function header and loop line not visible; presumably
// decrement()): same acceptance test, committed through update_current().
334 size_t next = current_;
337 if(valid_offset(next))
340 update_current(next);
// End state (function header not visible): boundary point at the end of the
// mapped text with rule 0.
347 value_ = boundary_point_type(map_->end(), 0);
// Begin state (function header not visible): boundary point at the start of
// the mapped text with rule 0.
352 value_ = boundary_point_type(map_->begin(), 0);
// Positions the iterator relative to text position `p`.
355 void set(base_iterator p)
// Character distance of `p` from the start of the mapped text.
357 size_t dist = std::distance(map_->begin(), p);
359 const auto b = index().begin(), e = index().end();
// Binary search over the index for that distance (assumes break_info compares
// by offset -- TODO confirm).
360 const auto ptr = std::lower_bound(b, e, break_info(dist));
// NOTE(review): the condition selecting this assignment is not visible here
// (presumably the search reached `e`) -- confirm.
363 current_ = size() - 1;
// Skip positions rejected by valid_offset() (loop body not visible here).
367 while(!valid_offset(current_))
// Move `p` from where the caller pointed to the offset of the chosen index entry.
370 std::ptrdiff_t diff = get_offset(current_) - dist;
371 std::advance(p, diff);
// Moves the cached boundary point from the current position to `pos` by
// advancing a copy of its iterator by the offset difference.
// NOTE(review): the statements storing `i` and `pos` back are not visible here.
376 void update_current(
size_t pos)
378 std::ptrdiff_t diff = get_offset(pos) - get_offset(current_);
379 base_iterator i = value_.iterator();
380 std::advance(i, diff);
// The rule is read from the index entry at current_; the size() check keeps
// the subscript inside the index.
388 if(current_ != size())
389 value_.rule(index()[current_].rule);
// Character offset for index position `ind`.
// NOTE(review): the condition guarding the first return is not visible here
// (presumably ind == size()) -- confirm.
391 size_t get_offset(
size_t ind)
const
394 return index().back().offset;
395 return index()[ind].offset;
// Despite its name, `offset` is a position in index(), not a character offset.
// Position 0 and any position at or beyond the last index entry
// (offset + 1 >= size()) are always accepted; any other position is accepted
// only when its rule intersects mask_.
398 bool valid_offset(
size_t offset)
const
400 return offset == 0 || offset + 1 >= size()
401 || (index()[offset].rule & mask_) != 0;
// Number of entries in the mapping's index.
404 size_t size()
const {
return index().size(); }
// Shortcut to the mapping's index; requires map_ to be non-null.
406 const index_type& index()
const {
return map_->index(); }
408 boundary_point_type value_;
410 const mapping_type* map_;
// Forward declarations of the index class templates.
// NOTE(review): the declaration that belongs to the first template header is
// not visible in this listing (presumably `class segment_index;`) -- confirm.
418 template<
typename BaseIterator>
// Forward declaration of boundary_point_index, allowing it to be named before
// its definition.
421 template<
typename BaseIterator>
422 class boundary_point_index;
// Fragments of the segment_index class template.
// NOTE(review): the class header, most member signatures and all braces are
// not visible in this listing; comments describe only the visible statements.
474 template<
typename BaseIterator>
480#ifdef BOOST_LOCALE_DOXYGEN
// Both iterator and const_iterator name the same detail iterator type.
497 typedef detail::segment_index_iterator<base_iterator>
iterator;
498 typedef detail::segment_index_iterator<base_iterator>
const_iterator;
// Constructor tail: stores the caller-supplied rule mask; full_select_ starts false.
518 const std::locale& loc = std::locale()) :
520 mask_(mask), full_select_(false)
// Constructor tail without a mask argument: every bit of the 32-bit mask is
// set (0xFFFFFFFFu); full_select_ starts false.
527 const std::locale& loc = std::locale()) :
529 mask_(0xFFFFFFFFu), full_select_(false)
// Replaces the stored mapping with a new one built for [begin, end).
558 map_ = mapping_type(type,
begin,
end, loc);
// Begin iterator: first constructor argument `true`. The iterator receives the
// address of this object's map_ together with the current mask and
// full_select_ values.
570 return iterator(
true, &map_, mask_, full_select_);
// End iterator: same arguments with the flag set to `false`.
580 return iterator(
false, &map_, mask_, full_select_);
// Iterator positioned relative to text position `p`.
600 return iterator(p, &map_, mask_, full_select_);
646 typedef detail::mapping<base_iterator> mapping_type;
// Fragments of the boundary_point_index class template.
// NOTE(review): the class header, most member signatures, the constructor
// initializer lists and all braces are not visible in this listing.
696 template<
typename BaseIterator>
702#ifdef BOOST_LOCALE_DOXYGEN
// Both iterator and const_iterator name the same detail iterator type.
720 typedef detail::boundary_point_index_iterator<base_iterator>
iterator;
721 typedef detail::boundary_point_index_iterator<base_iterator>
const_iterator;
// Tails of two constructor signatures; `loc` defaults to a default-constructed
// std::locale in both.
742 const std::locale& loc = std::locale()) :
751 const std::locale& loc = std::locale()) :
// Replaces the stored mapping with a new one built for [begin, end).
781 map_ = mapping_type(type,
begin,
end, loc);
// Begin iterator: first constructor argument `true`. The iterator receives the
// address of this object's map_ and the current mask.
793 return iterator(
true, &map_, mask_);
// End iterator: same arguments with the flag set to `false`.
805 return iterator(
false, &map_, mask_);
837 typedef detail::mapping<base_iterator> mapping_type;
// Out-of-class member definitions converting between segment_index and
// boundary_point_index.
// NOTE(review): the declarator lines and bodies are not visible in this
// listing; which member each fragment belongs to is inferred from the members
// it initializes -- confirm against the full file.
843 template<
typename BaseIterator>
// Initializer list that sets full_select_, so presumably the segment_index
// constructor taking a boundary_point_index: the mapping is copied from
// `other`, the mask is set to all bits and full_select_ to false.
845 map_(other.map_), mask_(0xFFFFFFFFu), full_select_(false)
848 template<
typename BaseIterator>
// Initializer list without full_select_, so presumably the
// boundary_point_index constructor taking a segment_index: the mapping is
// copied from `other` and the mask is set to all bits.
850 map_(other.map_), mask_(0xFFFFFFFFu)
// Template header whose definition is not visible here.
853 template<
typename BaseIterator>
// Definition returning boundary_point_index<BaseIterator>& (presumably an
// assignment operator; the rest of the declarator is not visible here).
860 template<
typename BaseIterator>
861 boundary_point_index<BaseIterator>&
871#ifndef BOOST_LOCALE_NO_CXX20_STRING8
874#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
877#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
886#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
889#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
895#ifndef BOOST_LOCALE_NO_CXX20_STRING8
898#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
901#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
910#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
913#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
This class holds an index of boundary points and allows iterating over them.
Definition: index.hpp:697
This class represents a boundary point in the text.
Definition: boundary_point.hpp:44
This class holds an index of segments in the text range and allows iterating over them.
Definition: index.hpp:475
A segment object that represents a pair of iterators that define the range where this segment exists.
Definition: segment.hpp:90
boundary_point_index< const char16_t * > u16cboundary_point_index
convenience typedef
Definition: index.hpp:911
segment< base_iterator > value_type
Definition: index.hpp:502
segment_index< const wchar_t * > wcsegment_index
convenience typedef
Definition: index.hpp:882
BaseIterator base_iterator
The type of the iterator used to iterate over the original text.
Definition: index.hpp:478
void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:779
boundary_point_index< const char * > cboundary_point_index
convenience typedef
Definition: index.hpp:905
boundary_point_index< std::u32string::const_iterator > u32sboundary_point_index
convenience typedef
Definition: index.hpp:902
boundary_type
This type describes the possible boundary analysis alternatives.
Definition: types.hpp:30
unspecified_iterator_type iterator
Definition: index.hpp:716
void full_select(bool v)
Definition: index.hpp:639
uint32_t rule_type
Flags used with word boundary analysis – the type of the word, line or sentence boundary found.
Definition: types.hpp:40
segment_index()
Definition: index.hpp:511
segment_index< std::u8string::const_iterator > u8ssegment_index
convenience typedef
Definition: index.hpp:872
segment_index< std::u32string::const_iterator > u32ssegment_index
convenience typedef
Definition: index.hpp:878
void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:556
boundary_point_index< const wchar_t * > wcboundary_point_index
convenience typedef
Definition: index.hpp:906
segment_index(const boundary_point_index< base_iterator > &)
boundary_point_index< std::string::const_iterator > sboundary_point_index
convenience typedef
Definition: index.hpp:893
boundary_point_index< std::u8string::const_iterator > u8sboundary_point_index
convenience typedef
Definition: index.hpp:896
iterator end() const
Definition: index.hpp:803
void rule(rule_type v)
Set the mask of rules that are used.
Definition: index.hpp:830
segment_index< std::wstring::const_iterator > wssegment_index
convenience typedef
Definition: index.hpp:870
segment_index & operator=(const boundary_point_index< base_iterator > &)
unspecified_iterator_type const_iterator
Definition: index.hpp:495
iterator find(base_iterator p) const
Definition: index.hpp:819
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, const std::locale &loc=std::locale())
Definition: index.hpp:738
iterator begin() const
Definition: index.hpp:791
segment_index< std::u16string::const_iterator > u16ssegment_index
convenience typedef
Definition: index.hpp:875
boundary_point_index(const segment_index< base_iterator > &other)
segment_index< const char * > csegment_index
convenience typedef
Definition: index.hpp:881
segment_index(boundary_type type, base_iterator begin, base_iterator end, rule_type mask, const std::locale &loc=std::locale())
Definition: index.hpp:514
std::vector< break_info > index_type
Definition: facets.hpp:52
boundary_point< base_iterator > value_type
Definition: index.hpp:725
bool full_select() const
Definition: index.hpp:624
segment_index< const char16_t * > u16csegment_index
convenience typedef
Definition: index.hpp:887
iterator find(base_iterator p) const
Definition: index.hpp:598
segment_index(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:524
iterator end() const
Definition: index.hpp:578
BaseIterator base_iterator
The type of the iterator used to iterate over the original text.
Definition: index.hpp:700
boundary_point_index< const char32_t * > u32cboundary_point_index
convenience typedef
Definition: index.hpp:914
boundary_point_index< std::u16string::const_iterator > u16sboundary_point_index
convenience typedef
Definition: index.hpp:899
boundary_point_index()
Definition: index.hpp:734
segment_index< const char32_t * > u32csegment_index
convenience typedef
Definition: index.hpp:890
unspecified_iterator_type const_iterator
Definition: index.hpp:718
void rule(rule_type v)
Set the mask of rules that are used.
Definition: index.hpp:609
segment_index< std::string::const_iterator > ssegment_index
convenience typedef
Definition: index.hpp:869
boundary_point_index< std::wstring::const_iterator > wsboundary_point_index
convenience typedef
Definition: index.hpp:894
rule_type rule() const
Get the mask of rules that are used.
Definition: index.hpp:604
iterator begin() const
Definition: index.hpp:568
rule_type rule() const
Get the mask of rules that are used.
Definition: index.hpp:825
boundary_point_index(boundary_type type, base_iterator begin, base_iterator end, const std::locale &loc=std::locale())
Definition: index.hpp:748
unspecified_iterator_type iterator
Definition: index.hpp:493
boundary_point_index & operator=(const segment_index< base_iterator > &other)
@ boundary
Generate boundary analysis facet.