Open Chinese Convert  1.1.2
A project for conversion between Traditional and Simplified Chinese
Segments.hpp
1 /*
2  * Open Chinese Convert
3  *
4  * Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #pragma once
20 
#include <cstddef>
#include <iterator>
#include <sstream>

#include "Common.hpp"
24 
25 namespace opencc {
/**
 * Segmented text.
 *
 * An append-only, ordered sequence of C-string segments. A segment is either
 * "managed" (a std::string copy stored inside this object) or "unmanaged"
 * (only the caller's pointer is stored). Every segment is read back as a
 * `const char*`, in the order it was added.
 */
class OPENCC_EXPORT Segments {
public:
  /// Creates an empty sequence.
  Segments() {}

  /**
   * Creates a sequence from a list of C strings.
   *
   * Each element is copied into managed storage, so the pointers in
   * `initList` are not retained after construction.
   */
  Segments(std::initializer_list<const char*> initList) {
    for (const char* item : initList) {
      // Spell out the std::string conversion so the managed overload of
      // AddSegment is visibly the one being selected.
      AddSegment(std::string(item));
    }
  }

  /// Creates a sequence from a list of strings; each one is copied.
  Segments(std::initializer_list<std::string> initList) {
    for (const std::string& item : initList) {
      AddSegment(item);
    }
  }

  /**
   * Appends an unmanaged segment.
   *
   * Only the pointer is stored; the characters are not copied, so the caller
   * has to keep the pointed-to string alive for as long as it may be read
   * through this object.
   */
  void AddSegment(const char* unmanagedString) {
    // Store the payload first: if recording the index entry throws, nothing
    // in `indexes` refers to a slot that does not exist.
    unmanaged.push_back(unmanagedString);
    indexes.push_back(std::make_pair(unmanaged.size() - 1, false));
  }

  /// Appends a managed segment holding a copy of `str`.
  void AddSegment(const std::string& str) {
    // Same ordering rationale as the unmanaged overload.
    managed.push_back(str);
    indexes.push_back(std::make_pair(managed.size() - 1, true));
  }

  /**
   * Read-only input iterator over the segments, in insertion order.
   *
   * The member types below are the ones the deprecated base class
   * std::iterator<std::input_iterator_tag, const char*> used to supply.
   */
  class iterator {
  public:
    using iterator_category = std::input_iterator_tag;
    using value_type = const char*;
    using difference_type = std::ptrdiff_t;
    using pointer = const char**;
    using reference = const char*&;

    iterator(const Segments* const _segments, size_t _cursor)
        : segments(_segments), cursor(_cursor) {}

    /// Pre-increment: moves to the next segment.
    iterator& operator++() {
      ++cursor;
      return *this;
    }

    /// Post-increment: moves to the next segment, returns the old position.
    iterator operator++(int) {
      iterator previous(*this);
      ++cursor;
      return previous;
    }

    /// Equal when both iterators refer to the same position of the same object.
    bool operator==(const iterator& that) const {
      return cursor == that.cursor && segments == that.segments;
    }

    bool operator!=(const iterator& that) const {
      return !this->operator==(that);
    }

    /// Returns the segment at the current position (see Segments::At).
    const char* operator*() const { return segments->At(cursor); }

  private:
    // Not a const pointer, so that the iterator stays copy-assignable.
    const Segments* segments;
    size_t cursor;
  };

  /**
   * Returns the segment at position `cursor`.
   *
   * No bounds check is performed; `cursor` must be less than Length().
   * For a managed segment the result is the stored string's c_str(), so it
   * should not be held across later AddSegment calls, which may relocate the
   * stored strings.
   */
  const char* At(size_t cursor) const {
    const auto& index = indexes[cursor];
    if (index.second) {
      return managed[index.first].c_str();
    } else {
      return unmanaged[index.first];
    }
  }

  /// Number of segments added so far.
  size_t Length() const { return indexes.size(); }

  iterator begin() const { return iterator(this, 0); }

  iterator end() const { return iterator(this, indexes.size()); }

  /// Concatenates all segments, in order, into one string.
  std::string ToString() const {
    // TODO implement a nested structure to reduce concatenation,
    // like a purely functional differential list
    std::string joined;
    for (const char* segment : *this) {
      joined += segment;
    }
    return joined;
  }

private:
  // Copy construction is disabled. Deleting it (rather than giving it an
  // empty body) turns an accidental copy into a compile error instead of
  // silently producing an empty object.
  Segments(const Segments&) = delete;

  // Pointers supplied through AddSegment(const char*).
  std::vector<const char*> unmanaged;
  // Copies made by AddSegment(const std::string&).
  std::vector<std::string> managed;
  // One entry per segment, in insertion order:
  // (position inside `managed` or `unmanaged`, true if managed).
  std::vector<std::pair<size_t, bool>> indexes;
};
114 } // namespace opencc
Definition: Segments.hpp:56
Segmented text.
Definition: Segments.hpp:30