OpenMW
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
keywordsearch.hpp
Go to the documentation of this file.
1 #ifndef GAME_MWDIALOGUE_KEYWORDSEARCH_H
2 #define GAME_MWDIALOGUE_KEYWORDSEARCH_H
3 
4 #include <map>
5 #include <cctype>
6 #include <stdexcept>
7 #include <vector>
8 #include <algorithm> // std::reverse
9 
11 
12 namespace MWDialogue
13 {
14 
15 template <typename string_t, typename value_t>
17 {
18 public:
19 
20  typedef typename string_t::const_iterator Point;
21 
22  struct Match
23  {
26  value_t mValue;
27  };
28 
29  void seed (string_t keyword, value_t value)
30  {
31  if (keyword.empty())
32  return;
33  seed_impl (/*std::move*/ (keyword), /*std::move*/ (value), 0, mRoot);
34  }
35 
36  void clear ()
37  {
38  mRoot.mChildren.clear ();
39  mRoot.mKeyword.clear ();
40  }
41 
42  bool containsKeyword (string_t keyword, value_t& value)
43  {
44  typename Entry::childen_t::iterator current;
45  typename Entry::childen_t::iterator next;
46 
47  current = mRoot.mChildren.find (Misc::StringUtils::toLower (*keyword.begin()));
48  if (current == mRoot.mChildren.end())
49  return false;
50  else if (current->second.mKeyword.size() && Misc::StringUtils::ciEqual(current->second.mKeyword, keyword))
51  {
52  value = current->second.mValue;
53  return true;
54  }
55 
56  for (Point i = ++keyword.begin(); i != keyword.end(); ++i)
57  {
58  next = current->second.mChildren.find(Misc::StringUtils::toLower (*i));
59  if (next == current->second.mChildren.end())
60  return false;
61  if (Misc::StringUtils::ciEqual(next->second.mKeyword, keyword))
62  {
63  value = next->second.mValue;
64  return true;
65  }
66  current = next;
67  }
68  return false;
69  }
70 
71  static bool sortMatches(const Match& left, const Match& right)
72  {
73  return left.mBeg < right.mBeg;
74  }
75 
76  void highlightKeywords (Point beg, Point end, std::vector<Match>& out)
77  {
78  std::vector<Match> matches;
79  for (Point i = beg; i != end; ++i)
80  {
81  // check if previous character marked start of new word
82  if (i != beg)
83  {
84  Point prev = i;
85  --prev;
86  if(isalpha(*prev))
87  continue;
88  }
89 
90 
91  // check first character
92  typename Entry::childen_t::iterator candidate = mRoot.mChildren.find (Misc::StringUtils::toLower (*i));
93 
94  // no match, on to next character
95  if (candidate == mRoot.mChildren.end ())
96  continue;
97 
98  // see how far the match goes
99  Point j = i;
100 
101  // some keywords might be longer variations of other keywords, so we definitely need a list of candidates
102  // the first element in the pair is length of the match, i.e. depth from the first character on
103  std::vector< typename std::pair<int, typename Entry::childen_t::iterator> > candidates;
104 
105  while ((j + 1) != end)
106  {
107  typename Entry::childen_t::iterator next = candidate->second.mChildren.find (Misc::StringUtils::toLower (*++j));
108 
109  if (next == candidate->second.mChildren.end ())
110  {
111  if (candidate->second.mKeyword.size() > 0)
112  candidates.push_back(std::make_pair((j-i), candidate));
113  break;
114  }
115 
116  candidate = next;
117 
118  if (candidate->second.mKeyword.size() > 0)
119  candidates.push_back(std::make_pair((j-i), candidate));
120  }
121 
122  if (candidates.empty())
123  continue; // didn't match enough to disambiguate, on to next character
124 
125  // shorter candidates will be added to the vector first. however, we want to check against longer candidates first
126  std::reverse(candidates.begin(), candidates.end());
127 
128  for (typename std::vector< std::pair<int, typename Entry::childen_t::iterator> >::iterator it = candidates.begin();
129  it != candidates.end(); ++it)
130  {
131  candidate = it->second;
132  // try to match the rest of the keyword
133  Point k = i + it->first;
134  typename string_t::const_iterator t = candidate->second.mKeyword.begin () + (k - i);
135 
136 
137  while (k != end && t != candidate->second.mKeyword.end ())
138  {
140  break;
141 
142  ++k, ++t;
143  }
144 
145  // didn't match full keyword, try the next candidate
146  if (t != candidate->second.mKeyword.end ())
147  continue;
148 
149  // found a keyword, but there might still be longer keywords that start somewhere _within_ this keyword
150  // we will resolve these overlapping keywords later, choosing the longest one in case of conflict
151  Match match;
152  match.mValue = candidate->second.mValue;
153  match.mBeg = i;
154  match.mEnd = k;
155  matches.push_back(match);
156  break;
157  }
158  }
159 
160  // resolve overlapping keywords
161  while (!matches.empty())
162  {
163  int longestKeywordSize = 0;
164  typename std::vector<Match>::iterator longestKeyword = matches.begin();
165  for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end(); ++it)
166  {
167  int size = it->mEnd - it->mBeg;
168  if (size > longestKeywordSize)
169  {
170  longestKeywordSize = size;
171  longestKeyword = it;
172  }
173 
174  typename std::vector<Match>::iterator next = it;
175  ++next;
176 
177  if (next == matches.end())
178  break;
179 
180  if (it->mEnd <= next->mBeg)
181  {
182  break; // no overlap
183  }
184  }
185 
186  Match keyword = *longestKeyword;
187  matches.erase(longestKeyword);
188  out.push_back(keyword);
189  // erase anything that overlaps with the keyword we just added to the output
190  for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end();)
191  {
192  if (it->mBeg < keyword.mEnd && it->mEnd > keyword.mBeg)
193  it = matches.erase(it);
194  else
195  ++it;
196  }
197  }
198 
199  std::sort(out.begin(), out.end(), sortMatches);
200  }
201 
202 private:
203 
204  struct Entry
205  {
206  typedef std::map <wchar_t, Entry> childen_t;
207 
208  string_t mKeyword;
209  value_t mValue;
211  };
212 
213  void seed_impl (string_t keyword, value_t value, size_t depth, Entry & entry)
214  {
215  int ch = Misc::StringUtils::toLower (keyword.at (depth));
216 
217  typename Entry::childen_t::iterator j = entry.mChildren.find (ch);
218 
219  if (j == entry.mChildren.end ())
220  {
221  entry.mChildren [ch].mValue = /*std::move*/ (value);
222  entry.mChildren [ch].mKeyword = /*std::move*/ (keyword);
223  }
224  else
225  {
226  if (j->second.mKeyword.size () > 0)
227  {
228  if (keyword == j->second.mKeyword)
229  throw std::runtime_error ("duplicate keyword inserted");
230 
231  value_t pushValue = /*std::move*/ (j->second.mValue);
232  string_t pushKeyword = /*std::move*/ (j->second.mKeyword);
233 
234  if (depth >= pushKeyword.size ())
235  throw std::runtime_error ("unexpected");
236 
237  if (depth+1 < pushKeyword.size())
238  {
239  seed_impl (/*std::move*/ (pushKeyword), /*std::move*/ (pushValue), depth+1, j->second);
240  j->second.mKeyword.clear ();
241  }
242  }
243  if (depth+1 == keyword.size())
244  j->second.mKeyword = value;
245  else // depth+1 < keyword.size()
246  seed_impl (/*std::move*/ (keyword), /*std::move*/ (value), depth+1, j->second);
247  }
248 
249  }
250 
252 };
253 
254 }
255 
256 #endif
Basic quest/dialogue/topic entry.
Definition: journalentry.hpp:19
Point mBeg
Definition: keywordsearch.hpp:24
static bool ciEqual(const std::string &x, const std::string &y)
Definition: stringops.hpp:62
static bool sortMatches(const Match &left, const Match &right)
Definition: keywordsearch.hpp:71
void highlightKeywords(Point beg, Point end, std::vector< Match > &out)
Definition: keywordsearch.hpp:76
static char toLower(char c)
Definition: stringops.hpp:24
void clear()
Definition: keywordsearch.hpp:36
Point mEnd
Definition: keywordsearch.hpp:25
Entry mRoot
Definition: keywordsearch.hpp:251
bool containsKeyword(string_t keyword, value_t &value)
Definition: keywordsearch.hpp:42
Definition: keywordsearch.hpp:204
value_t mValue
Definition: keywordsearch.hpp:209
childen_t mChildren
Definition: keywordsearch.hpp:210
std::map< wchar_t, Entry > childen_t
Definition: keywordsearch.hpp:206
value_t mValue
Definition: keywordsearch.hpp:26
void seed(string_t keyword, value_t value)
Definition: keywordsearch.hpp:29
Definition: keywordsearch.hpp:22
void seed_impl(string_t keyword, value_t value, size_t depth, Entry &entry)
Definition: keywordsearch.hpp:213
Definition: keywordsearch.hpp:16
string_t mKeyword
Definition: keywordsearch.hpp:208
string_t::const_iterator Point
Definition: keywordsearch.hpp:20