emoji_suggestions.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. // This file is part of Desktop App Toolkit,
  2. // a set of libraries for developing nice desktop applications.
  3. //
  4. // For license and copyright information please follow this link:
  5. // https://github.com/desktop-app/legal/blob/master/LEGAL
  6. //
  7. #include "emoji_suggestions.h"
  8. #include <algorithm>
  9. #include "emoji_suggestions_data.h"
  10. #ifndef Expects
  11. #include <cassert>
  12. #define Expects(condition) assert(condition)
  13. #endif // Expects
  14. namespace Ui {
  15. namespace Emoji {
  16. namespace internal {
  17. namespace {
  18. checksum Crc32Table[256];
  19. class Crc32Initializer {
  20. public:
  21. Crc32Initializer() {
  22. checksum poly = 0x04C11DB7U;
  23. for (auto i = 0; i != 256; ++i) {
  24. Crc32Table[i] = reflect(i, 8) << 24;
  25. for (auto j = 0; j != 8; ++j) {
  26. Crc32Table[i] = (Crc32Table[i] << 1) ^ (Crc32Table[i] & (1 << 31) ? poly : 0);
  27. }
  28. Crc32Table[i] = reflect(Crc32Table[i], 32);
  29. }
  30. }
  31. private:
  32. checksum reflect(checksum val, char ch) {
  33. checksum result = 0;
  34. for (int i = 1; i < (ch + 1); ++i) {
  35. if (val & 1) {
  36. result |= 1 << (ch - i);
  37. }
  38. val >>= 1;
  39. }
  40. return result;
  41. }
  42. };
  43. } // namespace
  44. checksum countChecksum(const void *data, std::size_t size) {
  45. static Crc32Initializer InitTable;
  46. auto buffer = static_cast<const unsigned char*>(data);
  47. auto result = checksum(0xFFFFFFFFU);
  48. for (auto i = std::size_t(0); i != size; ++i) {
  49. result = (result >> 8) ^ Crc32Table[(result & 0xFFU) ^ buffer[i]];
  50. }
  51. return (result ^ 0xFFFFFFFFU);
  52. }
  53. } // namespace internal
  54. namespace {
  55. class string_span {
  56. public:
  57. string_span() = default;
  58. string_span(const utf16string *data, std::size_t size) : begin_(data), size_(size) {
  59. }
  60. string_span(const std::vector<utf16string> &data) : begin_(data.data()), size_(data.size()) {
  61. }
  62. string_span(const string_span &other) = default;
  63. string_span &operator=(const string_span &other) = default;
  64. const utf16string *begin() const {
  65. return begin_;
  66. }
  67. const utf16string *end() const {
  68. return begin_ + size_;
  69. }
  70. std::size_t size() const {
  71. return size_;
  72. }
  73. string_span subspan(std::size_t offset, std::size_t size) {
  74. return string_span(begin_ + offset, size);
  75. }
  76. private:
  77. const utf16string *begin_ = nullptr;
  78. std::size_t size_ = 0;
  79. };
  80. bool IsNumber(utf16char ch) {
  81. return (ch >= '0' && ch <= '9');
  82. }
  83. bool IsLetterOrNumber(utf16char ch) {
  84. return (ch >= 'a' && ch <= 'z') || IsNumber(ch);
  85. }
  86. using Replacement = internal::Replacement;
  87. class Completer {
  88. public:
  89. Completer(utf16string query);
  90. std::vector<Suggestion> resolve();
  91. private:
  92. struct Result {
  93. const Replacement *replacement;
  94. int wordsUsed;
  95. };
  96. static std::vector<utf16char> NormalizeQuery(utf16string query);
  97. void addResult(const Replacement *replacement);
  98. bool isDuplicateOfLastResult(const Replacement *replacement) const;
  99. bool isBetterThanLastResult(const Replacement *replacement) const;
  100. void processInitialList();
  101. void filterInitialList();
  102. void initWordsTracking();
  103. bool matchQueryForCurrentItem();
  104. bool matchQueryTailStartingFrom(int position);
  105. string_span findWordsStartingWith(utf16char ch);
  106. int findEqualCharsCount(int position, const utf16string *word);
  107. std::vector<Suggestion> prepareResult();
  108. bool startsWithQuery(utf16string word);
  109. bool isExactMatch(utf16string replacement);
  110. std::vector<Result> _result;
  111. utf16string _initialQuery;
  112. const std::vector<utf16char> _query;
  113. const utf16char *_queryBegin = nullptr;
  114. int _querySize = 0;
  115. const std::vector<const Replacement*> *_initialList = nullptr;
  116. string_span _currentItemWords;
  117. int _currentItemWordsUsedCount = 0;
  118. class UsedWordGuard {
  119. public:
  120. UsedWordGuard(std::vector<small> &map, int index);
  121. UsedWordGuard(const UsedWordGuard &other) = delete;
  122. [[maybe_unused]] UsedWordGuard(UsedWordGuard &&other);
  123. UsedWordGuard &operator=(const UsedWordGuard &other) = delete;
  124. UsedWordGuard &operator=(UsedWordGuard &&other) = delete;
  125. explicit operator bool() const;
  126. ~UsedWordGuard();
  127. private:
  128. std::vector<small> &_map;
  129. int _index = 0;
  130. small _guarded = 0;
  131. };
  132. std::vector<small> _currentItemWordsUsedMap;
  133. };
  134. Completer::UsedWordGuard::UsedWordGuard(std::vector<small> &map, int index) : _map(map), _index(index) {
  135. Expects(_map.size() > _index);
  136. if (!_map[_index]) {
  137. _guarded = _map[_index] = 1;
  138. }
  139. }
  140. Completer::UsedWordGuard::UsedWordGuard(UsedWordGuard &&other) : _map(other._map), _index(other._index), _guarded(other._guarded) {
  141. other._guarded = 0;
  142. }
  143. Completer::UsedWordGuard::operator bool() const {
  144. return _guarded;
  145. }
  146. Completer::UsedWordGuard::~UsedWordGuard() {
  147. if (_guarded) {
  148. _map[_index] = 0;
  149. }
  150. }
  151. Completer::Completer(utf16string query) : _initialQuery(query), _query(NormalizeQuery(query)) {
  152. }
  153. // Remove all non-letters-or-numbers.
  154. // Leave '-' and '+' only if they're followed by a number or
  155. // at the end of the query (so it is possibly followed by a number).
  156. std::vector<utf16char> Completer::NormalizeQuery(utf16string query) {
  157. auto result = std::vector<utf16char>();
  158. result.reserve(query.size());
  159. auto copyFrom = query.data();
  160. auto e = copyFrom + query.size();
  161. auto copyTo = result.data();
  162. for (auto i = query.data(); i != e; ++i) {
  163. if (IsLetterOrNumber(*i)) {
  164. continue;
  165. } else if (*i == '-' || *i == '+') {
  166. if (i + 1 == e || IsNumber(*(i + 1))) {
  167. continue;
  168. }
  169. }
  170. if (i > copyFrom) {
  171. result.resize(result.size() + (i - copyFrom));
  172. memcpy(copyTo, copyFrom, (i - copyFrom) * sizeof(utf16char));
  173. copyTo += (i - copyFrom);
  174. }
  175. copyFrom = i + 1;
  176. }
  177. if (e > copyFrom) {
  178. result.resize(result.size() + (e - copyFrom));
  179. memcpy(copyTo, copyFrom, (e - copyFrom) * sizeof(utf16char));
  180. copyTo += (e - copyFrom);
  181. }
  182. return result;
  183. }
  184. std::vector<Suggestion> Completer::resolve() {
  185. _queryBegin = _query.data();
  186. _querySize = _query.size();
  187. if (!_querySize) {
  188. return std::vector<Suggestion>();
  189. }
  190. _initialList = internal::GetReplacements(*_queryBegin);
  191. if (!_initialList) {
  192. return std::vector<Suggestion>();
  193. }
  194. _result.reserve(_initialList->size());
  195. processInitialList();
  196. return prepareResult();
  197. }
  198. bool Completer::isDuplicateOfLastResult(const Replacement *item) const {
  199. if (_result.empty()) {
  200. return false;
  201. }
  202. return (_result.back().replacement->emoji == item->emoji);
  203. }
  204. bool Completer::isBetterThanLastResult(const Replacement *item) const {
  205. Expects(!_result.empty());
  206. auto &last = _result.back();
  207. if (_currentItemWordsUsedCount < last.wordsUsed) {
  208. return true;
  209. }
  210. auto firstCharOfQuery = _query[0];
  211. auto firstCharAfterColonLast = last.replacement->replacement[1];
  212. auto firstCharAfterColonCurrent = item->replacement[1];
  213. auto goodLast = (firstCharAfterColonLast == firstCharOfQuery);
  214. auto goodCurrent = (firstCharAfterColonCurrent == firstCharOfQuery);
  215. return !goodLast && goodCurrent;
  216. }
  217. void Completer::addResult(const Replacement *item) {
  218. if (!isDuplicateOfLastResult(item)) {
  219. _result.push_back({ item, _currentItemWordsUsedCount });
  220. } else if (isBetterThanLastResult(item)) {
  221. _result.back() = { item, _currentItemWordsUsedCount };
  222. }
  223. }
  224. void Completer::processInitialList() {
  225. if (_querySize > 1) {
  226. filterInitialList();
  227. } else {
  228. _currentItemWordsUsedCount = 1;
  229. for (auto item : *_initialList) {
  230. addResult(item);
  231. }
  232. }
  233. }
  234. void Completer::initWordsTracking() {
  235. auto maxWordsCount = 0;
  236. for (auto item : *_initialList) {
  237. auto wordsCount = item->words.size();
  238. if (maxWordsCount < wordsCount) {
  239. maxWordsCount = wordsCount;
  240. }
  241. }
  242. _currentItemWordsUsedMap = std::vector<small>(maxWordsCount, 0);
  243. }
  244. void Completer::filterInitialList() {
  245. initWordsTracking();
  246. for (auto item : *_initialList) {
  247. _currentItemWords = string_span(item->words);
  248. _currentItemWordsUsedCount = 1;
  249. if (matchQueryForCurrentItem()) {
  250. addResult(item);
  251. }
  252. _currentItemWordsUsedCount = 0;
  253. }
  254. }
  255. bool Completer::matchQueryForCurrentItem() {
  256. Expects(_currentItemWords.size() != 0);
  257. if (_currentItemWords.size() < 2) {
  258. return startsWithQuery(*_currentItemWords.begin());
  259. }
  260. return matchQueryTailStartingFrom(0);
  261. }
  262. bool Completer::startsWithQuery(utf16string word) {
  263. if (word.size() < _query.size()) {
  264. return false;
  265. }
  266. for (auto i = std::size_t(0), size = _query.size(); i != size; ++i) {
  267. if (word[i] != _query[i]) {
  268. return false;
  269. }
  270. }
  271. return true;
  272. }
  273. bool Completer::isExactMatch(utf16string replacement) {
  274. if (replacement.size() != _initialQuery.size() + 1) {
  275. return false;
  276. }
  277. for (auto i = std::size_t(0), size = _initialQuery.size(); i != size; ++i) {
  278. if (replacement[i] != _initialQuery[i]) {
  279. return false;
  280. }
  281. }
  282. return true;
  283. }
  284. bool Completer::matchQueryTailStartingFrom(int position) {
  285. auto charsLeftToMatch = (_querySize - position);
  286. if (!charsLeftToMatch) {
  287. return true;
  288. }
  289. auto firstCharToMatch = *(_queryBegin + position);
  290. auto foundWords = findWordsStartingWith(firstCharToMatch);
  291. for (auto word = foundWords.begin(), foundWordsEnd = word + foundWords.size(); word != foundWordsEnd; ++word) {
  292. auto wordIndex = word - _currentItemWords.begin();
  293. if (auto guard = UsedWordGuard(_currentItemWordsUsedMap, wordIndex)) {
  294. ++_currentItemWordsUsedCount;
  295. auto equalCharsCount = findEqualCharsCount(position, word);
  296. for (auto check = equalCharsCount; check != 0; --check) {
  297. if (matchQueryTailStartingFrom(position + check)) {
  298. return true;
  299. }
  300. }
  301. --_currentItemWordsUsedCount;
  302. }
  303. }
  304. return false;
  305. }
  306. int Completer::findEqualCharsCount(int position, const utf16string *word) {
  307. auto charsLeft = (_querySize - position);
  308. auto wordBegin = word->data();
  309. auto wordSize = word->size();
  310. auto possibleEqualCharsCount = (charsLeft > wordSize ? wordSize : charsLeft);
  311. for (auto equalTill = 1; equalTill != possibleEqualCharsCount; ++equalTill) {
  312. auto wordCh = *(wordBegin + equalTill);
  313. auto queryCh = *(_queryBegin + position + equalTill);
  314. if (wordCh != queryCh) {
  315. return equalTill;
  316. }
  317. }
  318. return possibleEqualCharsCount;
  319. }
  320. std::vector<Suggestion> Completer::prepareResult() {
  321. const auto firstCharOfQuery = _query[0];
  322. const auto reorder = [&](auto &&predicate) {
  323. std::stable_partition(
  324. std::begin(_result),
  325. std::end(_result),
  326. std::forward<decltype(predicate)>(predicate));
  327. };
  328. reorder([&](Result &result) {
  329. const auto firstCharAfterColon = result.replacement->replacement[1];
  330. return (firstCharAfterColon == firstCharOfQuery);
  331. });
  332. reorder([](Result &result) {
  333. return (result.wordsUsed < 2);
  334. });
  335. reorder([](Result &result) {
  336. return (result.wordsUsed < 3);
  337. });
  338. reorder([&](Result &result) {
  339. return isExactMatch(result.replacement->replacement);
  340. });
  341. auto result = std::vector<Suggestion>();
  342. result.reserve(_result.size());
  343. for (auto &item : _result) {
  344. result.emplace_back(
  345. item.replacement->emoji,
  346. item.replacement->replacement,
  347. item.replacement->replacement);
  348. }
  349. return result;
  350. }
  351. string_span Completer::findWordsStartingWith(utf16char ch) {
  352. auto begin = std::lower_bound(
  353. std::begin(_currentItemWords),
  354. std::end(_currentItemWords),
  355. ch,
  356. [](utf16string word, utf16char ch) { return word[0] < ch; });
  357. auto end = std::upper_bound(
  358. std::begin(_currentItemWords),
  359. std::end(_currentItemWords),
  360. ch,
  361. [](utf16char ch, utf16string word) { return ch < word[0]; });
  362. return _currentItemWords.subspan(
  363. begin - _currentItemWords.begin(),
  364. end - begin);
  365. }
  366. } // namespace
  367. std::vector<Suggestion> GetSuggestions(utf16string query) {
  368. return Completer(query).resolve();
  369. }
  370. int GetSuggestionMaxLength() {
  371. return internal::kReplacementMaxLength;
  372. }
  373. } // namespace Emoji
  374. } // namespace Ui