| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433 |
- // This file is part of Desktop App Toolkit,
- // a set of libraries for developing nice desktop applications.
- //
- // For license and copyright information please follow this link:
- // https://github.com/desktop-app/legal/blob/master/LEGAL
- //
- #include "emoji_suggestions.h"
- #include <algorithm>
- #include "emoji_suggestions_data.h"
- #ifndef Expects
- #include <cassert>
- #define Expects(condition) assert(condition)
- #endif // Expects
- namespace Ui {
- namespace Emoji {
- namespace internal {
- namespace {
- checksum Crc32Table[256];
- class Crc32Initializer {
- public:
- Crc32Initializer() {
- checksum poly = 0x04C11DB7U;
- for (auto i = 0; i != 256; ++i) {
- Crc32Table[i] = reflect(i, 8) << 24;
- for (auto j = 0; j != 8; ++j) {
- Crc32Table[i] = (Crc32Table[i] << 1) ^ (Crc32Table[i] & (1 << 31) ? poly : 0);
- }
- Crc32Table[i] = reflect(Crc32Table[i], 32);
- }
- }
- private:
- checksum reflect(checksum val, char ch) {
- checksum result = 0;
- for (int i = 1; i < (ch + 1); ++i) {
- if (val & 1) {
- result |= 1 << (ch - i);
- }
- val >>= 1;
- }
- return result;
- }
- };
- } // namespace
- checksum countChecksum(const void *data, std::size_t size) {
- static Crc32Initializer InitTable;
- auto buffer = static_cast<const unsigned char*>(data);
- auto result = checksum(0xFFFFFFFFU);
- for (auto i = std::size_t(0); i != size; ++i) {
- result = (result >> 8) ^ Crc32Table[(result & 0xFFU) ^ buffer[i]];
- }
- return (result ^ 0xFFFFFFFFU);
- }
- } // namespace internal
- namespace {
- class string_span {
- public:
- string_span() = default;
- string_span(const utf16string *data, std::size_t size) : begin_(data), size_(size) {
- }
- string_span(const std::vector<utf16string> &data) : begin_(data.data()), size_(data.size()) {
- }
- string_span(const string_span &other) = default;
- string_span &operator=(const string_span &other) = default;
- const utf16string *begin() const {
- return begin_;
- }
- const utf16string *end() const {
- return begin_ + size_;
- }
- std::size_t size() const {
- return size_;
- }
- string_span subspan(std::size_t offset, std::size_t size) {
- return string_span(begin_ + offset, size);
- }
- private:
- const utf16string *begin_ = nullptr;
- std::size_t size_ = 0;
- };
- bool IsNumber(utf16char ch) {
- return (ch >= '0' && ch <= '9');
- }
- bool IsLetterOrNumber(utf16char ch) {
- return (ch >= 'a' && ch <= 'z') || IsNumber(ch);
- }
- using Replacement = internal::Replacement;
- class Completer {
- public:
- Completer(utf16string query);
- std::vector<Suggestion> resolve();
- private:
- struct Result {
- const Replacement *replacement;
- int wordsUsed;
- };
- static std::vector<utf16char> NormalizeQuery(utf16string query);
- void addResult(const Replacement *replacement);
- bool isDuplicateOfLastResult(const Replacement *replacement) const;
- bool isBetterThanLastResult(const Replacement *replacement) const;
- void processInitialList();
- void filterInitialList();
- void initWordsTracking();
- bool matchQueryForCurrentItem();
- bool matchQueryTailStartingFrom(int position);
- string_span findWordsStartingWith(utf16char ch);
- int findEqualCharsCount(int position, const utf16string *word);
- std::vector<Suggestion> prepareResult();
- bool startsWithQuery(utf16string word);
- bool isExactMatch(utf16string replacement);
- std::vector<Result> _result;
- utf16string _initialQuery;
- const std::vector<utf16char> _query;
- const utf16char *_queryBegin = nullptr;
- int _querySize = 0;
- const std::vector<const Replacement*> *_initialList = nullptr;
- string_span _currentItemWords;
- int _currentItemWordsUsedCount = 0;
- class UsedWordGuard {
- public:
- UsedWordGuard(std::vector<small> &map, int index);
- UsedWordGuard(const UsedWordGuard &other) = delete;
- [[maybe_unused]] UsedWordGuard(UsedWordGuard &&other);
- UsedWordGuard &operator=(const UsedWordGuard &other) = delete;
- UsedWordGuard &operator=(UsedWordGuard &&other) = delete;
- explicit operator bool() const;
- ~UsedWordGuard();
- private:
- std::vector<small> &_map;
- int _index = 0;
- small _guarded = 0;
- };
- std::vector<small> _currentItemWordsUsedMap;
- };
- Completer::UsedWordGuard::UsedWordGuard(std::vector<small> &map, int index) : _map(map), _index(index) {
- Expects(_map.size() > _index);
- if (!_map[_index]) {
- _guarded = _map[_index] = 1;
- }
- }
- Completer::UsedWordGuard::UsedWordGuard(UsedWordGuard &&other) : _map(other._map), _index(other._index), _guarded(other._guarded) {
- other._guarded = 0;
- }
- Completer::UsedWordGuard::operator bool() const {
- return _guarded;
- }
- Completer::UsedWordGuard::~UsedWordGuard() {
- if (_guarded) {
- _map[_index] = 0;
- }
- }
- Completer::Completer(utf16string query) : _initialQuery(query), _query(NormalizeQuery(query)) {
- }
- // Remove all non-letters-or-numbers.
- // Leave '-' and '+' only if they're followed by a number or
- // at the end of the query (so it is possibly followed by a number).
- std::vector<utf16char> Completer::NormalizeQuery(utf16string query) {
- auto result = std::vector<utf16char>();
- result.reserve(query.size());
- auto copyFrom = query.data();
- auto e = copyFrom + query.size();
- auto copyTo = result.data();
- for (auto i = query.data(); i != e; ++i) {
- if (IsLetterOrNumber(*i)) {
- continue;
- } else if (*i == '-' || *i == '+') {
- if (i + 1 == e || IsNumber(*(i + 1))) {
- continue;
- }
- }
- if (i > copyFrom) {
- result.resize(result.size() + (i - copyFrom));
- memcpy(copyTo, copyFrom, (i - copyFrom) * sizeof(utf16char));
- copyTo += (i - copyFrom);
- }
- copyFrom = i + 1;
- }
- if (e > copyFrom) {
- result.resize(result.size() + (e - copyFrom));
- memcpy(copyTo, copyFrom, (e - copyFrom) * sizeof(utf16char));
- copyTo += (e - copyFrom);
- }
- return result;
- }
- std::vector<Suggestion> Completer::resolve() {
- _queryBegin = _query.data();
- _querySize = _query.size();
- if (!_querySize) {
- return std::vector<Suggestion>();
- }
- _initialList = internal::GetReplacements(*_queryBegin);
- if (!_initialList) {
- return std::vector<Suggestion>();
- }
- _result.reserve(_initialList->size());
- processInitialList();
- return prepareResult();
- }
- bool Completer::isDuplicateOfLastResult(const Replacement *item) const {
- if (_result.empty()) {
- return false;
- }
- return (_result.back().replacement->emoji == item->emoji);
- }
- bool Completer::isBetterThanLastResult(const Replacement *item) const {
- Expects(!_result.empty());
- auto &last = _result.back();
- if (_currentItemWordsUsedCount < last.wordsUsed) {
- return true;
- }
- auto firstCharOfQuery = _query[0];
- auto firstCharAfterColonLast = last.replacement->replacement[1];
- auto firstCharAfterColonCurrent = item->replacement[1];
- auto goodLast = (firstCharAfterColonLast == firstCharOfQuery);
- auto goodCurrent = (firstCharAfterColonCurrent == firstCharOfQuery);
- return !goodLast && goodCurrent;
- }
- void Completer::addResult(const Replacement *item) {
- if (!isDuplicateOfLastResult(item)) {
- _result.push_back({ item, _currentItemWordsUsedCount });
- } else if (isBetterThanLastResult(item)) {
- _result.back() = { item, _currentItemWordsUsedCount };
- }
- }
- void Completer::processInitialList() {
- if (_querySize > 1) {
- filterInitialList();
- } else {
- _currentItemWordsUsedCount = 1;
- for (auto item : *_initialList) {
- addResult(item);
- }
- }
- }
- void Completer::initWordsTracking() {
- auto maxWordsCount = 0;
- for (auto item : *_initialList) {
- auto wordsCount = item->words.size();
- if (maxWordsCount < wordsCount) {
- maxWordsCount = wordsCount;
- }
- }
- _currentItemWordsUsedMap = std::vector<small>(maxWordsCount, 0);
- }
- void Completer::filterInitialList() {
- initWordsTracking();
- for (auto item : *_initialList) {
- _currentItemWords = string_span(item->words);
- _currentItemWordsUsedCount = 1;
- if (matchQueryForCurrentItem()) {
- addResult(item);
- }
- _currentItemWordsUsedCount = 0;
- }
- }
- bool Completer::matchQueryForCurrentItem() {
- Expects(_currentItemWords.size() != 0);
- if (_currentItemWords.size() < 2) {
- return startsWithQuery(*_currentItemWords.begin());
- }
- return matchQueryTailStartingFrom(0);
- }
- bool Completer::startsWithQuery(utf16string word) {
- if (word.size() < _query.size()) {
- return false;
- }
- for (auto i = std::size_t(0), size = _query.size(); i != size; ++i) {
- if (word[i] != _query[i]) {
- return false;
- }
- }
- return true;
- }
- bool Completer::isExactMatch(utf16string replacement) {
- if (replacement.size() != _initialQuery.size() + 1) {
- return false;
- }
- for (auto i = std::size_t(0), size = _initialQuery.size(); i != size; ++i) {
- if (replacement[i] != _initialQuery[i]) {
- return false;
- }
- }
- return true;
- }
- bool Completer::matchQueryTailStartingFrom(int position) {
- auto charsLeftToMatch = (_querySize - position);
- if (!charsLeftToMatch) {
- return true;
- }
- auto firstCharToMatch = *(_queryBegin + position);
- auto foundWords = findWordsStartingWith(firstCharToMatch);
- for (auto word = foundWords.begin(), foundWordsEnd = word + foundWords.size(); word != foundWordsEnd; ++word) {
- auto wordIndex = word - _currentItemWords.begin();
- if (auto guard = UsedWordGuard(_currentItemWordsUsedMap, wordIndex)) {
- ++_currentItemWordsUsedCount;
- auto equalCharsCount = findEqualCharsCount(position, word);
- for (auto check = equalCharsCount; check != 0; --check) {
- if (matchQueryTailStartingFrom(position + check)) {
- return true;
- }
- }
- --_currentItemWordsUsedCount;
- }
- }
- return false;
- }
- int Completer::findEqualCharsCount(int position, const utf16string *word) {
- auto charsLeft = (_querySize - position);
- auto wordBegin = word->data();
- auto wordSize = word->size();
- auto possibleEqualCharsCount = (charsLeft > wordSize ? wordSize : charsLeft);
- for (auto equalTill = 1; equalTill != possibleEqualCharsCount; ++equalTill) {
- auto wordCh = *(wordBegin + equalTill);
- auto queryCh = *(_queryBegin + position + equalTill);
- if (wordCh != queryCh) {
- return equalTill;
- }
- }
- return possibleEqualCharsCount;
- }
- std::vector<Suggestion> Completer::prepareResult() {
- const auto firstCharOfQuery = _query[0];
- const auto reorder = [&](auto &&predicate) {
- std::stable_partition(
- std::begin(_result),
- std::end(_result),
- std::forward<decltype(predicate)>(predicate));
- };
- reorder([&](Result &result) {
- const auto firstCharAfterColon = result.replacement->replacement[1];
- return (firstCharAfterColon == firstCharOfQuery);
- });
- reorder([](Result &result) {
- return (result.wordsUsed < 2);
- });
- reorder([](Result &result) {
- return (result.wordsUsed < 3);
- });
- reorder([&](Result &result) {
- return isExactMatch(result.replacement->replacement);
- });
- auto result = std::vector<Suggestion>();
- result.reserve(_result.size());
- for (auto &item : _result) {
- result.emplace_back(
- item.replacement->emoji,
- item.replacement->replacement,
- item.replacement->replacement);
- }
- return result;
- }
- string_span Completer::findWordsStartingWith(utf16char ch) {
- auto begin = std::lower_bound(
- std::begin(_currentItemWords),
- std::end(_currentItemWords),
- ch,
- [](utf16string word, utf16char ch) { return word[0] < ch; });
- auto end = std::upper_bound(
- std::begin(_currentItemWords),
- std::end(_currentItemWords),
- ch,
- [](utf16char ch, utf16string word) { return ch < word[0]; });
- return _currentItemWords.subspan(
- begin - _currentItemWords.begin(),
- end - begin);
- }
- } // namespace
- std::vector<Suggestion> GetSuggestions(utf16string query) {
- return Completer(query).resolve();
- }
- int GetSuggestionMaxLength() {
- return internal::kReplacementMaxLength;
- }
- } // namespace Emoji
- } // namespace Ui
|