| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640 |
- // This file is part of Desktop App Toolkit,
- // a set of libraries for developing nice desktop applications.
- //
- // For license and copyright information please follow this link:
- // https://github.com/desktop-app/legal/blob/master/LEGAL
- //
- #include "spellcheck/third_party/hunspell_controller.h"
- #include "spellcheck/spellcheck_value.h"
- #include <mutex>
- #include <shared_mutex>
- #include <QDir>
- #include <QFileInfo>
- #include <hunspell/hunspell.hxx>
- #if __has_include(<glib/glib.hpp>)
- #include <glib/glib.hpp>
- using namespace gi::repository;
- #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
- #include <QTextCodec>
- #endif // Qt < 6.0.0
- namespace Platform::Spellchecker::ThirdParty {
- namespace {
- using WordsMap = std::map<QChar::Script, std::vector<QString>>;
- // Maximum number of words in the custom spellcheck dictionary.
- constexpr auto kMaxSyncableDictionaryWords = 1300;
- constexpr auto kTimeLimitSuggestion = crl::time(1000);
- #ifdef Q_OS_WIN
- const auto kLineBreak = QByteArrayLiteral("\r\n");
- #else // Q_OS_WIN
- const auto kLineBreak = QByteArrayLiteral("\n");
- #endif // Q_OS_WIN
- struct PathPair {
- QByteArray aff;
- QByteArray dic;
- };
- [[nodiscard]] PathPair PreparePaths(const QString &aff, const QString &dic) {
- const auto convert = [&](const QString &path) {
- const auto result = QDir::toNativeSeparators(path).toUtf8();
- #ifdef Q_OS_WIN
- return "\\\\?\\" + result;
- #else // Q_OS_WIN
- return result;
- #endif // !Q_OS_WIN
- };
- return {
- .aff = convert(aff),
- .dic = convert(dic),
- };
- }
- auto LocaleNameFromLangId(int langId) {
- return ::Spellchecker::LocaleFromLangId(langId).name();
- }
- QString CustomDictionaryPath() {
- return QStringLiteral("%1/%2").arg(
- ::Spellchecker::WorkingDirPath(),
- "custom");
- }
- [[nodiscard]] Hunspell LoadUtfInitializer() {
- const auto full = [&](const QString &name) {
- return ::Spellchecker::WorkingDirPath() + '/' + name;
- };
- const auto aff = full(u"utf_helper.aff"_q);
- const auto dic = full(u"utf_helper.dic"_q);
- if (!QFile::exists(aff)) {
- QDir().mkpath(::Spellchecker::WorkingDirPath());
- auto f = QFile(aff);
- if (f.open(QIODevice::WriteOnly)) {
- f.write("SET UTF-8" + kLineBreak);
- }
- }
- if (!QFile::exists(dic)) {
- auto f = QFile(dic);
- if (f.open(QIODevice::WriteOnly)) {
- f.write("1" + kLineBreak + "Zzz" + kLineBreak);
- }
- }
- const auto prepared = PreparePaths(aff, dic);
- return Hunspell(prepared.aff.constData(), prepared.dic.constData());
- }
- class CharsetConverter final {
- public:
- CharsetConverter(const std::string &charset)
- #if __has_include(<glib/glib.hpp>)
- : _charset(charset)
- #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
- : _codec(QTextCodec::codecForName(charset.c_str()))
- #endif // Qt < 6.0.0
- {}
- [[nodiscard]] bool isValid() const {
- #if __has_include(<glib/glib.hpp>)
- const uchar empty[] = "";
- return GLib::convert(empty, 0, _charset, "UTF-8")
- && GLib::convert(empty, 0, "UTF-8", _charset);
- #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
- return _codec;
- #else // Qt < 6.0.0
- return false;
- #endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
- }
- [[nodiscard]] std::string fromUnicode(const QString &data) {
- #if __has_include(<glib/glib.hpp>)
- const auto utf8 = data.toStdString();
- return GLib::convert(
- reinterpret_cast<const uchar*>(utf8.data()),
- utf8.size(),
- _charset,
- "UTF-8",
- nullptr,
- nullptr) | ranges::to<std::string>;
- #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
- return _codec->fromUnicode(data).toStdString();
- #else // Qt < 6.0.0
- return {};
- #endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
- }
- [[nodiscard]] QString toUnicode(const std::string &data) {
- #if __has_include(<glib/glib.hpp>)
- return QString::fromStdString(GLib::convert(
- reinterpret_cast<const uchar*>(data.data()),
- data.size(),
- "UTF-8",
- _charset,
- nullptr,
- nullptr) | ranges::to<std::string>);
- #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
- return _codec->toUnicode(data.data(), data.size());
- #else // Qt < 6.0.0
- return {};
- #endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
- }
- private:
- #if __has_include(<glib/glib.hpp>)
- std::string _charset;
- #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
- QTextCodec *_codec;
- #endif // Qt < 6.0.0
- };
- class HunspellEngine {
- public:
- HunspellEngine(const QString &lang);
- ~HunspellEngine() = default;
- bool isValid() const;
- bool spell(const QString &word) const;
- void suggest(
- const QString &wrongWord,
- std::vector<QString> *optionalSuggestions);
- QString lang();
- QChar::Script script();
- HunspellEngine(const HunspellEngine &) = delete;
- HunspellEngine &operator=(const HunspellEngine &) = delete;
- private:
- QString _lang;
- QChar::Script _script;
- std::unique_ptr<Hunspell> _hunspell;
- std::unique_ptr<CharsetConverter> _converter;
- };
- class HunspellService {
- public:
- HunspellService();
- ~HunspellService();
- void updateLanguages(std::vector<QString> langs);
- std::vector<QString> activeLanguages();
- [[nodiscard]] bool checkSpelling(const QString &wordToCheck);
- void fillSuggestionList(
- const QString &wrongWord,
- std::vector<QString> *optionalSuggestions);
- void addWord(const QString &word);
- void removeWord(const QString &word);
- void ignoreWord(const QString &word);
- bool isWordInDictionary(const QString &word);
- private:
- void writeToFile();
- void readFile();
- std::vector<QString> &addedWords(const QString &word);
- std::shared_ptr<std::vector<std::unique_ptr<HunspellEngine>>> _engines;
- std::vector<QString> _activeLanguages;
- // Use an empty Hunspell dictionary to fill it with our remembered words
- // for getting suggests.
- std::unique_ptr<Hunspell> _customDict;
- WordsMap _ignoredWords;
- WordsMap _addedWords;
- std::shared_ptr<std::atomic<int>> _epoch;
- std::atomic<int> _suggestionsEpoch = 0;
- std::shared_ptr<std::shared_mutex> _engineMutex;
- };
- HunspellEngine::HunspellEngine(const QString &lang)
- : _lang(lang)
- , _script(::Spellchecker::LocaleToScriptCode(lang)) {
- const auto workingDir = ::Spellchecker::WorkingDirPath();
- if (workingDir.isEmpty()) {
- return;
- }
- const auto rawPath = QString("%1/%2/%2").arg(workingDir, lang);
- const auto affPath = rawPath + ".aff";
- const auto dicPath = rawPath + ".dic";
- if (!QFileInfo(affPath).isFile() || !QFileInfo(dicPath).isFile()) {
- return;
- }
- const auto prepared = PreparePaths(affPath, dicPath);
- _hunspell = std::make_unique<Hunspell>(
- prepared.aff.constData(),
- prepared.dic.constData());
- _converter = std::make_unique<CharsetConverter>(
- _hunspell->get_dic_encoding());
- if (!_converter->isValid()) {
- _hunspell.reset();
- }
- }
- bool HunspellEngine::isValid() const {
- return _hunspell != nullptr;
- }
- bool HunspellEngine::spell(const QString &word) const {
- return _hunspell->spell(_converter->fromUnicode(word));
- }
- void HunspellEngine::suggest(
- const QString &wrongWord,
- std::vector<QString> *optionalSuggestions) {
- const auto stdWord = _converter->fromUnicode(wrongWord);
- for (const auto &guess : _hunspell->suggest(stdWord)) {
- if (optionalSuggestions->size() == kMaxSuggestions) {
- return;
- }
- const auto qguess = _converter->toUnicode(guess);
- if (ranges::contains(*optionalSuggestions, qguess)) {
- continue;
- }
- optionalSuggestions->push_back(qguess);
- }
- }
- QString HunspellEngine::lang() {
- return _lang;
- }
- QChar::Script HunspellEngine::script() {
- return _script;
- }
- std::vector<QString> HunspellService::activeLanguages() {
- return _activeLanguages;
- }
- // Thread: Any.
- HunspellService::HunspellService()
- : _engines(std::make_shared<std::vector<std::unique_ptr<HunspellEngine>>>())
- , _customDict(std::make_unique<Hunspell>("", ""))
- , _epoch(std::make_shared<std::atomic<int>>(0))
- , _engineMutex(std::make_shared<std::shared_mutex>()) {
- // This is not perfectly safe, but should be mostly fine.
- static const auto UtfInitializer = LoadUtfInitializer();
- readFile();
- }
- // Thread: Main.
- HunspellService::~HunspellService() {
- std::unique_lock lock(*_engineMutex);
- }
- // Thread: Main.
- std::vector<QString> &HunspellService::addedWords(const QString &word) {
- return _addedWords[::Spellchecker::WordScript(word)];
- }
- // Thread: Main.
- void HunspellService::updateLanguages(std::vector<QString> langs) {
- Expects(_suggestionsEpoch.load() == 0);
- *_epoch += 1;
- _activeLanguages.clear();
- const auto savedEpoch = _epoch.get()->load();
- crl::async([=,
- epoch = _epoch,
- engineMutex = _engineMutex,
- engines = _engines] {
- using UniqueEngine = std::unique_ptr<HunspellEngine>;
- const auto engineLangFilter = [&](const UniqueEngine &engine) {
- return engine ? ranges::contains(langs, engine->lang()) : false;
- };
- if (savedEpoch != epoch.get()->load()) {
- return;
- }
- const auto engineLang = [](const UniqueEngine &engine) {
- return engine ? engine->lang() : QString();
- };
- const auto missedLangs = [&] {
- std::shared_lock lock(*engineMutex);
- return ranges::views::all(
- langs
- ) | ranges::views::filter([&](auto &lang) {
- return !ranges::contains(*engines, lang, engineLang);
- }) | ranges::to_vector;
- }();
- // Added new enabled engines.
- auto localEngines = ranges::views::all(
- missedLangs
- ) | ranges::views::transform([&](auto &lang) -> UniqueEngine {
- if (savedEpoch != epoch.get()->load()) {
- return nullptr;
- }
- auto engine = std::make_unique<HunspellEngine>(lang);
- if (!engine->isValid()) {
- return nullptr;
- }
- return engine;
- }) | ranges::to_vector;
- if (savedEpoch != epoch.get()->load()) {
- return;
- }
- {
- std::unique_lock lock(*engineMutex);
- *engines = ranges::views::concat(
- *engines, localEngines
- ) | ranges::views::filter(
- // All filtered objects will be automatically released.
- engineLangFilter
- ) | ranges::views::transform([](auto &engine) {
- return std::move(engine);
- }) | ranges::to_vector;
- }
- crl::on_main([=] {
- if (savedEpoch != epoch.get()->load()) {
- return;
- }
- *epoch = 0;
- _activeLanguages = ranges::views::all(
- *engines
- ) | ranges::views::transform(&HunspellEngine::lang)
- | ranges::to_vector;
- ::Spellchecker::UpdateSupportedScripts(_activeLanguages);
- });
- });
- }
- // Thread: Any.
- bool HunspellService::checkSpelling(const QString &wordToCheck) {
- const auto wordScript = ::Spellchecker::WordScript(wordToCheck);
- if (ranges::contains(_ignoredWords[wordScript], wordToCheck)) {
- return true;
- }
- if (ranges::contains(_addedWords[wordScript], wordToCheck)) {
- return true;
- }
- std::shared_lock lock(*_engineMutex);
- for (const auto &engine : *_engines) {
- if (wordScript != engine->script()) {
- continue;
- }
- if (engine->spell(wordToCheck)) {
- return true;
- }
- }
- return false;
- }
- // Thread: Any.
- void HunspellService::fillSuggestionList(
- const QString &wrongWord,
- std::vector<QString> *optionalSuggestions) {
- const auto wordScript = ::Spellchecker::WordScript(wrongWord);
- const auto customGuesses = _customDict->suggest(wrongWord.toStdString());
- *optionalSuggestions = ranges::views::all(
- customGuesses
- ) | ranges::views::take(
- kMaxSuggestions
- ) | ranges::views::transform([](auto &guess) {
- return QString::fromStdString(guess);
- }) | ranges::to_vector;
- const auto startTime = crl::now();
- _suggestionsEpoch++;
- const auto savedEpoch = _suggestionsEpoch.load();
- {
- std::shared_lock lock(*_engineMutex);
- for (const auto &engine : *_engines) {
- if (_suggestionsEpoch.load() > savedEpoch) {
- // There is a newer request to fill suggestion list,
- // So we should drop the current one.
- optionalSuggestions->clear();
- break;
- }
- if (optionalSuggestions->size() == kMaxSuggestions
- || ((crl::now() - startTime) > kTimeLimitSuggestion)) {
- break;
- }
- if (wordScript != engine->script()) {
- continue;
- }
- engine->suggest(wrongWord, optionalSuggestions);
- }
- }
- _suggestionsEpoch--;
- }
- // Thread: Main.
- void HunspellService::ignoreWord(const QString &word) {
- const auto wordScript = ::Spellchecker::WordScript(word);
- _customDict->add(word.toStdString());
- _ignoredWords[wordScript].push_back(word);
- }
- // Thread: Main.
- bool HunspellService::isWordInDictionary(const QString &word) {
- return ranges::contains(addedWords(word), word);
- }
- // Thread: Main.
- void HunspellService::addWord(const QString &word) {
- const auto count = ranges::accumulate(
- ranges::views::values(_addedWords),
- 0,
- ranges::plus(),
- &std::vector<QString>::size);
- if (count > kMaxSyncableDictionaryWords) {
- return;
- }
- _customDict->add(word.toStdString());
- addedWords(word).push_back(word);
- writeToFile();
- }
- // Thread: Main.
- void HunspellService::removeWord(const QString &word) {
- _customDict->remove(word.toStdString());
- auto &vector = addedWords(word);
- vector.erase(ranges::remove(vector, word), end(vector));
- writeToFile();
- }
- // Thread: Main.
- void HunspellService::writeToFile() {
- auto f = QFile(CustomDictionaryPath());
- if (!f.open(QIODevice::WriteOnly)) {
- return;
- }
- auto &&temp = ranges::views::join(
- ranges::views::values(_addedWords)
- ) | ranges::views::transform([&](auto &str) {
- return str + kLineBreak;
- });
- const auto result = ranges::accumulate(std::move(temp), QString{});
- f.write(result.toUtf8());
- f.close();
- }
- // Thread: Main.
- void HunspellService::readFile() {
- using namespace ::Spellchecker;
- auto f = QFile(CustomDictionaryPath());
- if (const auto info = QFileInfo(f);
- !info.isFile()
- || (info.size() > 100 * 1024)
- || !f.open(QIODevice::ReadOnly)) {
- if (info.isDir()) {
- QDir(info.path()).removeRecursively();
- }
- return;
- }
- const auto data = f.readAll();
- f.close();
- if (data.isEmpty()) {
- return;
- }
- // {"a", "1", "β"};
- auto splitedWords = QString::fromUtf8(data).split(kLineBreak)
- | ranges::to_vector
- | ranges::actions::sort
- | ranges::actions::unique;
- auto filteredWords = (
- splitedWords
- ) | ranges::views::filter([](auto &word) {
- // Ignore words with mixed scripts or non-words characters.
- return !word.isEmpty() && !IsWordSkippable(word, false);
- }) | ranges::views::take(
- kMaxSyncableDictionaryWords
- ) | ranges::views::transform([](auto &word) {
- return std::move(word);
- }) | ranges::to_vector;
- ranges::for_each(filteredWords, [&](auto &word) {
- _customDict->add(word.toStdString());
- });
- // {{"a"}, {"β"}};
- auto groupedWords = ranges::views::all(
- filteredWords
- ) | ranges::views::chunk_by([](auto &a, auto &b) {
- return WordScript(a) == WordScript(b);
- }) | ranges::views::transform([](auto &&rng) {
- return rng | ranges::to_vector;
- }) | ranges::to_vector;
- // {QChar::Script_Latin, QChar::Script_Greek};
- auto scripts = ranges::views::all(
- groupedWords
- ) | ranges::views::transform([](auto &vector) {
- return WordScript(vector.front());
- }) | ranges::to_vector;
- // {QChar::Script_Latin : {"a"}, QChar::Script_Greek : {"β"}};
- auto &&zip = ranges::views::zip(
- scripts, groupedWords
- );
- _addedWords = zip | ranges::to<WordsMap>();
- }
- ////// End of HunspellService class.
- HunspellService &SharedSpellChecker() {
- static auto spellchecker = HunspellService();
- return spellchecker;
- }
- } // namespace
- bool CheckSpelling(const QString &wordToCheck) {
- return SharedSpellChecker().checkSpelling(wordToCheck);
- }
- void FillSuggestionList(
- const QString &wrongWord,
- std::vector<QString> *optionalSuggestions) {
- SharedSpellChecker().fillSuggestionList(wrongWord, optionalSuggestions);
- }
- void AddWord(const QString &word) {
- SharedSpellChecker().addWord(word);
- }
- void RemoveWord(const QString &word) {
- SharedSpellChecker().removeWord(word);
- }
- void IgnoreWord(const QString &word) {
- SharedSpellChecker().ignoreWord(word);
- }
- bool IsWordInDictionary(const QString &wordToCheck) {
- return SharedSpellChecker().isWordInDictionary(wordToCheck);
- }
- void UpdateLanguages(std::vector<int> languages) {
- const auto languageCodes = ranges::views::all(
- languages
- ) | ranges::views::transform(
- LocaleNameFromLangId
- ) | ranges::to_vector;
- ::Spellchecker::UpdateSupportedScripts(std::vector<QString>());
- SharedSpellChecker().updateLanguages(languageCodes);
- }
- std::vector<QString> ActiveLanguages() {
- return SharedSpellChecker().activeLanguages();
- }
- void CheckSpellingText(
- const QString &text,
- MisspelledWords *misspelledWords) {
- *misspelledWords = ::Spellchecker::RangesFromText(
- text,
- [](const QString &word) {
- return !::Spellchecker::IsWordSkippable(word)
- && CheckSpelling(word);
- });
- }
- } // namespace Platform::Spellchecker::ThirdParty
|