hunspell_controller.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. // This file is part of Desktop App Toolkit,
  2. // a set of libraries for developing nice desktop applications.
  3. //
  4. // For license and copyright information please follow this link:
  5. // https://github.com/desktop-app/legal/blob/master/LEGAL
  6. //
  7. #include "spellcheck/third_party/hunspell_controller.h"
  8. #include "spellcheck/spellcheck_value.h"
  9. #include <mutex>
  10. #include <shared_mutex>
  11. #include <QDir>
  12. #include <QFileInfo>
  13. #include <hunspell/hunspell.hxx>
  14. #if __has_include(<glib/glib.hpp>)
  15. #include <glib/glib.hpp>
  16. using namespace gi::repository;
  17. #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
  18. #include <QTextCodec>
  19. #endif // Qt < 6.0.0
  20. namespace Platform::Spellchecker::ThirdParty {
  21. namespace {
  22. using WordsMap = std::map<QChar::Script, std::vector<QString>>;
  23. // Maximum number of words in the custom spellcheck dictionary.
  24. constexpr auto kMaxSyncableDictionaryWords = 1300;
  25. constexpr auto kTimeLimitSuggestion = crl::time(1000);
  26. #ifdef Q_OS_WIN
  27. const auto kLineBreak = QByteArrayLiteral("\r\n");
  28. #else // Q_OS_WIN
  29. const auto kLineBreak = QByteArrayLiteral("\n");
  30. #endif // Q_OS_WIN
  31. struct PathPair {
  32. QByteArray aff;
  33. QByteArray dic;
  34. };
  35. [[nodiscard]] PathPair PreparePaths(const QString &aff, const QString &dic) {
  36. const auto convert = [&](const QString &path) {
  37. const auto result = QDir::toNativeSeparators(path).toUtf8();
  38. #ifdef Q_OS_WIN
  39. return "\\\\?\\" + result;
  40. #else // Q_OS_WIN
  41. return result;
  42. #endif // !Q_OS_WIN
  43. };
  44. return {
  45. .aff = convert(aff),
  46. .dic = convert(dic),
  47. };
  48. }
  49. auto LocaleNameFromLangId(int langId) {
  50. return ::Spellchecker::LocaleFromLangId(langId).name();
  51. }
  52. QString CustomDictionaryPath() {
  53. return QStringLiteral("%1/%2").arg(
  54. ::Spellchecker::WorkingDirPath(),
  55. "custom");
  56. }
  57. [[nodiscard]] Hunspell LoadUtfInitializer() {
  58. const auto full = [&](const QString &name) {
  59. return ::Spellchecker::WorkingDirPath() + '/' + name;
  60. };
  61. const auto aff = full(u"utf_helper.aff"_q);
  62. const auto dic = full(u"utf_helper.dic"_q);
  63. if (!QFile::exists(aff)) {
  64. QDir().mkpath(::Spellchecker::WorkingDirPath());
  65. auto f = QFile(aff);
  66. if (f.open(QIODevice::WriteOnly)) {
  67. f.write("SET UTF-8" + kLineBreak);
  68. }
  69. }
  70. if (!QFile::exists(dic)) {
  71. auto f = QFile(dic);
  72. if (f.open(QIODevice::WriteOnly)) {
  73. f.write("1" + kLineBreak + "Zzz" + kLineBreak);
  74. }
  75. }
  76. const auto prepared = PreparePaths(aff, dic);
  77. return Hunspell(prepared.aff.constData(), prepared.dic.constData());
  78. }
  79. class CharsetConverter final {
  80. public:
  81. CharsetConverter(const std::string &charset)
  82. #if __has_include(<glib/glib.hpp>)
  83. : _charset(charset)
  84. #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
  85. : _codec(QTextCodec::codecForName(charset.c_str()))
  86. #endif // Qt < 6.0.0
  87. {}
  88. [[nodiscard]] bool isValid() const {
  89. #if __has_include(<glib/glib.hpp>)
  90. const uchar empty[] = "";
  91. return GLib::convert(empty, 0, _charset, "UTF-8")
  92. && GLib::convert(empty, 0, "UTF-8", _charset);
  93. #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
  94. return _codec;
  95. #else // Qt < 6.0.0
  96. return false;
  97. #endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
  98. }
  99. [[nodiscard]] std::string fromUnicode(const QString &data) {
  100. #if __has_include(<glib/glib.hpp>)
  101. const auto utf8 = data.toStdString();
  102. return GLib::convert(
  103. reinterpret_cast<const uchar*>(utf8.data()),
  104. utf8.size(),
  105. _charset,
  106. "UTF-8",
  107. nullptr,
  108. nullptr) | ranges::to<std::string>;
  109. #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
  110. return _codec->fromUnicode(data).toStdString();
  111. #else // Qt < 6.0.0
  112. return {};
  113. #endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
  114. }
  115. [[nodiscard]] QString toUnicode(const std::string &data) {
  116. #if __has_include(<glib/glib.hpp>)
  117. return QString::fromStdString(GLib::convert(
  118. reinterpret_cast<const uchar*>(data.data()),
  119. data.size(),
  120. "UTF-8",
  121. _charset,
  122. nullptr,
  123. nullptr) | ranges::to<std::string>);
  124. #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
  125. return _codec->toUnicode(data.data(), data.size());
  126. #else // Qt < 6.0.0
  127. return {};
  128. #endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
  129. }
  130. private:
  131. #if __has_include(<glib/glib.hpp>)
  132. std::string _charset;
  133. #elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
  134. QTextCodec *_codec;
  135. #endif // Qt < 6.0.0
  136. };
  137. class HunspellEngine {
  138. public:
  139. HunspellEngine(const QString &lang);
  140. ~HunspellEngine() = default;
  141. bool isValid() const;
  142. bool spell(const QString &word) const;
  143. void suggest(
  144. const QString &wrongWord,
  145. std::vector<QString> *optionalSuggestions);
  146. QString lang();
  147. QChar::Script script();
  148. HunspellEngine(const HunspellEngine &) = delete;
  149. HunspellEngine &operator=(const HunspellEngine &) = delete;
  150. private:
  151. QString _lang;
  152. QChar::Script _script;
  153. std::unique_ptr<Hunspell> _hunspell;
  154. std::unique_ptr<CharsetConverter> _converter;
  155. };
  156. class HunspellService {
  157. public:
  158. HunspellService();
  159. ~HunspellService();
  160. void updateLanguages(std::vector<QString> langs);
  161. std::vector<QString> activeLanguages();
  162. [[nodiscard]] bool checkSpelling(const QString &wordToCheck);
  163. void fillSuggestionList(
  164. const QString &wrongWord,
  165. std::vector<QString> *optionalSuggestions);
  166. void addWord(const QString &word);
  167. void removeWord(const QString &word);
  168. void ignoreWord(const QString &word);
  169. bool isWordInDictionary(const QString &word);
  170. private:
  171. void writeToFile();
  172. void readFile();
  173. std::vector<QString> &addedWords(const QString &word);
  174. std::shared_ptr<std::vector<std::unique_ptr<HunspellEngine>>> _engines;
  175. std::vector<QString> _activeLanguages;
  176. // Use an empty Hunspell dictionary to fill it with our remembered words
  177. // for getting suggests.
  178. std::unique_ptr<Hunspell> _customDict;
  179. WordsMap _ignoredWords;
  180. WordsMap _addedWords;
  181. std::shared_ptr<std::atomic<int>> _epoch;
  182. std::atomic<int> _suggestionsEpoch = 0;
  183. std::shared_ptr<std::shared_mutex> _engineMutex;
  184. };
  185. HunspellEngine::HunspellEngine(const QString &lang)
  186. : _lang(lang)
  187. , _script(::Spellchecker::LocaleToScriptCode(lang)) {
  188. const auto workingDir = ::Spellchecker::WorkingDirPath();
  189. if (workingDir.isEmpty()) {
  190. return;
  191. }
  192. const auto rawPath = QString("%1/%2/%2").arg(workingDir, lang);
  193. const auto affPath = rawPath + ".aff";
  194. const auto dicPath = rawPath + ".dic";
  195. if (!QFileInfo(affPath).isFile() || !QFileInfo(dicPath).isFile()) {
  196. return;
  197. }
  198. const auto prepared = PreparePaths(affPath, dicPath);
  199. _hunspell = std::make_unique<Hunspell>(
  200. prepared.aff.constData(),
  201. prepared.dic.constData());
  202. _converter = std::make_unique<CharsetConverter>(
  203. _hunspell->get_dic_encoding());
  204. if (!_converter->isValid()) {
  205. _hunspell.reset();
  206. }
  207. }
  208. bool HunspellEngine::isValid() const {
  209. return _hunspell != nullptr;
  210. }
  211. bool HunspellEngine::spell(const QString &word) const {
  212. return _hunspell->spell(_converter->fromUnicode(word));
  213. }
  214. void HunspellEngine::suggest(
  215. const QString &wrongWord,
  216. std::vector<QString> *optionalSuggestions) {
  217. const auto stdWord = _converter->fromUnicode(wrongWord);
  218. for (const auto &guess : _hunspell->suggest(stdWord)) {
  219. if (optionalSuggestions->size() == kMaxSuggestions) {
  220. return;
  221. }
  222. const auto qguess = _converter->toUnicode(guess);
  223. if (ranges::contains(*optionalSuggestions, qguess)) {
  224. continue;
  225. }
  226. optionalSuggestions->push_back(qguess);
  227. }
  228. }
  229. QString HunspellEngine::lang() {
  230. return _lang;
  231. }
  232. QChar::Script HunspellEngine::script() {
  233. return _script;
  234. }
  235. std::vector<QString> HunspellService::activeLanguages() {
  236. return _activeLanguages;
  237. }
  238. // Thread: Any.
  239. HunspellService::HunspellService()
  240. : _engines(std::make_shared<std::vector<std::unique_ptr<HunspellEngine>>>())
  241. , _customDict(std::make_unique<Hunspell>("", ""))
  242. , _epoch(std::make_shared<std::atomic<int>>(0))
  243. , _engineMutex(std::make_shared<std::shared_mutex>()) {
  244. // This is not perfectly safe, but should be mostly fine.
  245. static const auto UtfInitializer = LoadUtfInitializer();
  246. readFile();
  247. }
  248. // Thread: Main.
  249. HunspellService::~HunspellService() {
  250. std::unique_lock lock(*_engineMutex);
  251. }
  252. // Thread: Main.
  253. std::vector<QString> &HunspellService::addedWords(const QString &word) {
  254. return _addedWords[::Spellchecker::WordScript(word)];
  255. }
  256. // Thread: Main.
  257. void HunspellService::updateLanguages(std::vector<QString> langs) {
  258. Expects(_suggestionsEpoch.load() == 0);
  259. *_epoch += 1;
  260. _activeLanguages.clear();
  261. const auto savedEpoch = _epoch.get()->load();
  262. crl::async([=,
  263. epoch = _epoch,
  264. engineMutex = _engineMutex,
  265. engines = _engines] {
  266. using UniqueEngine = std::unique_ptr<HunspellEngine>;
  267. const auto engineLangFilter = [&](const UniqueEngine &engine) {
  268. return engine ? ranges::contains(langs, engine->lang()) : false;
  269. };
  270. if (savedEpoch != epoch.get()->load()) {
  271. return;
  272. }
  273. const auto engineLang = [](const UniqueEngine &engine) {
  274. return engine ? engine->lang() : QString();
  275. };
  276. const auto missedLangs = [&] {
  277. std::shared_lock lock(*engineMutex);
  278. return ranges::views::all(
  279. langs
  280. ) | ranges::views::filter([&](auto &lang) {
  281. return !ranges::contains(*engines, lang, engineLang);
  282. }) | ranges::to_vector;
  283. }();
  284. // Added new enabled engines.
  285. auto localEngines = ranges::views::all(
  286. missedLangs
  287. ) | ranges::views::transform([&](auto &lang) -> UniqueEngine {
  288. if (savedEpoch != epoch.get()->load()) {
  289. return nullptr;
  290. }
  291. auto engine = std::make_unique<HunspellEngine>(lang);
  292. if (!engine->isValid()) {
  293. return nullptr;
  294. }
  295. return engine;
  296. }) | ranges::to_vector;
  297. if (savedEpoch != epoch.get()->load()) {
  298. return;
  299. }
  300. {
  301. std::unique_lock lock(*engineMutex);
  302. *engines = ranges::views::concat(
  303. *engines, localEngines
  304. ) | ranges::views::filter(
  305. // All filtered objects will be automatically released.
  306. engineLangFilter
  307. ) | ranges::views::transform([](auto &engine) {
  308. return std::move(engine);
  309. }) | ranges::to_vector;
  310. }
  311. crl::on_main([=] {
  312. if (savedEpoch != epoch.get()->load()) {
  313. return;
  314. }
  315. *epoch = 0;
  316. _activeLanguages = ranges::views::all(
  317. *engines
  318. ) | ranges::views::transform(&HunspellEngine::lang)
  319. | ranges::to_vector;
  320. ::Spellchecker::UpdateSupportedScripts(_activeLanguages);
  321. });
  322. });
  323. }
  324. // Thread: Any.
  325. bool HunspellService::checkSpelling(const QString &wordToCheck) {
  326. const auto wordScript = ::Spellchecker::WordScript(wordToCheck);
  327. if (ranges::contains(_ignoredWords[wordScript], wordToCheck)) {
  328. return true;
  329. }
  330. if (ranges::contains(_addedWords[wordScript], wordToCheck)) {
  331. return true;
  332. }
  333. std::shared_lock lock(*_engineMutex);
  334. for (const auto &engine : *_engines) {
  335. if (wordScript != engine->script()) {
  336. continue;
  337. }
  338. if (engine->spell(wordToCheck)) {
  339. return true;
  340. }
  341. }
  342. return false;
  343. }
  344. // Thread: Any.
  345. void HunspellService::fillSuggestionList(
  346. const QString &wrongWord,
  347. std::vector<QString> *optionalSuggestions) {
  348. const auto wordScript = ::Spellchecker::WordScript(wrongWord);
  349. const auto customGuesses = _customDict->suggest(wrongWord.toStdString());
  350. *optionalSuggestions = ranges::views::all(
  351. customGuesses
  352. ) | ranges::views::take(
  353. kMaxSuggestions
  354. ) | ranges::views::transform([](auto &guess) {
  355. return QString::fromStdString(guess);
  356. }) | ranges::to_vector;
  357. const auto startTime = crl::now();
  358. _suggestionsEpoch++;
  359. const auto savedEpoch = _suggestionsEpoch.load();
  360. {
  361. std::shared_lock lock(*_engineMutex);
  362. for (const auto &engine : *_engines) {
  363. if (_suggestionsEpoch.load() > savedEpoch) {
  364. // There is a newer request to fill suggestion list,
  365. // So we should drop the current one.
  366. optionalSuggestions->clear();
  367. break;
  368. }
  369. if (optionalSuggestions->size() == kMaxSuggestions
  370. || ((crl::now() - startTime) > kTimeLimitSuggestion)) {
  371. break;
  372. }
  373. if (wordScript != engine->script()) {
  374. continue;
  375. }
  376. engine->suggest(wrongWord, optionalSuggestions);
  377. }
  378. }
  379. _suggestionsEpoch--;
  380. }
  381. // Thread: Main.
  382. void HunspellService::ignoreWord(const QString &word) {
  383. const auto wordScript = ::Spellchecker::WordScript(word);
  384. _customDict->add(word.toStdString());
  385. _ignoredWords[wordScript].push_back(word);
  386. }
  387. // Thread: Main.
  388. bool HunspellService::isWordInDictionary(const QString &word) {
  389. return ranges::contains(addedWords(word), word);
  390. }
  391. // Thread: Main.
  392. void HunspellService::addWord(const QString &word) {
  393. const auto count = ranges::accumulate(
  394. ranges::views::values(_addedWords),
  395. 0,
  396. ranges::plus(),
  397. &std::vector<QString>::size);
  398. if (count > kMaxSyncableDictionaryWords) {
  399. return;
  400. }
  401. _customDict->add(word.toStdString());
  402. addedWords(word).push_back(word);
  403. writeToFile();
  404. }
  405. // Thread: Main.
  406. void HunspellService::removeWord(const QString &word) {
  407. _customDict->remove(word.toStdString());
  408. auto &vector = addedWords(word);
  409. vector.erase(ranges::remove(vector, word), end(vector));
  410. writeToFile();
  411. }
  412. // Thread: Main.
  413. void HunspellService::writeToFile() {
  414. auto f = QFile(CustomDictionaryPath());
  415. if (!f.open(QIODevice::WriteOnly)) {
  416. return;
  417. }
  418. auto &&temp = ranges::views::join(
  419. ranges::views::values(_addedWords)
  420. ) | ranges::views::transform([&](auto &str) {
  421. return str + kLineBreak;
  422. });
  423. const auto result = ranges::accumulate(std::move(temp), QString{});
  424. f.write(result.toUtf8());
  425. f.close();
  426. }
  427. // Thread: Main.
  428. void HunspellService::readFile() {
  429. using namespace ::Spellchecker;
  430. auto f = QFile(CustomDictionaryPath());
  431. if (const auto info = QFileInfo(f);
  432. !info.isFile()
  433. || (info.size() > 100 * 1024)
  434. || !f.open(QIODevice::ReadOnly)) {
  435. if (info.isDir()) {
  436. QDir(info.path()).removeRecursively();
  437. }
  438. return;
  439. }
  440. const auto data = f.readAll();
  441. f.close();
  442. if (data.isEmpty()) {
  443. return;
  444. }
  445. // {"a", "1", "β"};
  446. auto splitedWords = QString::fromUtf8(data).split(kLineBreak)
  447. | ranges::to_vector
  448. | ranges::actions::sort
  449. | ranges::actions::unique;
  450. auto filteredWords = (
  451. splitedWords
  452. ) | ranges::views::filter([](auto &word) {
  453. // Ignore words with mixed scripts or non-words characters.
  454. return !word.isEmpty() && !IsWordSkippable(word, false);
  455. }) | ranges::views::take(
  456. kMaxSyncableDictionaryWords
  457. ) | ranges::views::transform([](auto &word) {
  458. return std::move(word);
  459. }) | ranges::to_vector;
  460. ranges::for_each(filteredWords, [&](auto &word) {
  461. _customDict->add(word.toStdString());
  462. });
  463. // {{"a"}, {"β"}};
  464. auto groupedWords = ranges::views::all(
  465. filteredWords
  466. ) | ranges::views::chunk_by([](auto &a, auto &b) {
  467. return WordScript(a) == WordScript(b);
  468. }) | ranges::views::transform([](auto &&rng) {
  469. return rng | ranges::to_vector;
  470. }) | ranges::to_vector;
  471. // {QChar::Script_Latin, QChar::Script_Greek};
  472. auto scripts = ranges::views::all(
  473. groupedWords
  474. ) | ranges::views::transform([](auto &vector) {
  475. return WordScript(vector.front());
  476. }) | ranges::to_vector;
  477. // {QChar::Script_Latin : {"a"}, QChar::Script_Greek : {"β"}};
  478. auto &&zip = ranges::views::zip(
  479. scripts, groupedWords
  480. );
  481. _addedWords = zip | ranges::to<WordsMap>();
  482. }
  483. ////// End of HunspellService class.
  484. HunspellService &SharedSpellChecker() {
  485. static auto spellchecker = HunspellService();
  486. return spellchecker;
  487. }
  488. } // namespace
  489. bool CheckSpelling(const QString &wordToCheck) {
  490. return SharedSpellChecker().checkSpelling(wordToCheck);
  491. }
  492. void FillSuggestionList(
  493. const QString &wrongWord,
  494. std::vector<QString> *optionalSuggestions) {
  495. SharedSpellChecker().fillSuggestionList(wrongWord, optionalSuggestions);
  496. }
  497. void AddWord(const QString &word) {
  498. SharedSpellChecker().addWord(word);
  499. }
  500. void RemoveWord(const QString &word) {
  501. SharedSpellChecker().removeWord(word);
  502. }
  503. void IgnoreWord(const QString &word) {
  504. SharedSpellChecker().ignoreWord(word);
  505. }
  506. bool IsWordInDictionary(const QString &wordToCheck) {
  507. return SharedSpellChecker().isWordInDictionary(wordToCheck);
  508. }
  509. void UpdateLanguages(std::vector<int> languages) {
  510. const auto languageCodes = ranges::views::all(
  511. languages
  512. ) | ranges::views::transform(
  513. LocaleNameFromLangId
  514. ) | ranges::to_vector;
  515. ::Spellchecker::UpdateSupportedScripts(std::vector<QString>());
  516. SharedSpellChecker().updateLanguages(languageCodes);
  517. }
  518. std::vector<QString> ActiveLanguages() {
  519. return SharedSpellChecker().activeLanguages();
  520. }
  521. void CheckSpellingText(
  522. const QString &text,
  523. MisspelledWords *misspelledWords) {
  524. *misspelledWords = ::Spellchecker::RangesFromText(
  525. text,
  526. [](const QString &word) {
  527. return !::Spellchecker::IsWordSkippable(word)
  528. && CheckSpelling(word);
  529. });
  530. }
  531. } // namespace Platform::Spellchecker::ThirdParty