spellcheck_win.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. // This file is part of Desktop App Toolkit,
  2. // a set of libraries for developing nice desktop applications.
  3. //
  4. // For license and copyright information please follow this link:
  5. // https://github.com/desktop-app/legal/blob/master/LEGAL
  6. //
  7. #include "spellcheck/platform/win/spellcheck_win.h"
  8. #include "base/platform/base_platform_info.h"
  9. #include "spellcheck/third_party/hunspell_controller.h"
  10. #include <wrl/client.h>
  11. #include <spellcheck.h>
  12. #include <QtCore/QDir>
  13. #include <QtCore/QLocale>
  14. #include <QVector>
  15. using namespace Microsoft::WRL;
  16. namespace Platform::Spellchecker {
  17. namespace {
  // Largest text size (in UTF-16 code units) that is passed to the native
  // spellchecker in a single call; longer texts are checked chunk by chunk.
  constexpr int kChunk = 5000;
  19. // Seems like ISpellChecker API has bugs for Persian language (aka Farsi).
  20. [[nodiscard]] inline bool IsPersianLanguage(const QString &langTag) {
  21. return langTag.startsWith(QStringLiteral("fa"));
  22. }
  23. [[nodiscard]] inline LPCWSTR Q2WString(QStringView string) {
  24. return (LPCWSTR)string.utf16();
  25. }
  26. [[nodiscard]] inline auto SystemLanguages() {
  27. const auto appdata = qEnvironmentVariable("appdata");
  28. const auto dir = QDir(appdata + QString("\\Microsoft\\Spelling"));
  29. auto list = QStringList(SystemLanguage());
  30. list << (dir.exists()
  31. ? dir.entryList(QDir::Dirs | QDir::NoDotAndDotDot)
  32. : QLocale::system().uiLanguages());
  33. list.removeDuplicates();
  34. return list | ranges::to_vector;
  35. }
  36. // WindowsSpellChecker class is used to store all the COM objects and
  37. // control their lifetime. The class also provides wrappers for
  38. // ISpellCheckerFactory and ISpellChecker APIs. All COM calls are on the
  39. // background thread.
  40. class WindowsSpellChecker {
  41. public:
  42. WindowsSpellChecker();
  43. void addWord(LPCWSTR word);
  44. void removeWord(LPCWSTR word);
  45. void ignoreWord(LPCWSTR word);
  46. [[nodiscard]] bool checkSpelling(LPCWSTR word);
  47. void fillSuggestionList(
  48. LPCWSTR wrongWord,
  49. std::vector<QString> *optionalSuggestions);
  50. void checkSpellingText(
  51. LPCWSTR text,
  52. MisspelledWords *misspelledWordRanges,
  53. int offset);
  54. [[nodiscard]] std::vector<QString> systemLanguages();
  55. void chunkedCheckSpellingText(
  56. QStringView textView,
  57. MisspelledWords *misspelledWords);
  58. private:
  59. void createFactory();
  60. [[nodiscard]] bool isLanguageSupported(const LPCWSTR &lang);
  61. void createSpellCheckers();
  62. ComPtr<ISpellCheckerFactory> _spellcheckerFactory;
  63. std::vector<std::pair<QString, ComPtr<ISpellChecker>>> _spellcheckerMap;
  64. };
  65. WindowsSpellChecker::WindowsSpellChecker() {
  66. createFactory();
  67. createSpellCheckers();
  68. }
  69. void WindowsSpellChecker::createFactory() {
  70. if (FAILED(CoCreateInstance(
  71. __uuidof(SpellCheckerFactory),
  72. nullptr,
  73. (CLSCTX_INPROC_SERVER | CLSCTX_LOCAL_SERVER),
  74. IID_PPV_ARGS(&_spellcheckerFactory)))) {
  75. _spellcheckerFactory = nullptr;
  76. }
  77. }
  78. void WindowsSpellChecker::createSpellCheckers() {
  79. if (!_spellcheckerFactory) {
  80. return;
  81. }
  82. for (const auto &lang : SystemLanguages()) {
  83. const auto wlang = Q2WString(lang);
  84. if (!isLanguageSupported(wlang)) {
  85. continue;
  86. }
  87. if (ranges::contains(ranges::views::keys(_spellcheckerMap), lang)) {
  88. continue;
  89. }
  90. auto spellchecker = ComPtr<ISpellChecker>();
  91. auto hr = _spellcheckerFactory->CreateSpellChecker(
  92. wlang,
  93. &spellchecker);
  94. if (SUCCEEDED(hr)) {
  95. _spellcheckerMap.push_back({ lang, spellchecker });
  96. }
  97. }
  98. }
  99. bool WindowsSpellChecker::isLanguageSupported(const LPCWSTR &lang) {
  100. if (!_spellcheckerFactory) {
  101. return false;
  102. }
  103. auto isSupported = (BOOL)false;
  104. auto hr = _spellcheckerFactory->IsSupported(lang, &isSupported);
  105. return SUCCEEDED(hr) && isSupported;
  106. }
  107. void WindowsSpellChecker::fillSuggestionList(
  108. LPCWSTR wrongWord,
  109. std::vector<QString> *optionalSuggestions) {
  110. auto i = 0;
  111. for (const auto &[langTag, spellchecker] : _spellcheckerMap) {
  112. if (IsPersianLanguage(langTag)) {
  113. continue;
  114. }
  115. auto suggestions = ComPtr<IEnumString>();
  116. auto hr = spellchecker->Suggest(wrongWord, &suggestions);
  117. if (hr != S_OK) {
  118. continue;
  119. }
  120. while (true) {
  121. wchar_t *suggestion = nullptr;
  122. hr = suggestions->Next(1, &suggestion, nullptr);
  123. if (hr != S_OK) {
  124. break;
  125. }
  126. const auto guess = QString::fromWCharArray(
  127. suggestion,
  128. wcslen(suggestion));
  129. CoTaskMemFree(suggestion);
  130. if (!guess.isEmpty()) {
  131. optionalSuggestions->push_back(guess);
  132. if (++i >= kMaxSuggestions) {
  133. return;
  134. }
  135. }
  136. }
  137. }
  138. }
  139. bool WindowsSpellChecker::checkSpelling(LPCWSTR word) {
  140. for (const auto &[_, spellchecker] : _spellcheckerMap) {
  141. auto spellingErrors = ComPtr<IEnumSpellingError>();
  142. auto hr = spellchecker->Check(word, &spellingErrors);
  143. if (SUCCEEDED(hr) && spellingErrors) {
  144. auto spellingError = ComPtr<ISpellingError>();
  145. auto startIndex = ULONG(0);
  146. auto errorLength = ULONG(0);
  147. auto action = CORRECTIVE_ACTION_NONE;
  148. hr = spellingErrors->Next(&spellingError);
  149. if (SUCCEEDED(hr) &&
  150. spellingError &&
  151. SUCCEEDED(spellingError->get_StartIndex(&startIndex)) &&
  152. SUCCEEDED(spellingError->get_Length(&errorLength)) &&
  153. SUCCEEDED(spellingError->get_CorrectiveAction(&action)) &&
  154. (action == CORRECTIVE_ACTION_GET_SUGGESTIONS ||
  155. action == CORRECTIVE_ACTION_REPLACE)) {
  156. } else {
  157. return true;
  158. }
  159. }
  160. }
  161. return false;
  162. }
  163. void WindowsSpellChecker::checkSpellingText(
  164. LPCWSTR text,
  165. MisspelledWords *misspelledWordRanges,
  166. int offset) {
  167. // The spellchecker marks words not from its own language as misspelled.
  168. // So we only return words that are marked
  169. // as misspelled in all spellcheckers.
  170. auto misspelledWords = MisspelledWords();
  171. constexpr auto isActionGood = [](auto action) {
  172. return action == CORRECTIVE_ACTION_GET_SUGGESTIONS
  173. || action == CORRECTIVE_ACTION_REPLACE;
  174. };
  175. for (const auto &[langTag, spellchecker] : _spellcheckerMap) {
  176. auto spellingErrors = ComPtr<IEnumSpellingError>();
  177. auto hr = IsPersianLanguage(langTag)
  178. ? spellchecker->Check(text, &spellingErrors)
  179. : spellchecker->ComprehensiveCheck(text, &spellingErrors);
  180. if (!(SUCCEEDED(hr) && spellingErrors)) {
  181. continue;
  182. }
  183. auto tempMisspelled = MisspelledWords();
  184. auto spellingError = ComPtr<ISpellingError>();
  185. for (; hr == S_OK; hr = spellingErrors->Next(&spellingError)) {
  186. auto startIndex = ULONG(0);
  187. auto errorLength = ULONG(0);
  188. auto action = CORRECTIVE_ACTION_NONE;
  189. if (!(SUCCEEDED(hr)
  190. && spellingError
  191. && SUCCEEDED(spellingError->get_StartIndex(&startIndex))
  192. && SUCCEEDED(spellingError->get_Length(&errorLength))
  193. && SUCCEEDED(spellingError->get_CorrectiveAction(&action))
  194. && isActionGood(action))) {
  195. continue;
  196. }
  197. const auto word = std::pair(
  198. (int)startIndex + offset,
  199. (int)errorLength);
  200. if (misspelledWords.empty()
  201. || ranges::contains(misspelledWords, word)) {
  202. tempMisspelled.push_back(std::move(word));
  203. }
  204. }
  205. // If the tempMisspelled vector is empty at least once,
  206. // it means that the all words will be correct in the end
  207. // and it makes no sense to check other languages.
  208. if (tempMisspelled.empty()) {
  209. return;
  210. }
  211. misspelledWords = std::move(tempMisspelled);
  212. }
  213. if (offset) {
  214. for (auto &m : misspelledWords) {
  215. misspelledWordRanges->push_back(std::move(m));
  216. }
  217. } else {
  218. *misspelledWordRanges = misspelledWords;
  219. }
  220. }
  221. void WindowsSpellChecker::addWord(LPCWSTR word) {
  222. for (const auto &[_, spellchecker] : _spellcheckerMap) {
  223. spellchecker->Add(word);
  224. }
  225. }
  226. void WindowsSpellChecker::removeWord(LPCWSTR word) {
  227. for (const auto &[_, spellchecker] : _spellcheckerMap) {
  228. auto spellchecker2 = ComPtr<ISpellChecker2>();
  229. spellchecker->QueryInterface(IID_PPV_ARGS(&spellchecker2));
  230. if (spellchecker2) {
  231. spellchecker2->Remove(word);
  232. }
  233. }
  234. }
  235. void WindowsSpellChecker::ignoreWord(LPCWSTR word) {
  236. for (const auto &[_, spellchecker] : _spellcheckerMap) {
  237. spellchecker->Ignore(word);
  238. }
  239. }
  240. std::vector<QString> WindowsSpellChecker::systemLanguages() {
  241. return ranges::views::keys(_spellcheckerMap) | ranges::to_vector;
  242. }
  243. void WindowsSpellChecker::chunkedCheckSpellingText(
  244. QStringView textView,
  245. MisspelledWords *misspelledWords) {
  246. auto i = 0;
  247. auto chunkBuffer = std::vector<wchar_t>();
  248. while (i != textView.size()) {
  249. const auto provisionalChunkSize = std::min(
  250. kChunk,
  251. int(textView.size() - i));
  252. const auto chunkSize = [&] {
  253. const auto until = std::max(
  254. 0,
  255. provisionalChunkSize - ::Spellchecker::kMaxWordSize);
  256. for (auto n = provisionalChunkSize; n > until; n--) {
  257. if (textView.at(i + n - 1).isLetterOrNumber()) {
  258. continue;
  259. } else {
  260. return n;
  261. }
  262. }
  263. return provisionalChunkSize;
  264. }();
  265. const auto chunk = textView.mid(i, chunkSize);
  266. chunkBuffer.resize(chunk.size() + 1);
  267. const auto count = chunk.toWCharArray(chunkBuffer.data());
  268. chunkBuffer[count] = '\0';
  269. checkSpellingText(
  270. (LPCWSTR)chunkBuffer.data(),
  271. misspelledWords,
  272. i);
  273. i += chunk.size();
  274. }
  275. }
  276. ////// End of WindowsSpellChecker class.
  277. WindowsSpellChecker &SharedSpellChecker() {
  278. static auto spellchecker = WindowsSpellChecker();
  279. return spellchecker;
  280. }
  281. } // namespace
  282. // TODO: Add a better work with the Threading Models.
  283. // All COM objects should be created asynchronously
  284. // if we want to work with them asynchronously.
  285. // Some calls can be made in the main thread before spellchecking
  286. // (e.g. KnownLanguages), so we have to init it asynchronously first.
  287. void Init() {
  288. if (IsSystemSpellchecker()) {
  289. crl::async(SharedSpellChecker);
  290. }
  291. }
  292. bool IsSystemSpellchecker() {
  293. // Windows 7 does not support spellchecking.
  294. // https://docs.microsoft.com/en-us/windows/win32/api/spellcheck/nn-spellcheck-ispellchecker
  295. return IsWindows8OrGreater();
  296. }
  297. std::vector<QString> ActiveLanguages() {
  298. if (IsSystemSpellchecker()) {
  299. return SharedSpellChecker().systemLanguages();
  300. }
  301. return ThirdParty::ActiveLanguages();
  302. }
  303. bool CheckSpelling(const QString &wordToCheck) {
  304. if (!IsSystemSpellchecker()) {
  305. return ThirdParty::CheckSpelling(wordToCheck);
  306. }
  307. return SharedSpellChecker().checkSpelling(Q2WString(wordToCheck));
  308. }
  309. void FillSuggestionList(
  310. const QString &wrongWord,
  311. std::vector<QString> *optionalSuggestions) {
  312. if (IsSystemSpellchecker()) {
  313. SharedSpellChecker().fillSuggestionList(
  314. Q2WString(wrongWord),
  315. optionalSuggestions);
  316. return;
  317. }
  318. ThirdParty::FillSuggestionList(
  319. wrongWord,
  320. optionalSuggestions);
  321. }
  322. void AddWord(const QString &word) {
  323. if (IsSystemSpellchecker()) {
  324. SharedSpellChecker().addWord(Q2WString(word));
  325. } else {
  326. ThirdParty::AddWord(word);
  327. }
  328. }
  329. void RemoveWord(const QString &word) {
  330. if (IsSystemSpellchecker()) {
  331. SharedSpellChecker().removeWord(Q2WString(word));
  332. } else {
  333. ThirdParty::RemoveWord(word);
  334. }
  335. }
  336. void IgnoreWord(const QString &word) {
  337. if (IsSystemSpellchecker()) {
  338. SharedSpellChecker().ignoreWord(Q2WString(word));
  339. } else {
  340. ThirdParty::IgnoreWord(word);
  341. }
  342. }
  343. bool IsWordInDictionary(const QString &wordToCheck) {
  344. if (IsSystemSpellchecker()) {
  345. // ISpellChecker can't check if a word is in the dictionary.
  346. return false;
  347. }
  348. return ThirdParty::IsWordInDictionary(wordToCheck);
  349. }
  350. void UpdateLanguages(std::vector<int> languages) {
  351. if (!IsSystemSpellchecker()) {
  352. ThirdParty::UpdateLanguages(languages);
  353. return;
  354. }
  355. crl::async([=] {
  356. const auto result = ActiveLanguages();
  357. crl::on_main([=] {
  358. ::Spellchecker::UpdateSupportedScripts(result);
  359. });
  360. });
  361. }
  362. void CheckSpellingText(
  363. const QString &text,
  364. MisspelledWords *misspelledWords) {
  365. if (IsSystemSpellchecker()) {
  366. // There are certain strings with a lot of 'paragraph separators'
  367. // that crash the native Windows spellchecker. We replace them
  368. // with spaces (no difference for the checking), they don't crash.
  369. const auto check = QString(text).replace(QChar(8233), QChar(32));
  370. if (check.size() > kChunk) {
  371. // On some versions of Windows 10,
  372. // checking large text with specific characters (e.g. @)
  373. // will throw the std::regex_error::error_complexity exception,
  374. // so we have to split the text.
  375. SharedSpellChecker().chunkedCheckSpellingText(
  376. check,
  377. misspelledWords);
  378. } else {
  379. SharedSpellChecker().checkSpellingText(
  380. (LPCWSTR)check.utf16(),
  381. misspelledWords,
  382. 0);
  383. }
  384. return;
  385. }
  386. ThirdParty::CheckSpellingText(text, misspelledWords);
  387. }
  388. } // namespace Platform::Spellchecker