// spellcheck_highlight_syntax.cpp (7.3 KB)
// This file is part of Desktop App Toolkit,
// a set of libraries for developing nice desktop applications.
//
// For license and copyright information please follow this link:
// https://github.com/desktop-app/legal/blob/master/LEGAL
//
  7. #include "spellcheck/spellcheck_highlight_syntax.h"
  8. #include "base/base_file_utilities.h"
  9. #include "base/debug_log.h"
  10. #include "base/flat_map.h"
  11. #include "crl/crl_object_on_queue.h"
  12. #include "SyntaxHighlighter.h"
  13. #include <QtCore/QFile>
  14. #include <xxhash.h>
  15. #include <variant>
  16. #include <string>
  17. void spellchecker_InitHighlightingResource() {
  18. #ifdef Q_OS_MAC // Use resources from the .app bundle on macOS.
  19. base::RegisterBundledResources(u"lib_spellcheck.rcc"_q);
  20. #else // Q_OS_MAC
  21. Q_INIT_RESOURCE(highlighting);
  22. #endif // Q_OS_MAC
  23. }
  24. namespace Spellchecker {
  25. namespace {
  26. base::flat_map<XXH64_hash_t, EntitiesInText> Cache;
  27. HighlightProcessId ProcessIdAutoIncrement/* = 0*/;
  28. rpl::event_stream<HighlightProcessId> ReadyStream;
  29. class QueuedHighlighter final {
  30. public:
  31. QueuedHighlighter();
  32. struct Request {
  33. uint64 hash = 0;
  34. QString text;
  35. QString language;
  36. };
  37. void process(Request request);
  38. void notify(HighlightProcessId id);
  39. private:
  40. using Task = std::variant<Request, HighlightProcessId>;
  41. std::vector<Task> _tasks;
  42. std::unique_ptr<SyntaxHighlighter> _highlighter;
  43. };
  44. [[nodiscard]] crl::object_on_queue<QueuedHighlighter> &Highlighter() {
  45. static auto result = crl::object_on_queue<QueuedHighlighter>();
  46. return result;
  47. }
  48. [[nodiscard]] const QString &LookupAlias(const QString &language) {
  49. static const auto kAliases = base::flat_map<QString, QString>{
  50. { u"diff"_q, u"git"_q },
  51. { u"patch"_q, u"git"_q },
  52. };
  53. const auto i = kAliases.find(language);
  54. return (i != end(kAliases)) ? i->second : language;
  55. }
  56. QueuedHighlighter::QueuedHighlighter() {
  57. spellchecker_InitHighlightingResource();
  58. }
  59. void QueuedHighlighter::process(Request request) {
  60. if (!_highlighter) {
  61. auto file = QFile(":/misc/grammars.dat");
  62. const auto size = file.size();
  63. const auto ok1 = file.open(QIODevice::ReadOnly);
  64. auto grammars = std::string();
  65. grammars.resize(size);
  66. const auto ok2 = (file.read(grammars.data(), size) == size);
  67. Assert(ok1 && ok2);
  68. _highlighter = std::make_unique<SyntaxHighlighter>(grammars);
  69. }
  70. const auto text = request.text.toStdString();
  71. const auto language = LookupAlias(request.language.toLower());
  72. const auto tokens = _highlighter->tokenize(text, language.toStdString());
  73. static const auto colors = base::flat_map<std::string, int>{
  74. { "comment" , 1 },
  75. { "block-comment", 1 },
  76. { "prolog" , 1 },
  77. { "doctype" , 1 },
  78. { "cdata" , 1 },
  79. { "punctuation" , 2 },
  80. { "property" , 3 },
  81. { "tag" , 3 },
  82. { "boolean" , 3 },
  83. { "number" , 3 },
  84. { "constant" , 3 },
  85. { "symbol" , 3 },
  86. { "deleted" , 3 },
  87. { "selector" , 4 },
  88. { "attr-name" , 4 },
  89. { "string" , 4 },
  90. { "char" , 4 },
  91. { "builtin" , 4 },
  92. { "operator" , 5 },
  93. { "entity" , 5 },
  94. { "url" , 5 },
  95. { "atrule" , 6 },
  96. { "attr-value" , 6 },
  97. { "keyword" , 6 },
  98. { "function" , 6 },
  99. { "class-name" , 7 },
  100. { "inserted" , 8 },
  101. };
  102. auto offset = 0;
  103. auto entities = EntitiesInText();
  104. auto rebuilt = QString();
  105. rebuilt.reserve(request.text.size());
  106. const auto enumerate = [&](
  107. const TokenList &list,
  108. const std::string &type,
  109. auto &&self) -> void {
  110. for (const auto &node : list) {
  111. if (node.isSyntax()) {
  112. const auto &syntax = static_cast<const Syntax&>(node);
  113. self(syntax.children(), syntax.type(), self);
  114. } else {
  115. const auto text = static_cast<const Text&>(node).value();
  116. const auto utf16 = QString::fromUtf8(
  117. text.data(),
  118. text.size());
  119. const auto length = utf16.size();
  120. rebuilt.append(utf16);
  121. if (!type.empty()) {
  122. const auto i = colors.find(type);
  123. if (i != end(colors)) {
  124. entities.push_back(EntityInText(
  125. EntityType::Colorized,
  126. offset,
  127. length,
  128. QChar(ushort(i->second))));
  129. }
  130. }
  131. offset += length;
  132. }
  133. }
  134. };
  135. enumerate(tokens, std::string(), enumerate);
  136. const auto hash = request.hash;
  137. if (offset != request.text.size()) {
  138. // Something went wrong.
  139. LOG(("Highlighting Error: for language '%1', text: %2"
  140. ).arg(request.language, request.text));
  141. entities.clear();
  142. }
  143. crl::on_main([hash, entities = std::move(entities)]() mutable {
  144. Cache.emplace(hash, std::move(entities));
  145. });
  146. }
  147. void QueuedHighlighter::notify(HighlightProcessId id) {
  148. crl::on_main([=] {
  149. ReadyStream.fire_copy(id);
  150. });
  151. }
  152. struct CacheResult {
  153. uint64 hash = 0;
  154. const EntitiesInText *list = nullptr;
  155. explicit operator bool() const {
  156. return list != nullptr;
  157. }
  158. };
  159. [[nodiscard]] CacheResult FindInCache(
  160. const TextWithEntities &text,
  161. EntitiesInText::const_iterator i) {
  162. const auto view = QStringView(text.text).mid(i->offset(), i->length());
  163. const auto language = i->data();
  164. struct Destroyer {
  165. void operator()(XXH64_state_t *state) {
  166. if (state) {
  167. XXH64_freeState(state);
  168. }
  169. }
  170. };
  171. static const auto S = std::unique_ptr<XXH64_state_t, Destroyer>(
  172. XXH64_createState());
  173. const auto state = S.get();
  174. XXH64_reset(state, 0);
  175. XXH64_update(state, view.data(), view.size() * sizeof(ushort));
  176. XXH64_update(state, language.data(), language.size() * sizeof(ushort));
  177. const auto hash = XXH64_digest(state);
  178. const auto j = Cache.find(hash);
  179. return { hash, (j != Cache.cend()) ? &j->second : nullptr };
  180. }
  181. EntitiesInText::iterator Insert(
  182. TextWithEntities &text,
  183. EntitiesInText::iterator i,
  184. const EntitiesInText &entities) {
  185. auto next = i + 1;
  186. if (entities.empty()) {
  187. return next;
  188. }
  189. const auto offset = i->offset();
  190. if (next != text.entities.cend()
  191. && next->type() == entities.front().type()
  192. && next->offset() == offset + entities.front().offset()) {
  193. return next;
  194. }
  195. const auto length = i->length();
  196. for (const auto &entity : entities) {
  197. if (entity.offset() + entity.length() > length) {
  198. break;
  199. }
  200. auto j = text.entities.insert(next, entity);
  201. j->shiftRight(offset);
  202. next = j + 1;
  203. }
  204. return next;
  205. }
  206. void Schedule(
  207. uint64 hash,
  208. const TextWithEntities &text,
  209. EntitiesInText::const_iterator i) {
  210. Highlighter().with([
  211. hash,
  212. text = text.text.mid(i->offset(), i->length()),
  213. language = i->data()
  214. ](QueuedHighlighter &instance) mutable {
  215. instance.process({ hash, std::move(text), std::move(language) });
  216. });
  217. }
  218. void Notify(uint64 processId) {
  219. Highlighter().with([processId](QueuedHighlighter &instance) {
  220. instance.notify(processId);
  221. });
  222. }
  223. } // namespace
  224. HighlightProcessId TryHighlightSyntax(TextWithEntities &text) {
  225. auto b = text.entities.begin();
  226. auto i = b;
  227. auto e = text.entities.end();
  228. const auto checking = [](const EntityInText &entity) {
  229. return (entity.type() == EntityType::Pre)
  230. && !entity.data().isEmpty();
  231. };
  232. auto processId = HighlightProcessId();
  233. while (true) {
  234. i = std::find_if(i, e, checking);
  235. if (i == e) {
  236. break;
  237. } else if (const auto already = FindInCache(text, i)) {
  238. i = Insert(text, i, *already.list);
  239. b = text.entities.begin();
  240. e = text.entities.end();
  241. } else {
  242. Schedule(already.hash, text, i);
  243. if (!processId) {
  244. processId = ++ProcessIdAutoIncrement;
  245. }
  246. ++i;
  247. }
  248. }
  249. if (processId) {
  250. Notify(processId);
  251. }
  252. return processId;
  253. }
  254. rpl::producer<HighlightProcessId> HighlightReady() {
  255. return ReadyStream.events();
  256. }
  257. } // namespace Spellchecker