text_entity.cpp 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435
  1. // This file is part of Desktop App Toolkit,
  2. // a set of libraries for developing nice desktop applications.
  3. //
  4. // For license and copyright information please follow this link:
  5. // https://github.com/desktop-app/legal/blob/master/LEGAL
  6. //
  7. #include "ui/text/text_entity.h"
  8. #include "base/qthelp_url.h"
  9. #include "base/qthelp_regex.h"
  10. #include "base/crc32hash.h"
  11. #include "ui/text/text.h"
  12. #include "ui/widgets/fields/input_field.h"
  13. #include "ui/emoji_config.h"
  14. #include "ui/basic_click_handlers.h"
  15. #include "ui/integration.h"
  16. #include "base/qt/qt_common_adapters.h"
  17. #include <QtCore/QStack>
  18. #include <QtCore/QMimeData>
  19. #include <QtGui/QGuiApplication>
  20. #include <QtGui/QClipboard>
  21. namespace TextUtilities {
  22. namespace {
  // A single backslash character. Judging by the name it delimits tags in a
  // serialized tag string; the use sites are outside this excerpt - TODO confirm.
  23. constexpr auto kTagSeparator = '\\';
  // Brings the names declared in Ui::Text into scope for the rest of this
  // unnamed namespace.
  24. using namespace Ui::Text;
  25. QString ExpressionMailNameAtEnd() {
  26. // Matches email first part (before '@') at the end of the string.
  27. // First we find a domain without protocol (like "gmail.com"), then
  28. // we find '@' before it and then we look for the name before '@'.
  29. return QString::fromUtf8("[a-zA-Z\\-_\\.0-9]{1,256}$");
  30. }
  31. QString Quotes() {
  32. // UTF8 quotes and ellipsis
  33. return QString::fromUtf8("\xC2\xAB\xC2\xBB\xE2\x80\x9C\xE2\x80\x9D\xE2\x80\x98\xE2\x80\x99\xE2\x80\xA6");
  34. }
  35. QString ExpressionSeparators(const QString &additional) {
  36. static const auto quotes = Quotes();
  37. return QString::fromUtf8("\\s\\.,:;<>|'\"\\[\\]\\{\\}\\~\\!\\?\\%\\^\\(\\)\\-\\+=\\x10") + quotes + additional;
  38. }
  39. QString Separators(const QString &additional) {
  40. static const auto quotes = Quotes();
  41. return QString::fromUtf8(" \x10\n\r\t.,:;<>|'\"[]{}!?%^()-+=")
  42. + QChar(0xfdd0) // QTextBeginningOfFrame
  43. + QChar(0xfdd1) // QTextEndOfFrame
  44. + QChar(QChar::ParagraphSeparator)
  45. + QChar(QChar::LineSeparator)
  46. + quotes
  47. + additional;
  48. }
  49. QString SeparatorsBold() {
  50. return Separators(QString::fromUtf8("`~/"));
  51. }
  52. QString SeparatorsItalic() {
  53. return Separators(QString::fromUtf8("`*~/"));
  54. }
  55. QString SeparatorsStrikeOut() {
  56. return Separators(QString::fromUtf8("`*~/"));
  57. }
  58. QString SeparatorsMono() {
  59. return Separators(QString::fromUtf8("*~/"));
  60. }
  61. QString SeparatorsSpoiler() {
  62. return Separators(QString::fromUtf8("|*~/"));
  63. }
  64. QString ExpressionHashtag() {
  65. return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*/")) + QString::fromUtf8("])#[\\w]{2,64}([\\W]|$)");
  66. }
  67. QString ExpressionHashtagMention() {
  68. return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*/")) + QString::fromUtf8("])#[\\w]{2,64}(@[A-Za-z_0-9]{1,32})?([\\W]|$)");
  69. }
  70. QString ExpressionHashtagExclude() {
  71. return QString::fromUtf8("^#?\\d+$");
  72. }
  73. QString ExpressionMention() {
  74. return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*/")) + QString::fromUtf8("])@[A-Za-z_0-9]{1,32}([\\W]|$)");
  75. }
  76. QString ExpressionBotCommand() {
  77. return QString::fromUtf8("(^|[") + ExpressionSeparators(QString::fromUtf8("`\\*")) + QString::fromUtf8("])/[A-Za-z_0-9]{1,64}(@[A-Za-z_0-9]{5,32})?([\\W]|$)");
  78. }
  79. QRegularExpression CreateRegExp(const QString &expression) {
  80. auto result = QRegularExpression(
  81. expression,
  82. QRegularExpression::UseUnicodePropertiesOption);
  83. result.optimize();
  84. return result;
  85. }
  86. base::flat_set<int32> CreateValidProtocols() {
  87. auto result = base::flat_set<int32>();
  88. const auto addOne = [&](const QString &string) {
  89. result.insert(base::crc32(string.constData(), string.size() * sizeof(QChar)));
  90. };
  91. addOne(QString::fromLatin1("itmss")); // itunes
  92. addOne(QString::fromLatin1("http"));
  93. addOne(QString::fromLatin1("https"));
  94. addOne(QString::fromLatin1("ftp"));
  95. addOne(QString::fromLatin1("tg")); // local urls
  96. return result;
  97. }
  98. base::flat_set<int32> CreateValidTopDomains() {
  99. auto result = base::flat_set<int32>();
  100. auto addOne = [&result](const QString &string) {
  101. result.insert(base::crc32(string.constData(), string.size() * sizeof(QChar)));
  102. };
  103. addOne(QString::fromLatin1("ac"));
  104. addOne(QString::fromLatin1("ad"));
  105. addOne(QString::fromLatin1("ae"));
  106. addOne(QString::fromLatin1("af"));
  107. addOne(QString::fromLatin1("ag"));
  108. addOne(QString::fromLatin1("ai"));
  109. addOne(QString::fromLatin1("al"));
  110. addOne(QString::fromLatin1("am"));
  111. addOne(QString::fromLatin1("an"));
  112. addOne(QString::fromLatin1("ao"));
  113. addOne(QString::fromLatin1("aq"));
  114. addOne(QString::fromLatin1("ar"));
  115. addOne(QString::fromLatin1("as"));
  116. addOne(QString::fromLatin1("at"));
  117. addOne(QString::fromLatin1("au"));
  118. addOne(QString::fromLatin1("aw"));
  119. addOne(QString::fromLatin1("ax"));
  120. addOne(QString::fromLatin1("az"));
  121. addOne(QString::fromLatin1("ba"));
  122. addOne(QString::fromLatin1("bb"));
  123. addOne(QString::fromLatin1("bd"));
  124. addOne(QString::fromLatin1("be"));
  125. addOne(QString::fromLatin1("bf"));
  126. addOne(QString::fromLatin1("bg"));
  127. addOne(QString::fromLatin1("bh"));
  128. addOne(QString::fromLatin1("bi"));
  129. addOne(QString::fromLatin1("bj"));
  130. addOne(QString::fromLatin1("bm"));
  131. addOne(QString::fromLatin1("bn"));
  132. addOne(QString::fromLatin1("bo"));
  133. addOne(QString::fromLatin1("br"));
  134. addOne(QString::fromLatin1("bs"));
  135. addOne(QString::fromLatin1("bt"));
  136. addOne(QString::fromLatin1("bv"));
  137. addOne(QString::fromLatin1("bw"));
  138. addOne(QString::fromLatin1("by"));
  139. addOne(QString::fromLatin1("bz"));
  140. addOne(QString::fromLatin1("ca"));
  141. addOne(QString::fromLatin1("cc"));
  142. addOne(QString::fromLatin1("cd"));
  143. addOne(QString::fromLatin1("cf"));
  144. addOne(QString::fromLatin1("cg"));
  145. addOne(QString::fromLatin1("ch"));
  146. addOne(QString::fromLatin1("ci"));
  147. addOne(QString::fromLatin1("ck"));
  148. addOne(QString::fromLatin1("cl"));
  149. addOne(QString::fromLatin1("cm"));
  150. addOne(QString::fromLatin1("cn"));
  151. addOne(QString::fromLatin1("co"));
  152. addOne(QString::fromLatin1("cr"));
  153. addOne(QString::fromLatin1("cu"));
  154. addOne(QString::fromLatin1("cv"));
  155. addOne(QString::fromLatin1("cx"));
  156. addOne(QString::fromLatin1("cy"));
  157. addOne(QString::fromLatin1("cz"));
  158. addOne(QString::fromLatin1("de"));
  159. addOne(QString::fromLatin1("dj"));
  160. addOne(QString::fromLatin1("dk"));
  161. addOne(QString::fromLatin1("dm"));
  162. addOne(QString::fromLatin1("do"));
  163. addOne(QString::fromLatin1("dz"));
  164. addOne(QString::fromLatin1("ec"));
  165. addOne(QString::fromLatin1("ee"));
  166. addOne(QString::fromLatin1("eg"));
  167. addOne(QString::fromLatin1("eh"));
  168. addOne(QString::fromLatin1("er"));
  169. addOne(QString::fromLatin1("es"));
  170. addOne(QString::fromLatin1("et"));
  171. addOne(QString::fromLatin1("eu"));
  172. addOne(QString::fromLatin1("fi"));
  173. addOne(QString::fromLatin1("fj"));
  174. addOne(QString::fromLatin1("fk"));
  175. addOne(QString::fromLatin1("fm"));
  176. addOne(QString::fromLatin1("fo"));
  177. addOne(QString::fromLatin1("fr"));
  178. addOne(QString::fromLatin1("ga"));
  179. addOne(QString::fromLatin1("gd"));
  180. addOne(QString::fromLatin1("ge"));
  181. addOne(QString::fromLatin1("gf"));
  182. addOne(QString::fromLatin1("gg"));
  183. addOne(QString::fromLatin1("gh"));
  184. addOne(QString::fromLatin1("gi"));
  185. addOne(QString::fromLatin1("gl"));
  186. addOne(QString::fromLatin1("gm"));
  187. addOne(QString::fromLatin1("gn"));
  188. addOne(QString::fromLatin1("gp"));
  189. addOne(QString::fromLatin1("gq"));
  190. addOne(QString::fromLatin1("gr"));
  191. addOne(QString::fromLatin1("gs"));
  192. addOne(QString::fromLatin1("gt"));
  193. addOne(QString::fromLatin1("gu"));
  194. addOne(QString::fromLatin1("gw"));
  195. addOne(QString::fromLatin1("gy"));
  196. addOne(QString::fromLatin1("hk"));
  197. addOne(QString::fromLatin1("hm"));
  198. addOne(QString::fromLatin1("hn"));
  199. addOne(QString::fromLatin1("hr"));
  200. addOne(QString::fromLatin1("ht"));
  201. addOne(QString::fromLatin1("hu"));
  202. addOne(QString::fromLatin1("id"));
  203. addOne(QString::fromLatin1("ie"));
  204. addOne(QString::fromLatin1("il"));
  205. addOne(QString::fromLatin1("im"));
  206. addOne(QString::fromLatin1("in"));
  207. addOne(QString::fromLatin1("io"));
  208. addOne(QString::fromLatin1("iq"));
  209. addOne(QString::fromLatin1("ir"));
  210. addOne(QString::fromLatin1("is"));
  211. addOne(QString::fromLatin1("it"));
  212. addOne(QString::fromLatin1("je"));
  213. addOne(QString::fromLatin1("jm"));
  214. addOne(QString::fromLatin1("jo"));
  215. addOne(QString::fromLatin1("jp"));
  216. addOne(QString::fromLatin1("ke"));
  217. addOne(QString::fromLatin1("kg"));
  218. addOne(QString::fromLatin1("kh"));
  219. addOne(QString::fromLatin1("ki"));
  220. addOne(QString::fromLatin1("km"));
  221. addOne(QString::fromLatin1("kn"));
  222. addOne(QString::fromLatin1("kp"));
  223. addOne(QString::fromLatin1("kr"));
  224. addOne(QString::fromLatin1("kw"));
  225. addOne(QString::fromLatin1("ky"));
  226. addOne(QString::fromLatin1("kz"));
  227. addOne(QString::fromLatin1("la"));
  228. addOne(QString::fromLatin1("lb"));
  229. addOne(QString::fromLatin1("lc"));
  230. addOne(QString::fromLatin1("li"));
  231. addOne(QString::fromLatin1("lk"));
  232. addOne(QString::fromLatin1("lr"));
  233. addOne(QString::fromLatin1("ls"));
  234. addOne(QString::fromLatin1("lt"));
  235. addOne(QString::fromLatin1("lu"));
  236. addOne(QString::fromLatin1("lv"));
  237. addOne(QString::fromLatin1("ly"));
  238. addOne(QString::fromLatin1("ma"));
  239. addOne(QString::fromLatin1("mc"));
  240. addOne(QString::fromLatin1("md"));
  241. addOne(QString::fromLatin1("me"));
  242. addOne(QString::fromLatin1("mg"));
  243. addOne(QString::fromLatin1("mh"));
  244. addOne(QString::fromLatin1("mk"));
  245. addOne(QString::fromLatin1("ml"));
  246. addOne(QString::fromLatin1("mm"));
  247. addOne(QString::fromLatin1("mn"));
  248. addOne(QString::fromLatin1("mo"));
  249. addOne(QString::fromLatin1("mp"));
  250. addOne(QString::fromLatin1("mq"));
  251. addOne(QString::fromLatin1("mr"));
  252. addOne(QString::fromLatin1("ms"));
  253. addOne(QString::fromLatin1("mt"));
  254. addOne(QString::fromLatin1("mu"));
  255. addOne(QString::fromLatin1("mv"));
  256. addOne(QString::fromLatin1("mw"));
  257. addOne(QString::fromLatin1("mx"));
  258. addOne(QString::fromLatin1("my"));
  259. addOne(QString::fromLatin1("mz"));
  260. addOne(QString::fromLatin1("na"));
  261. addOne(QString::fromLatin1("nc"));
  262. addOne(QString::fromLatin1("ne"));
  263. addOne(QString::fromLatin1("nf"));
  264. addOne(QString::fromLatin1("ng"));
  265. addOne(QString::fromLatin1("ni"));
  266. addOne(QString::fromLatin1("nl"));
  267. addOne(QString::fromLatin1("no"));
  268. addOne(QString::fromLatin1("np"));
  269. addOne(QString::fromLatin1("nr"));
  270. addOne(QString::fromLatin1("nu"));
  271. addOne(QString::fromLatin1("nz"));
  272. addOne(QString::fromLatin1("om"));
  273. addOne(QString::fromLatin1("pa"));
  274. addOne(QString::fromLatin1("pe"));
  275. addOne(QString::fromLatin1("pf"));
  276. addOne(QString::fromLatin1("pg"));
  277. addOne(QString::fromLatin1("ph"));
  278. addOne(QString::fromLatin1("pk"));
  279. addOne(QString::fromLatin1("pl"));
  280. addOne(QString::fromLatin1("pm"));
  281. addOne(QString::fromLatin1("pn"));
  282. addOne(QString::fromLatin1("pr"));
  283. addOne(QString::fromLatin1("ps"));
  284. addOne(QString::fromLatin1("pt"));
  285. addOne(QString::fromLatin1("pw"));
  286. addOne(QString::fromLatin1("py"));
  287. addOne(QString::fromLatin1("qa"));
  288. addOne(QString::fromLatin1("re"));
  289. addOne(QString::fromLatin1("ro"));
  290. addOne(QString::fromLatin1("ru"));
  291. addOne(QString::fromLatin1("rs"));
  292. addOne(QString::fromLatin1("rw"));
  293. addOne(QString::fromLatin1("sa"));
  294. addOne(QString::fromLatin1("sb"));
  295. addOne(QString::fromLatin1("sc"));
  296. addOne(QString::fromLatin1("sd"));
  297. addOne(QString::fromLatin1("se"));
  298. addOne(QString::fromLatin1("sg"));
  299. addOne(QString::fromLatin1("sh"));
  300. addOne(QString::fromLatin1("si"));
  301. addOne(QString::fromLatin1("sj"));
  302. addOne(QString::fromLatin1("sk"));
  303. addOne(QString::fromLatin1("sl"));
  304. addOne(QString::fromLatin1("sm"));
  305. addOne(QString::fromLatin1("sn"));
  306. addOne(QString::fromLatin1("so"));
  307. addOne(QString::fromLatin1("sr"));
  308. addOne(QString::fromLatin1("ss"));
  309. addOne(QString::fromLatin1("st"));
  310. addOne(QString::fromLatin1("su"));
  311. addOne(QString::fromLatin1("sv"));
  312. addOne(QString::fromLatin1("sx"));
  313. addOne(QString::fromLatin1("sy"));
  314. addOne(QString::fromLatin1("sz"));
  315. addOne(QString::fromLatin1("tc"));
  316. addOne(QString::fromLatin1("td"));
  317. addOne(QString::fromLatin1("tf"));
  318. addOne(QString::fromLatin1("tg"));
  319. addOne(QString::fromLatin1("th"));
  320. addOne(QString::fromLatin1("tj"));
  321. addOne(QString::fromLatin1("tk"));
  322. addOne(QString::fromLatin1("tl"));
  323. addOne(QString::fromLatin1("tm"));
  324. addOne(QString::fromLatin1("tn"));
  325. addOne(QString::fromLatin1("to"));
  326. addOne(QString::fromLatin1("tp"));
  327. addOne(QString::fromLatin1("tr"));
  328. addOne(QString::fromLatin1("tt"));
  329. addOne(QString::fromLatin1("tv"));
  330. addOne(QString::fromLatin1("tw"));
  331. addOne(QString::fromLatin1("tz"));
  332. addOne(QString::fromLatin1("ua"));
  333. addOne(QString::fromLatin1("ug"));
  334. addOne(QString::fromLatin1("uk"));
  335. addOne(QString::fromLatin1("um"));
  336. addOne(QString::fromLatin1("us"));
  337. addOne(QString::fromLatin1("uy"));
  338. addOne(QString::fromLatin1("uz"));
  339. addOne(QString::fromLatin1("va"));
  340. addOne(QString::fromLatin1("vc"));
  341. addOne(QString::fromLatin1("ve"));
  342. addOne(QString::fromLatin1("vg"));
  343. addOne(QString::fromLatin1("vi"));
  344. addOne(QString::fromLatin1("vn"));
  345. addOne(QString::fromLatin1("vu"));
  346. addOne(QString::fromLatin1("wf"));
  347. addOne(QString::fromLatin1("ws"));
  348. addOne(QString::fromLatin1("ye"));
  349. addOne(QString::fromLatin1("yt"));
  350. addOne(QString::fromLatin1("yu"));
  351. addOne(QString::fromLatin1("za"));
  352. addOne(QString::fromLatin1("zm"));
  353. addOne(QString::fromLatin1("zw"));
  354. addOne(QString::fromLatin1("arpa"));
  355. addOne(QString::fromLatin1("aero"));
  356. addOne(QString::fromLatin1("asia"));
  357. addOne(QString::fromLatin1("biz"));
  358. addOne(QString::fromLatin1("cat"));
  359. addOne(QString::fromLatin1("com"));
  360. addOne(QString::fromLatin1("coop"));
  361. addOne(QString::fromLatin1("info"));
  362. addOne(QString::fromLatin1("int"));
  363. addOne(QString::fromLatin1("jobs"));
  364. addOne(QString::fromLatin1("mobi"));
  365. addOne(QString::fromLatin1("museum"));
  366. addOne(QString::fromLatin1("name"));
  367. addOne(QString::fromLatin1("net"));
  368. addOne(QString::fromLatin1("org"));
  369. addOne(QString::fromLatin1("post"));
  370. addOne(QString::fromLatin1("pro"));
  371. addOne(QString::fromLatin1("tel"));
  372. addOne(QString::fromLatin1("travel"));
  373. addOne(QString::fromLatin1("xxx"));
  374. addOne(QString::fromLatin1("edu"));
  375. addOne(QString::fromLatin1("gov"));
  376. addOne(QString::fromLatin1("mil"));
  377. addOne(QString::fromLatin1("local"));
  378. addOne(QString::fromLatin1("xn--lgbbat1ad8j"));
  379. addOne(QString::fromLatin1("xn--54b7fta0cc"));
  380. addOne(QString::fromLatin1("xn--fiqs8s"));
  381. addOne(QString::fromLatin1("xn--fiqz9s"));
  382. addOne(QString::fromLatin1("xn--wgbh1c"));
  383. addOne(QString::fromLatin1("xn--node"));
  384. addOne(QString::fromLatin1("xn--j6w193g"));
  385. addOne(QString::fromLatin1("xn--h2brj9c"));
  386. addOne(QString::fromLatin1("xn--mgbbh1a71e"));
  387. addOne(QString::fromLatin1("xn--fpcrj9c3d"));
  388. addOne(QString::fromLatin1("xn--gecrj9c"));
  389. addOne(QString::fromLatin1("xn--s9brj9c"));
  390. addOne(QString::fromLatin1("xn--xkc2dl3a5ee0h"));
  391. addOne(QString::fromLatin1("xn--45brj9c"));
  392. addOne(QString::fromLatin1("xn--mgba3a4f16a"));
  393. addOne(QString::fromLatin1("xn--mgbayh7gpa"));
  394. addOne(QString::fromLatin1("xn--80ao21a"));
  395. addOne(QString::fromLatin1("xn--mgbx4cd0ab"));
  396. addOne(QString::fromLatin1("xn--l1acc"));
  397. addOne(QString::fromLatin1("xn--mgbc0a9azcg"));
  398. addOne(QString::fromLatin1("xn--mgb9awbf"));
  399. addOne(QString::fromLatin1("xn--mgbai9azgqp6j"));
  400. addOne(QString::fromLatin1("xn--ygbi2ammx"));
  401. addOne(QString::fromLatin1("xn--wgbl6a"));
  402. addOne(QString::fromLatin1("xn--p1ai"));
  403. addOne(QString::fromLatin1("xn--mgberp4a5d4ar"));
  404. addOne(QString::fromLatin1("xn--90a3ac"));
  405. addOne(QString::fromLatin1("xn--yfro4i67o"));
  406. addOne(QString::fromLatin1("xn--clchc0ea0b2g2a9gcd"));
  407. addOne(QString::fromLatin1("xn--3e0b707e"));
  408. addOne(QString::fromLatin1("xn--fzc2c9e2c"));
  409. addOne(QString::fromLatin1("xn--xkc2al3hye2a"));
  410. addOne(QString::fromLatin1("xn--mgbtf8fl"));
  411. addOne(QString::fromLatin1("xn--kprw13d"));
  412. addOne(QString::fromLatin1("xn--kpry57d"));
  413. addOne(QString::fromLatin1("xn--o3cw4h"));
  414. addOne(QString::fromLatin1("xn--pgbs0dh"));
  415. addOne(QString::fromLatin1("xn--j1amh"));
  416. addOne(QString::fromLatin1("xn--mgbaam7a8h"));
  417. addOne(QString::fromLatin1("xn--mgb2ddes"));
  418. addOne(QString::fromLatin1("xn--ogbpf8fl"));
  419. addOne(QString::fromUtf8("\xd1\x80\xd1\x84"));
  420. return result;
  421. }
  422. // accent char list taken from https://github.com/aristus/accent-folding
  423. inline QChar RemoveOneAccent(uint32 code) {
  424. switch (code) {
  425. case 7834: return QChar(97);
  426. case 193: return QChar(97);
  427. case 225: return QChar(97);
  428. case 192: return QChar(97);
  429. case 224: return QChar(97);
  430. case 258: return QChar(97);
  431. case 259: return QChar(97);
  432. case 7854: return QChar(97);
  433. case 7855: return QChar(97);
  434. case 7856: return QChar(97);
  435. case 7857: return QChar(97);
  436. case 7860: return QChar(97);
  437. case 7861: return QChar(97);
  438. case 7858: return QChar(97);
  439. case 7859: return QChar(97);
  440. case 194: return QChar(97);
  441. case 226: return QChar(97);
  442. case 7844: return QChar(97);
  443. case 7845: return QChar(97);
  444. case 7846: return QChar(97);
  445. case 7847: return QChar(97);
  446. case 7850: return QChar(97);
  447. case 7851: return QChar(97);
  448. case 7848: return QChar(97);
  449. case 7849: return QChar(97);
  450. case 461: return QChar(97);
  451. case 462: return QChar(97);
  452. case 197: return QChar(97);
  453. case 229: return QChar(97);
  454. case 506: return QChar(97);
  455. case 507: return QChar(97);
  456. case 196: return QChar(97);
  457. case 228: return QChar(97);
  458. case 478: return QChar(97);
  459. case 479: return QChar(97);
  460. case 195: return QChar(97);
  461. case 227: return QChar(97);
  462. case 550: return QChar(97);
  463. case 551: return QChar(97);
  464. case 480: return QChar(97);
  465. case 481: return QChar(97);
  466. case 260: return QChar(97);
  467. case 261: return QChar(97);
  468. case 256: return QChar(97);
  469. case 257: return QChar(97);
  470. case 7842: return QChar(97);
  471. case 7843: return QChar(97);
  472. case 512: return QChar(97);
  473. case 513: return QChar(97);
  474. case 514: return QChar(97);
  475. case 515: return QChar(97);
  476. case 7840: return QChar(97);
  477. case 7841: return QChar(97);
  478. case 7862: return QChar(97);
  479. case 7863: return QChar(97);
  480. case 7852: return QChar(97);
  481. case 7853: return QChar(97);
  482. case 7680: return QChar(97);
  483. case 7681: return QChar(97);
  484. case 570: return QChar(97);
  485. case 11365: return QChar(97);
  486. case 508: return QChar(97);
  487. case 509: return QChar(97);
  488. case 482: return QChar(97);
  489. case 483: return QChar(97);
  490. case 7682: return QChar(98);
  491. case 7683: return QChar(98);
  492. case 7684: return QChar(98);
  493. case 7685: return QChar(98);
  494. case 7686: return QChar(98);
  495. case 7687: return QChar(98);
  496. case 579: return QChar(98);
  497. case 384: return QChar(98);
  498. case 7532: return QChar(98);
  499. case 385: return QChar(98);
  500. case 595: return QChar(98);
  501. case 386: return QChar(98);
  502. case 387: return QChar(98);
  503. case 262: return QChar(99);
  504. case 263: return QChar(99);
  505. case 264: return QChar(99);
  506. case 265: return QChar(99);
  507. case 268: return QChar(99);
  508. case 269: return QChar(99);
  509. case 266: return QChar(99);
  510. case 267: return QChar(99);
  511. case 199: return QChar(99);
  512. case 231: return QChar(99);
  513. case 7688: return QChar(99);
  514. case 7689: return QChar(99);
  515. case 571: return QChar(99);
  516. case 572: return QChar(99);
  517. case 391: return QChar(99);
  518. case 392: return QChar(99);
  519. case 597: return QChar(99);
  520. case 270: return QChar(100);
  521. case 271: return QChar(100);
  522. case 7690: return QChar(100);
  523. case 7691: return QChar(100);
  524. case 7696: return QChar(100);
  525. case 7697: return QChar(100);
  526. case 7692: return QChar(100);
  527. case 7693: return QChar(100);
  528. case 7698: return QChar(100);
  529. case 7699: return QChar(100);
  530. case 7694: return QChar(100);
  531. case 7695: return QChar(100);
  532. case 272: return QChar(100);
  533. case 273: return QChar(100);
  534. case 7533: return QChar(100);
  535. case 393: return QChar(100);
  536. case 598: return QChar(100);
  537. case 394: return QChar(100);
  538. case 599: return QChar(100);
  539. case 395: return QChar(100);
  540. case 396: return QChar(100);
  541. case 545: return QChar(100);
  542. case 240: return QChar(100);
  543. case 201: return QChar(101);
  544. case 399: return QChar(101);
  545. case 398: return QChar(101);
  546. case 477: return QChar(101);
  547. case 233: return QChar(101);
  548. case 200: return QChar(101);
  549. case 232: return QChar(101);
  550. case 276: return QChar(101);
  551. case 277: return QChar(101);
  552. case 202: return QChar(101);
  553. case 234: return QChar(101);
  554. case 7870: return QChar(101);
  555. case 7871: return QChar(101);
  556. case 7872: return QChar(101);
  557. case 7873: return QChar(101);
  558. case 7876: return QChar(101);
  559. case 7877: return QChar(101);
  560. case 7874: return QChar(101);
  561. case 7875: return QChar(101);
  562. case 282: return QChar(101);
  563. case 283: return QChar(101);
  564. case 203: return QChar(101);
  565. case 235: return QChar(101);
  566. case 7868: return QChar(101);
  567. case 7869: return QChar(101);
  568. case 278: return QChar(101);
  569. case 279: return QChar(101);
  570. case 552: return QChar(101);
  571. case 553: return QChar(101);
  572. case 7708: return QChar(101);
  573. case 7709: return QChar(101);
  574. case 280: return QChar(101);
  575. case 281: return QChar(101);
  576. case 274: return QChar(101);
  577. case 275: return QChar(101);
  578. case 7702: return QChar(101);
  579. case 7703: return QChar(101);
  580. case 7700: return QChar(101);
  581. case 7701: return QChar(101);
  582. case 7866: return QChar(101);
  583. case 7867: return QChar(101);
  584. case 516: return QChar(101);
  585. case 517: return QChar(101);
  586. case 518: return QChar(101);
  587. case 519: return QChar(101);
  588. case 7864: return QChar(101);
  589. case 7865: return QChar(101);
  590. case 7878: return QChar(101);
  591. case 7879: return QChar(101);
  592. case 7704: return QChar(101);
  593. case 7705: return QChar(101);
  594. case 7706: return QChar(101);
  595. case 7707: return QChar(101);
  596. case 582: return QChar(101);
  597. case 583: return QChar(101);
  598. case 602: return QChar(101);
  599. case 605: return QChar(101);
  600. case 7710: return QChar(102);
  601. case 7711: return QChar(102);
  602. case 7534: return QChar(102);
  603. case 401: return QChar(102);
  604. case 402: return QChar(102);
  605. case 500: return QChar(103);
  606. case 501: return QChar(103);
  607. case 286: return QChar(103);
  608. case 287: return QChar(103);
  609. case 284: return QChar(103);
  610. case 285: return QChar(103);
  611. case 486: return QChar(103);
  612. case 487: return QChar(103);
  613. case 288: return QChar(103);
  614. case 289: return QChar(103);
  615. case 290: return QChar(103);
  616. case 291: return QChar(103);
  617. case 7712: return QChar(103);
  618. case 7713: return QChar(103);
  619. case 484: return QChar(103);
  620. case 485: return QChar(103);
  621. case 403: return QChar(103);
  622. case 608: return QChar(103);
  623. case 292: return QChar(104);
  624. case 293: return QChar(104);
  625. case 542: return QChar(104);
  626. case 543: return QChar(104);
  627. case 7718: return QChar(104);
  628. case 7719: return QChar(104);
  629. case 7714: return QChar(104);
  630. case 7715: return QChar(104);
  631. case 7720: return QChar(104);
  632. case 7721: return QChar(104);
  633. case 7716: return QChar(104);
  634. case 7717: return QChar(104);
  635. case 7722: return QChar(104);
  636. case 7723: return QChar(104);
  637. case 817: return QChar(104);
  638. case 7830: return QChar(104);
  639. case 294: return QChar(104);
  640. case 295: return QChar(104);
  641. case 11367: return QChar(104);
  642. case 11368: return QChar(104);
  643. case 205: return QChar(105);
  644. case 237: return QChar(105);
  645. case 204: return QChar(105);
  646. case 236: return QChar(105);
  647. case 300: return QChar(105);
  648. case 301: return QChar(105);
  649. case 206: return QChar(105);
  650. case 238: return QChar(105);
  651. case 463: return QChar(105);
  652. case 464: return QChar(105);
  653. case 207: return QChar(105);
  654. case 239: return QChar(105);
  655. case 7726: return QChar(105);
  656. case 7727: return QChar(105);
  657. case 296: return QChar(105);
  658. case 297: return QChar(105);
  659. case 304: return QChar(105);
  660. case 302: return QChar(105);
  661. case 303: return QChar(105);
  662. case 298: return QChar(105);
  663. case 299: return QChar(105);
  664. case 7880: return QChar(105);
  665. case 7881: return QChar(105);
  666. case 520: return QChar(105);
  667. case 521: return QChar(105);
  668. case 522: return QChar(105);
  669. case 523: return QChar(105);
  670. case 7882: return QChar(105);
  671. case 7883: return QChar(105);
  672. case 7724: return QChar(105);
  673. case 7725: return QChar(105);
  674. case 305: return QChar(105);
  675. case 407: return QChar(105);
  676. case 616: return QChar(105);
  677. case 308: return QChar(106);
  678. case 309: return QChar(106);
  679. case 780: return QChar(106);
  680. case 496: return QChar(106);
  681. case 567: return QChar(106);
  682. case 584: return QChar(106);
  683. case 585: return QChar(106);
  684. case 669: return QChar(106);
  685. case 607: return QChar(106);
  686. case 644: return QChar(106);
  687. case 7728: return QChar(107);
  688. case 7729: return QChar(107);
  689. case 488: return QChar(107);
  690. case 489: return QChar(107);
  691. case 310: return QChar(107);
  692. case 311: return QChar(107);
  693. case 7730: return QChar(107);
  694. case 7731: return QChar(107);
  695. case 7732: return QChar(107);
  696. case 7733: return QChar(107);
  697. case 408: return QChar(107);
  698. case 409: return QChar(107);
  699. case 11369: return QChar(107);
  700. case 11370: return QChar(107);
  701. case 313: return QChar(97);
  702. case 314: return QChar(108);
  703. case 317: return QChar(108);
  704. case 318: return QChar(108);
  705. case 315: return QChar(108);
  706. case 316: return QChar(108);
  707. case 7734: return QChar(108);
  708. case 7735: return QChar(108);
  709. case 7736: return QChar(108);
  710. case 7737: return QChar(108);
  711. case 7740: return QChar(108);
  712. case 7741: return QChar(108);
  713. case 7738: return QChar(108);
  714. case 7739: return QChar(108);
  715. case 321: return QChar(108);
  716. case 322: return QChar(108);
  717. case 803: return QChar(108);
  718. case 319: return QChar(108);
  719. case 320: return QChar(108);
  720. case 573: return QChar(108);
  721. case 410: return QChar(108);
  722. case 11360: return QChar(108);
  723. case 11361: return QChar(108);
  724. case 11362: return QChar(108);
  725. case 619: return QChar(108);
  726. case 620: return QChar(108);
  727. case 621: return QChar(108);
  728. case 564: return QChar(108);
  729. case 7742: return QChar(109);
  730. case 7743: return QChar(109);
  731. case 7744: return QChar(109);
  732. case 7745: return QChar(109);
  733. case 7746: return QChar(109);
  734. case 7747: return QChar(109);
  735. case 625: return QChar(109);
  736. case 323: return QChar(110);
  737. case 324: return QChar(110);
  738. case 504: return QChar(110);
  739. case 505: return QChar(110);
  740. case 327: return QChar(110);
  741. case 328: return QChar(110);
  742. case 209: return QChar(110);
  743. case 241: return QChar(110);
  744. case 7748: return QChar(110);
  745. case 7749: return QChar(110);
  746. case 325: return QChar(110);
  747. case 326: return QChar(110);
  748. case 7750: return QChar(110);
  749. case 7751: return QChar(110);
  750. case 7754: return QChar(110);
  751. case 7755: return QChar(110);
  752. case 7752: return QChar(110);
  753. case 7753: return QChar(110);
  754. case 413: return QChar(110);
  755. case 626: return QChar(110);
  756. case 544: return QChar(110);
  757. case 414: return QChar(110);
  758. case 627: return QChar(110);
  759. case 565: return QChar(110);
  760. case 776: return QChar(116);
  761. case 211: return QChar(111);
  762. case 243: return QChar(111);
  763. case 210: return QChar(111);
  764. case 242: return QChar(111);
  765. case 334: return QChar(111);
  766. case 335: return QChar(111);
  767. case 212: return QChar(111);
  768. case 244: return QChar(111);
  769. case 7888: return QChar(111);
  770. case 7889: return QChar(111);
  771. case 7890: return QChar(111);
  772. case 7891: return QChar(111);
  773. case 7894: return QChar(111);
  774. case 7895: return QChar(111);
  775. case 7892: return QChar(111);
  776. case 7893: return QChar(111);
  777. case 465: return QChar(111);
  778. case 466: return QChar(111);
  779. case 214: return QChar(111);
  780. case 246: return QChar(111);
  781. case 554: return QChar(111);
  782. case 555: return QChar(111);
  783. case 336: return QChar(111);
  784. case 337: return QChar(111);
  785. case 213: return QChar(111);
  786. case 245: return QChar(111);
  787. case 7756: return QChar(111);
  788. case 7757: return QChar(111);
  789. case 7758: return QChar(111);
  790. case 7759: return QChar(111);
  791. case 556: return QChar(111);
  792. case 557: return QChar(111);
  793. case 558: return QChar(111);
  794. case 559: return QChar(111);
  795. case 560: return QChar(111);
  796. case 561: return QChar(111);
  797. case 216: return QChar(111);
  798. case 248: return QChar(111);
  799. case 510: return QChar(111);
  800. case 511: return QChar(111);
  801. case 490: return QChar(111);
  802. case 491: return QChar(111);
  803. case 492: return QChar(111);
  804. case 493: return QChar(111);
  805. case 332: return QChar(111);
  806. case 333: return QChar(111);
  807. case 7762: return QChar(111);
  808. case 7763: return QChar(111);
  809. case 7760: return QChar(111);
  810. case 7761: return QChar(111);
  811. case 7886: return QChar(111);
  812. case 7887: return QChar(111);
  813. case 524: return QChar(111);
  814. case 525: return QChar(111);
  815. case 526: return QChar(111);
  816. case 527: return QChar(111);
  817. case 416: return QChar(111);
  818. case 417: return QChar(111);
  819. case 7898: return QChar(111);
  820. case 7899: return QChar(111);
  821. case 7900: return QChar(111);
  822. case 7901: return QChar(111);
  823. case 7904: return QChar(111);
  824. case 7905: return QChar(111);
  825. case 7902: return QChar(111);
  826. case 7903: return QChar(111);
  827. case 7906: return QChar(111);
  828. case 7907: return QChar(111);
  829. case 7884: return QChar(111);
  830. case 7885: return QChar(111);
  831. case 7896: return QChar(111);
  832. case 7897: return QChar(111);
  833. case 415: return QChar(111);
  834. case 629: return QChar(111);
  835. case 7764: return QChar(112);
  836. case 7765: return QChar(112);
  837. case 7766: return QChar(112);
  838. case 7767: return QChar(112);
  839. case 11363: return QChar(112);
  840. case 420: return QChar(112);
  841. case 421: return QChar(112);
  842. case 771: return QChar(112);
  843. case 672: return QChar(113);
  844. case 586: return QChar(113);
  845. case 587: return QChar(113);
  846. case 340: return QChar(114);
  847. case 341: return QChar(114);
  848. case 344: return QChar(114);
  849. case 345: return QChar(114);
  850. case 7768: return QChar(114);
  851. case 7769: return QChar(114);
  852. case 342: return QChar(114);
  853. case 343: return QChar(114);
  854. case 528: return QChar(114);
  855. case 529: return QChar(114);
  856. case 530: return QChar(114);
  857. case 531: return QChar(114);
  858. case 7770: return QChar(114);
  859. case 7771: return QChar(114);
  860. case 7772: return QChar(114);
  861. case 7773: return QChar(114);
  862. case 7774: return QChar(114);
  863. case 7775: return QChar(114);
  864. case 588: return QChar(114);
  865. case 589: return QChar(114);
  866. case 7538: return QChar(114);
  867. case 636: return QChar(114);
  868. case 11364: return QChar(114);
  869. case 637: return QChar(114);
  870. case 638: return QChar(114);
  871. case 7539: return QChar(114);
  872. case 223: return QChar(115);
  873. case 346: return QChar(115);
  874. case 347: return QChar(115);
  875. case 7780: return QChar(115);
  876. case 7781: return QChar(115);
  877. case 348: return QChar(115);
  878. case 349: return QChar(115);
  879. case 352: return QChar(115);
  880. case 353: return QChar(115);
  881. case 7782: return QChar(115);
  882. case 7783: return QChar(115);
  883. case 7776: return QChar(115);
  884. case 7777: return QChar(115);
  885. case 7835: return QChar(115);
  886. case 350: return QChar(115);
  887. case 351: return QChar(115);
  888. case 7778: return QChar(115);
  889. case 7779: return QChar(115);
  890. case 7784: return QChar(115);
  891. case 7785: return QChar(115);
  892. case 536: return QChar(115);
  893. case 537: return QChar(115);
  894. case 642: return QChar(115);
  895. case 809: return QChar(115);
  896. case 222: return QChar(116);
  897. case 254: return QChar(116);
  898. case 356: return QChar(116);
  899. case 357: return QChar(116);
  900. case 7831: return QChar(116);
  901. case 7786: return QChar(116);
  902. case 7787: return QChar(116);
  903. case 354: return QChar(116);
  904. case 355: return QChar(116);
  905. case 7788: return QChar(116);
  906. case 7789: return QChar(116);
  907. case 538: return QChar(116);
  908. case 539: return QChar(116);
  909. case 7792: return QChar(116);
  910. case 7793: return QChar(116);
  911. case 7790: return QChar(116);
  912. case 7791: return QChar(116);
  913. case 358: return QChar(116);
  914. case 359: return QChar(116);
  915. case 574: return QChar(116);
  916. case 11366: return QChar(116);
  917. case 7541: return QChar(116);
  918. case 427: return QChar(116);
  919. case 428: return QChar(116);
  920. case 429: return QChar(116);
  921. case 430: return QChar(116);
  922. case 648: return QChar(116);
  923. case 566: return QChar(116);
  924. case 218: return QChar(117);
  925. case 250: return QChar(117);
  926. case 217: return QChar(117);
  927. case 249: return QChar(117);
  928. case 364: return QChar(117);
  929. case 365: return QChar(117);
  930. case 219: return QChar(117);
  931. case 251: return QChar(117);
  932. case 467: return QChar(117);
  933. case 468: return QChar(117);
  934. case 366: return QChar(117);
  935. case 367: return QChar(117);
  936. case 220: return QChar(117);
  937. case 252: return QChar(117);
  938. case 471: return QChar(117);
  939. case 472: return QChar(117);
  940. case 475: return QChar(117);
  941. case 476: return QChar(117);
  942. case 473: return QChar(117);
  943. case 474: return QChar(117);
  944. case 469: return QChar(117);
  945. case 470: return QChar(117);
  946. case 368: return QChar(117);
  947. case 369: return QChar(117);
  948. case 360: return QChar(117);
  949. case 361: return QChar(117);
  950. case 7800: return QChar(117);
  951. case 7801: return QChar(117);
  952. case 370: return QChar(117);
  953. case 371: return QChar(117);
  954. case 362: return QChar(117);
  955. case 363: return QChar(117);
  956. case 7802: return QChar(117);
  957. case 7803: return QChar(117);
  958. case 7910: return QChar(117);
  959. case 7911: return QChar(117);
  960. case 532: return QChar(117);
  961. case 533: return QChar(117);
  962. case 534: return QChar(117);
  963. case 535: return QChar(117);
  964. case 431: return QChar(117);
  965. case 432: return QChar(117);
  966. case 7912: return QChar(117);
  967. case 7913: return QChar(117);
  968. case 7914: return QChar(117);
  969. case 7915: return QChar(117);
  970. case 7918: return QChar(117);
  971. case 7919: return QChar(117);
  972. case 7916: return QChar(117);
  973. case 7917: return QChar(117);
  974. case 7920: return QChar(117);
  975. case 7921: return QChar(117);
  976. case 7908: return QChar(117);
  977. case 7909: return QChar(117);
  978. case 7794: return QChar(117);
  979. case 7795: return QChar(117);
  980. case 7798: return QChar(117);
  981. case 7799: return QChar(117);
  982. case 7796: return QChar(117);
  983. case 7797: return QChar(117);
  984. case 580: return QChar(117);
  985. case 649: return QChar(117);
  986. case 7804: return QChar(118);
  987. case 7805: return QChar(118);
  988. case 7806: return QChar(118);
  989. case 7807: return QChar(118);
  990. case 434: return QChar(118);
  991. case 651: return QChar(118);
  992. case 7810: return QChar(119);
  993. case 7811: return QChar(119);
  994. case 7808: return QChar(119);
  995. case 7809: return QChar(119);
  996. case 372: return QChar(119);
  997. case 373: return QChar(119);
  998. case 778: return QChar(121);
  999. case 7832: return QChar(119);
  1000. case 7812: return QChar(119);
  1001. case 7813: return QChar(119);
  1002. case 7814: return QChar(119);
  1003. case 7815: return QChar(119);
  1004. case 7816: return QChar(119);
  1005. case 7817: return QChar(119);
  1006. case 7820: return QChar(120);
  1007. case 7821: return QChar(120);
  1008. case 7818: return QChar(120);
  1009. case 7819: return QChar(120);
  1010. case 221: return QChar(121);
  1011. case 253: return QChar(121);
  1012. case 7922: return QChar(121);
  1013. case 7923: return QChar(121);
  1014. case 374: return QChar(121);
  1015. case 375: return QChar(121);
  1016. case 7833: return QChar(121);
  1017. case 376: return QChar(121);
  1018. case 255: return QChar(121);
  1019. case 7928: return QChar(121);
  1020. case 7929: return QChar(121);
  1021. case 7822: return QChar(121);
  1022. case 7823: return QChar(121);
  1023. case 562: return QChar(121);
  1024. case 563: return QChar(121);
  1025. case 7926: return QChar(121);
  1026. case 7927: return QChar(121);
  1027. case 7924: return QChar(121);
  1028. case 7925: return QChar(121);
  1029. case 655: return QChar(121);
  1030. case 590: return QChar(121);
  1031. case 591: return QChar(121);
  1032. case 435: return QChar(121);
  1033. case 436: return QChar(121);
  1034. case 377: return QChar(122);
  1035. case 378: return QChar(122);
  1036. case 7824: return QChar(122);
  1037. case 7825: return QChar(122);
  1038. case 381: return QChar(122);
  1039. case 382: return QChar(122);
  1040. case 379: return QChar(122);
  1041. case 380: return QChar(122);
  1042. case 7826: return QChar(122);
  1043. case 7827: return QChar(122);
  1044. case 7828: return QChar(122);
  1045. case 7829: return QChar(122);
  1046. case 437: return QChar(122);
  1047. case 438: return QChar(122);
  1048. case 548: return QChar(122);
  1049. case 549: return QChar(122);
  1050. case 656: return QChar(122);
  1051. case 657: return QChar(122);
  1052. case 11371: return QChar(122);
  1053. case 11372: return QChar(122);
  1054. case 494: return QChar(122);
  1055. case 495: return QChar(122);
  1056. case 442: return QChar(122);
  1057. case 65298: return QChar(50);
  1058. case 65302: return QChar(54);
  1059. case 65314: return QChar(66);
  1060. case 65318: return QChar(70);
  1061. case 65322: return QChar(74);
  1062. case 65326: return QChar(78);
  1063. case 65330: return QChar(82);
  1064. case 65334: return QChar(86);
  1065. case 65338: return QChar(90);
  1066. case 65346: return QChar(98);
  1067. case 65350: return QChar(102);
  1068. case 65354: return QChar(106);
  1069. case 65358: return QChar(110);
  1070. case 65362: return QChar(114);
  1071. case 65366: return QChar(118);
  1072. case 65370: return QChar(122);
  1073. case 65297: return QChar(49);
  1074. case 65301: return QChar(53);
  1075. case 65305: return QChar(57);
  1076. case 65313: return QChar(65);
  1077. case 65317: return QChar(69);
  1078. case 65321: return QChar(73);
  1079. case 65325: return QChar(77);
  1080. case 65329: return QChar(81);
  1081. case 65333: return QChar(85);
  1082. case 65337: return QChar(89);
  1083. case 65345: return QChar(97);
  1084. case 65349: return QChar(101);
  1085. case 65353: return QChar(105);
  1086. case 65357: return QChar(109);
  1087. case 65361: return QChar(113);
  1088. case 65365: return QChar(117);
  1089. case 65369: return QChar(121);
  1090. case 65296: return QChar(48);
  1091. case 65300: return QChar(52);
  1092. case 65304: return QChar(56);
  1093. case 65316: return QChar(68);
  1094. case 65320: return QChar(72);
  1095. case 65324: return QChar(76);
  1096. case 65328: return QChar(80);
  1097. case 65332: return QChar(84);
  1098. case 65336: return QChar(88);
  1099. case 65348: return QChar(100);
  1100. case 65352: return QChar(104);
  1101. case 65356: return QChar(108);
  1102. case 65360: return QChar(112);
  1103. case 65364: return QChar(116);
  1104. case 65368: return QChar(120);
  1105. case 65299: return QChar(51);
  1106. case 65303: return QChar(55);
  1107. case 65315: return QChar(67);
  1108. case 65319: return QChar(71);
  1109. case 65323: return QChar(75);
  1110. case 65327: return QChar(79);
  1111. case 65331: return QChar(83);
  1112. case 65335: return QChar(87);
  1113. case 65347: return QChar(99);
  1114. case 65351: return QChar(103);
  1115. case 65355: return QChar(107);
  1116. case 65359: return QChar(111);
  1117. case 65363: return QChar(115);
  1118. case 65367: return QChar(119);
  1119. case 1105: return QChar(1077);
  1120. default:
  1121. break;
  1122. }
  1123. return QChar(0);
  1124. }
  1125. const QRegularExpression &RegExpWordSplit() {
  1126. static const auto result = QRegularExpression(QString::fromLatin1("[\\@\\s\\-\\+\\(\\)\\[\\]\\{\\}\\<\\>\\,\\.\\:\\!\\_\\;\\\"\\'\\x0]"));
  1127. return result;
  1128. }
  1129. [[nodiscard]] QString ExpandCustomLinks(const TextWithTags &text) {
  1130. const auto entities = ConvertTextTagsToEntities(text.tags);
  1131. auto &&urls = ranges::make_subrange(
  1132. entities.begin(),
  1133. entities.end()
  1134. ) | ranges::views::filter([](const EntityInText &entity) {
  1135. return entity.type() == EntityType::CustomUrl;
  1136. });
  1137. const auto &original = text.text;
  1138. if (urls.begin() == urls.end()) {
  1139. return original;
  1140. }
  1141. auto result = QString();
  1142. auto offset = 0;
  1143. for (const auto &entity : urls) {
  1144. const auto till = entity.offset() + entity.length();
  1145. if (till > offset) {
  1146. result.append(base::StringViewMid(original, offset, till - offset));
  1147. }
  1148. result.append(qstr(" (")).append(entity.data()).append(')');
  1149. offset = till;
  1150. }
  1151. if (original.size() > offset) {
  1152. result.append(base::StringViewMid(original, offset));
  1153. }
  1154. return result;
  1155. }
  1156. std::unique_ptr<QMimeData> MimeDataFromText(
  1157. TextWithTags &&text,
  1158. const QString &expanded) {
  1159. if (expanded.isEmpty()) {
  1160. return nullptr;
  1161. }
  1162. auto result = std::make_unique<QMimeData>();
  1163. result->setText(expanded);
  1164. if (!text.tags.isEmpty()) {
  1165. for (auto &tag : text.tags) {
  1166. tag.id = Ui::Integration::Instance().convertTagToMimeTag(tag.id);
  1167. }
  1168. result->setData(
  1169. TextUtilities::TagsTextMimeType(),
  1170. text.text.toUtf8());
  1171. result->setData(
  1172. TextUtilities::TagsMimeType(),
  1173. TextUtilities::SerializeTags(text.tags));
  1174. }
  1175. return result;
  1176. }
  1177. bool IsSentencePartEnd(QChar ch) {
  1178. return (ch == ',')
  1179. || (ch == ':')
  1180. || (ch == ';');
  1181. }
  1182. bool IsSentenceEnd(QChar ch) {
  1183. return (ch == '.')
  1184. || (ch == '?')
  1185. || (ch == '!');
  1186. }
  1187. } // namespace
  1188. const QRegularExpression &RegExpMailNameAtEnd() {
  1189. static const auto result = CreateRegExp(ExpressionMailNameAtEnd());
  1190. return result;
  1191. }
  1192. const QRegularExpression &RegExpHashtag(bool allowWithMention) {
  1193. if (allowWithMention) {
  1194. static const auto result = CreateRegExp(ExpressionHashtagMention());
  1195. return result;
  1196. } else {
  1197. static const auto result = CreateRegExp(ExpressionHashtag());
  1198. return result;
  1199. }
  1200. }
  1201. const QRegularExpression &RegExpHashtagExclude() {
  1202. static const auto result = CreateRegExp(ExpressionHashtagExclude());
  1203. return result;
  1204. }
  1205. const QRegularExpression &RegExpMention() {
  1206. static const auto result = CreateRegExp(ExpressionMention());
  1207. return result;
  1208. }
  1209. const QRegularExpression &RegExpBotCommand() {
  1210. static const auto result = CreateRegExp(ExpressionBotCommand());
  1211. return result;
  1212. }
  1213. const QRegularExpression &RegExpDigitsExclude() {
  1214. static const auto result = QRegularExpression("[^\\d]");
  1215. return result;
  1216. }
  1217. QString MarkdownBoldGoodBefore() {
  1218. return SeparatorsBold();
  1219. }
  1220. QString MarkdownBoldBadAfter() {
  1221. return QString::fromLatin1("*");
  1222. }
  1223. QString MarkdownItalicGoodBefore() {
  1224. return SeparatorsItalic();
  1225. }
  1226. QString MarkdownItalicBadAfter() {
  1227. return QString::fromLatin1("_");
  1228. }
  1229. QString MarkdownStrikeOutGoodBefore() {
  1230. return SeparatorsStrikeOut();
  1231. }
  1232. QString MarkdownStrikeOutBadAfter() {
  1233. return QString::fromLatin1("~");
  1234. }
  1235. QString MarkdownCodeGoodBefore() {
  1236. return SeparatorsMono();
  1237. }
  1238. QString MarkdownCodeBadAfter() {
  1239. return QString::fromLatin1("`\n\r");
  1240. }
  1241. QString MarkdownPreGoodBefore() {
  1242. return SeparatorsMono();
  1243. }
  1244. QString MarkdownPreBadAfter() {
  1245. return QString::fromLatin1("`");
  1246. }
  1247. QString MarkdownSpoilerGoodBefore() {
  1248. return SeparatorsSpoiler();
  1249. }
  1250. QString MarkdownSpoilerBadAfter() {
  1251. return QString::fromLatin1("|");
  1252. }
  1253. bool IsValidProtocol(const QString &protocol) {
  1254. static const auto list = CreateValidProtocols();
  1255. return list.contains(base::crc32(protocol.constData(), protocol.size() * sizeof(QChar)));
  1256. }
  1257. bool IsValidTopDomain(const QString &protocol) {
  1258. static const auto list = CreateValidTopDomains();
  1259. return list.contains(base::crc32(protocol.constData(), protocol.size() * sizeof(QChar)));
  1260. }
  1261. QString EscapeForRichParsing(const QString &text) {
  1262. QString result;
  1263. result.reserve(text.size());
  1264. auto s = text.constData(), ch = s;
  1265. for (const QChar *e = s + text.size(); ch != e; ++ch) {
  1266. // if (*ch == TextCommand) {
  1267. // if (ch > s) result.append(s, ch - s);
  1268. // result.append(QChar::Space);
  1269. // s = ch + 1;
  1270. // continue;
  1271. // }
  1272. if (ch->unicode() == '\\' || ch->unicode() == '[') {
  1273. if (ch > s) result.append(s, ch - s);
  1274. result.append('\\');
  1275. s = ch;
  1276. continue;
  1277. }
  1278. }
  1279. if (ch > s) result.append(s, ch - s);
  1280. return result;
  1281. }
  1282. QString SingleLine(const QString &text) {
  1283. auto result = text;
  1284. auto s = text.unicode(), e = text.unicode() + text.size();
  1285. // Trim.
  1286. while (s < e && IsTrimmed(*s)) {
  1287. ++s;
  1288. }
  1289. while (s < e && IsTrimmed(*(e - 1))) {
  1290. --e;
  1291. }
  1292. if (e - s != text.size()) {
  1293. result = text.mid(s - text.unicode(), e - s);
  1294. }
  1295. for (auto ch = s; ch != e; ++ch) {
  1296. if (IsNewline(*ch)/* || *ch == TextCommand*/) {
  1297. result[int(ch - s)] = QChar::Space;
  1298. }
  1299. }
  1300. return result;
  1301. }
  1302. TextWithEntities SingleLine(const TextWithEntities &text) {
  1303. auto copy = text;
  1304. Trim(copy);
  1305. return { SingleLine(copy.text), std::move(copy.entities) };
  1306. }
  1307. QString RemoveAccents(const QString &text) {
  1308. auto result = text;
  1309. auto copying = false;
  1310. auto i = 0;
  1311. for (auto s = text.unicode(), ch = s, e = text.unicode() + text.size(); ch != e; ++ch, ++i) {
  1312. if (ch->unicode() < 128) {
  1313. if (copying) result[i] = *ch;
  1314. continue;
  1315. }
  1316. if (IsDiacritic(*ch)) {
  1317. copying = true;
  1318. --i;
  1319. continue;
  1320. }
  1321. if (ch->isHighSurrogate() && ch + 1 < e && (ch + 1)->isLowSurrogate()) {
  1322. auto noAccent = RemoveOneAccent(QChar::surrogateToUcs4(*ch, *(ch + 1)));
  1323. if (noAccent.unicode() > 0) {
  1324. copying = true;
  1325. result[i] = noAccent;
  1326. } else {
  1327. if (copying) result[i] = *ch;
  1328. ++ch, ++i;
  1329. if (copying) result[i] = *ch;
  1330. }
  1331. } else {
  1332. auto noAccent = RemoveOneAccent(ch->unicode());
  1333. if (noAccent.unicode() > 0 && noAccent != *ch) {
  1334. result[i] = noAccent;
  1335. } else if (copying) {
  1336. result[i] = *ch;
  1337. }
  1338. }
  1339. }
  1340. return (i < result.size()) ? result.mid(0, i) : result;
  1341. }
  1342. QString RemoveEmoji(const QString &text) {
  1343. auto result = QString();
  1344. result.reserve(text.size());
  1345. auto begin = text.data();
  1346. const auto end = begin + text.size();
  1347. while (begin != end) {
  1348. auto length = 0;
  1349. if (Ui::Emoji::Find(begin, end, &length)) {
  1350. begin += length;
  1351. } else {
  1352. result.append(*begin++);
  1353. }
  1354. }
  1355. return result;
  1356. }
  1357. QString NameSortKey(const QString &text) {
  1358. return RemoveAccents(text).toLower();
  1359. }
  1360. QStringList PrepareSearchWords(
  1361. const QString &query,
  1362. const QRegularExpression *SplitterOverride) {
  1363. auto clean = RemoveAccents(query.trimmed().toLower());
  1364. auto result = QStringList();
  1365. if (!clean.isEmpty()) {
  1366. auto list = clean.split(SplitterOverride
  1367. ? *SplitterOverride
  1368. : RegExpWordSplit(),
  1369. Qt::SkipEmptyParts);
  1370. result.reserve(list.size());
  1371. for (const auto &word : std::as_const(list)) {
  1372. auto trimmed = word.trimmed();
  1373. if (!trimmed.isEmpty()) {
  1374. result.push_back(trimmed);
  1375. }
  1376. }
  1377. }
  1378. return result;
  1379. }
  1380. bool CutPart(TextWithEntities &sending, TextWithEntities &left, int32 limit) {
  1381. Expects(limit > 0);
  1382. if (left.text.isEmpty()) {
  1383. return false;
  1384. }
  1385. int32 currentEntity = 0, goodEntity = currentEntity, entityCount = left.entities.size();
  1386. bool goodInEntity = false, goodCanBreakEntity = false;
  1387. int32 s = 0, half = limit / 2, goodLevel = 0;
  1388. for (const QChar *start = left.text.constData(), *ch = start, *end = left.text.constEnd(), *good = ch; ch != end; ++ch, ++s) {
  1389. while (currentEntity < entityCount && ch >= start + left.entities[currentEntity].offset() + left.entities[currentEntity].length()) {
  1390. ++currentEntity;
  1391. }
  1392. if (s > half) {
  1393. bool inEntity = (currentEntity < entityCount) && (ch > start + left.entities[currentEntity].offset()) && (ch < start + left.entities[currentEntity].offset() + left.entities[currentEntity].length());
  1394. EntityType entityType = (currentEntity < entityCount) ? left.entities[currentEntity].type() : EntityType::Invalid;
  1395. bool canBreakEntity = (entityType == EntityType::Pre)
  1396. || (entityType == EntityType::Blockquote)
  1397. || (entityType == EntityType::Code); // #TODO entities
  1398. int32 noEntityLevel = inEntity ? 0 : 1;
  1399. auto markGoodAsLevel = [&](int newLevel) {
  1400. if (goodLevel > newLevel) {
  1401. return;
  1402. }
  1403. goodLevel = newLevel;
  1404. good = ch;
  1405. goodEntity = currentEntity;
  1406. goodInEntity = inEntity;
  1407. goodCanBreakEntity = canBreakEntity;
  1408. };
  1409. if (inEntity && !canBreakEntity) {
  1410. markGoodAsLevel(0);
  1411. } else {
  1412. if (IsNewline(*ch)) {
  1413. if (inEntity) {
  1414. if (ch + 1 < end && IsNewline(*(ch + 1))) {
  1415. markGoodAsLevel(12);
  1416. } else {
  1417. markGoodAsLevel(11);
  1418. }
  1419. } else if (ch + 1 < end && IsNewline(*(ch + 1))) {
  1420. markGoodAsLevel(15);
  1421. } else if (currentEntity < entityCount
  1422. && ch + 1 == start + left.entities[currentEntity].offset()
  1423. && (left.entities[currentEntity].type() == EntityType::Pre
  1424. || left.entities[currentEntity].type() == EntityType::Blockquote)) {
  1425. markGoodAsLevel(14);
  1426. } else if (currentEntity > 0
  1427. && ch == start + left.entities[currentEntity - 1].offset() + left.entities[currentEntity - 1].length()
  1428. && (left.entities[currentEntity - 1].type() == EntityType::Pre
  1429. || left.entities[currentEntity - 1].type() == EntityType::Blockquote)) {
  1430. markGoodAsLevel(14);
  1431. } else {
  1432. markGoodAsLevel(13);
  1433. }
  1434. } else if (IsSpace(*ch)) {
  1435. if (IsSentenceEnd(*(ch - 1))) {
  1436. markGoodAsLevel(9 + noEntityLevel);
  1437. } else if (IsSentencePartEnd(*(ch - 1))) {
  1438. markGoodAsLevel(7 + noEntityLevel);
  1439. } else {
  1440. markGoodAsLevel(5 + noEntityLevel);
  1441. }
  1442. } else if (IsWordSeparator(*(ch - 1))) {
  1443. markGoodAsLevel(3 + noEntityLevel);
  1444. } else {
  1445. markGoodAsLevel(1 + noEntityLevel);
  1446. }
  1447. }
  1448. }
  1449. int elen = 0;
  1450. if (Ui::Emoji::Find(ch, end, &elen)) {
  1451. for (int i = 0; i < elen; ++i, ++ch, ++s) {
  1452. if (ch->isHighSurrogate() && i + 1 < elen && (ch + 1)->isLowSurrogate()) {
  1453. ++ch;
  1454. ++i;
  1455. }
  1456. }
  1457. --ch;
  1458. --s;
  1459. } else if (ch->isHighSurrogate() && ch + 1 < end && (ch + 1)->isLowSurrogate()) {
  1460. ++ch;
  1461. }
  1462. if (s >= limit) {
  1463. sending.text = left.text.mid(0, good - start);
  1464. left.text = left.text.mid(good - start);
  1465. if (goodInEntity) {
  1466. if (goodCanBreakEntity) {
  1467. sending.entities = left.entities.mid(0, goodEntity + 1);
  1468. sending.entities.back().updateTextEnd(good - start);
  1469. left.entities = left.entities.mid(goodEntity);
  1470. for (auto &entity : left.entities) {
  1471. entity.shiftLeft(good - start);
  1472. }
  1473. } else {
  1474. sending.entities = left.entities.mid(0, goodEntity);
  1475. left.entities = left.entities.mid(goodEntity + 1);
  1476. }
  1477. } else {
  1478. sending.entities = left.entities.mid(0, goodEntity);
  1479. left.entities = left.entities.mid(goodEntity);
  1480. for (auto &entity : left.entities) {
  1481. entity.shiftLeft(good - start);
  1482. }
  1483. }
  1484. return true;
  1485. }
  1486. }
  1487. sending = base::take(left);
  1488. return true;
  1489. }
  1490. MentionNameFields MentionNameDataToFields(QStringView data) {
  1491. const auto components = data.split('.');
  1492. if (components.size() != 2) {
  1493. return {};
  1494. }
  1495. const auto parts = components[1].split(':');
  1496. if (parts.size() != 2) {
  1497. return {};
  1498. }
  1499. return {
  1500. .selfId = parts[1].toULongLong(),
  1501. .userId = components[0].toULongLong(),
  1502. .accessHash = parts[0].toULongLong(),
  1503. };
  1504. }
  1505. QString MentionNameDataFromFields(const MentionNameFields &fields) {
  1506. return u"%1.%2:%3"_q
  1507. .arg(fields.userId)
  1508. .arg(fields.accessHash)
  1509. .arg(fields.selfId);
  1510. }
  1511. TextWithEntities ParseEntities(const QString &text, int32 flags) {
  1512. auto result = TextWithEntities{ text, EntitiesInText() };
  1513. ParseEntities(result, flags);
  1514. return result;
  1515. }
  1516. // Some code is duplicated in message_field.cpp!
  1517. void ParseEntities(TextWithEntities &result, int32 flags) {
  1518. constexpr auto kNotFound = std::numeric_limits<int>::max();
  1519. auto newEntities = EntitiesInText();
  1520. bool withHashtags = (flags & TextParseHashtags);
  1521. bool withMentions = (flags & TextParseMentions);
  1522. bool withBotCommands = (flags & TextParseBotCommands);
  1523. int existingEntityIndex = 0, existingEntitiesCount = result.entities.size();
  1524. int existingEntityEnd = 0;
  1525. int32 len = result.text.size();
  1526. const auto start = result.text.constData();
  1527. const auto end = start + result.text.size();
  1528. for (int32 offset = 0, matchOffset = offset, mentionSkip = 0; offset < len;) {
  1529. auto mDomain = qthelp::RegExpDomain().match(result.text, matchOffset);
  1530. auto mExplicitDomain = qthelp::RegExpDomainExplicit().match(result.text, matchOffset);
  1531. auto mHashtag = withHashtags ? RegExpHashtag(true).match(result.text, matchOffset) : QRegularExpressionMatch();
  1532. auto mMention = withMentions ? RegExpMention().match(result.text, qMax(mentionSkip, matchOffset)) : QRegularExpressionMatch();
  1533. auto mBotCommand = withBotCommands ? RegExpBotCommand().match(result.text, matchOffset) : QRegularExpressionMatch();
  1534. auto lnkType = EntityType::Url;
  1535. int32 lnkStart = 0, lnkLength = 0;
  1536. auto domainStart = mDomain.hasMatch() ? mDomain.capturedStart() : kNotFound,
  1537. domainEnd = mDomain.hasMatch() ? mDomain.capturedEnd() : kNotFound,
  1538. explicitDomainStart = mExplicitDomain.hasMatch() ? mExplicitDomain.capturedStart() : kNotFound,
  1539. explicitDomainEnd = mExplicitDomain.hasMatch() ? mExplicitDomain.capturedEnd() : kNotFound,
  1540. hashtagStart = mHashtag.hasMatch() ? mHashtag.capturedStart() : kNotFound,
  1541. hashtagEnd = mHashtag.hasMatch() ? mHashtag.capturedEnd() : kNotFound,
  1542. mentionStart = mMention.hasMatch() ? mMention.capturedStart() : kNotFound,
  1543. mentionEnd = mMention.hasMatch() ? mMention.capturedEnd() : kNotFound,
  1544. botCommandStart = mBotCommand.hasMatch() ? mBotCommand.capturedStart() : kNotFound,
  1545. botCommandEnd = mBotCommand.hasMatch() ? mBotCommand.capturedEnd() : kNotFound;
  1546. auto hashtagIgnore = false;
  1547. auto mentionIgnore = false;
  1548. if (mHashtag.hasMatch()) {
  1549. if (!mHashtag.capturedView(1).isEmpty()) {
  1550. ++hashtagStart;
  1551. }
  1552. if (!mHashtag.capturedView(3).isEmpty()) {
  1553. --hashtagEnd;
  1554. }
  1555. if (RegExpHashtagExclude().match(
  1556. result.text.mid(
  1557. hashtagStart + 1,
  1558. hashtagEnd - hashtagStart - 1)).hasMatch()) {
  1559. hashtagIgnore = true;
  1560. }
  1561. }
  1562. while (mMention.hasMatch()) {
  1563. if (!mMention.capturedView(1).isEmpty()) {
  1564. ++mentionStart;
  1565. }
  1566. if (!mMention.capturedView(2).isEmpty()) {
  1567. --mentionEnd;
  1568. }
  1569. if (!(start + mentionStart + 1)->isLetter() || !(start + mentionEnd - 1)->isLetterOrNumber()) {
  1570. mentionSkip = mentionEnd;
  1571. if (mentionSkip < len
  1572. && (start + mentionSkip)->isLowSurrogate()) {
  1573. ++mentionSkip;
  1574. }
  1575. mMention = RegExpMention().match(result.text, qMax(mentionSkip, matchOffset));
  1576. if (mMention.hasMatch()) {
  1577. mentionStart = mMention.capturedStart();
  1578. mentionEnd = mMention.capturedEnd();
  1579. } else {
  1580. mentionIgnore = true;
  1581. }
  1582. } else {
  1583. break;
  1584. }
  1585. }
  1586. if (mBotCommand.hasMatch()) {
  1587. if (!mBotCommand.capturedView(1).isEmpty()) {
  1588. ++botCommandStart;
  1589. }
  1590. if (!mBotCommand.capturedView(3).isEmpty()) {
  1591. --botCommandEnd;
  1592. }
  1593. }
  1594. if (!mDomain.hasMatch()
  1595. && !mExplicitDomain.hasMatch()
  1596. && !mHashtag.hasMatch()
  1597. && !mMention.hasMatch()
  1598. && !mBotCommand.hasMatch()) {
  1599. break;
  1600. }
  1601. if (explicitDomainStart < domainStart) {
  1602. domainStart = explicitDomainStart;
  1603. domainEnd = explicitDomainEnd;
  1604. mDomain = mExplicitDomain;
  1605. }
  1606. if (mentionStart < hashtagStart
  1607. && mentionStart < domainStart
  1608. && mentionStart < botCommandStart) {
  1609. if (mentionIgnore) {
  1610. offset = matchOffset = mentionEnd;
  1611. continue;
  1612. }
  1613. lnkType = EntityType::Mention;
  1614. lnkStart = mentionStart;
  1615. lnkLength = mentionEnd - mentionStart;
  1616. } else if (hashtagStart < domainStart
  1617. && hashtagStart < botCommandStart) {
  1618. if (hashtagIgnore) {
  1619. offset = matchOffset = hashtagEnd;
  1620. continue;
  1621. }
  1622. lnkType = EntityType::Hashtag;
  1623. lnkStart = hashtagStart;
  1624. lnkLength = hashtagEnd - hashtagStart;
  1625. } else if (botCommandStart < domainStart) {
  1626. lnkType = EntityType::BotCommand;
  1627. lnkStart = botCommandStart;
  1628. lnkLength = botCommandEnd - botCommandStart;
  1629. } else {
  1630. auto protocol = mDomain.captured(1).toLower();
  1631. auto topDomain = mDomain.captured(3).toLower();
  1632. auto isProtocolValid = protocol.isEmpty() || IsValidProtocol(protocol);
  1633. auto isTopDomainValid = !protocol.isEmpty() || IsValidTopDomain(topDomain);
  1634. if (protocol.isEmpty() && domainStart > offset + 1 && *(start + domainStart - 1) == QChar('@')) {
  1635. auto forMailName = result.text.mid(offset, domainStart - offset - 1);
  1636. auto mMailName = RegExpMailNameAtEnd().match(forMailName);
  1637. if (mMailName.hasMatch()) {
  1638. auto mailStart = offset + mMailName.capturedStart();
  1639. if (mailStart < offset) {
  1640. mailStart = offset;
  1641. }
  1642. lnkType = EntityType::Email;
  1643. lnkStart = mailStart;
  1644. lnkLength = domainEnd - mailStart;
  1645. }
  1646. }
  1647. if (lnkType == EntityType::Url && !lnkLength) {
  1648. if (!isProtocolValid || !isTopDomainValid) {
  1649. matchOffset = domainEnd;
  1650. continue;
  1651. }
  1652. lnkStart = domainStart;
  1653. QStack<const QChar*> parenth;
  1654. const QChar *domainEnd = start + mDomain.capturedEnd(), *p = domainEnd;
  1655. for (; p < end; ++p) {
  1656. QChar ch(*p);
  1657. if (IsLinkEnd(ch)) {
  1658. break; // link finished
  1659. } else if (IsAlmostLinkEnd(ch)) {
  1660. const QChar *endTest = p + 1;
  1661. while (endTest < end && IsAlmostLinkEnd(*endTest)) {
  1662. ++endTest;
  1663. }
  1664. if (endTest >= end || IsLinkEnd(*endTest)) {
  1665. break; // link finished at p
  1666. }
  1667. p = endTest;
  1668. ch = *p;
  1669. }
  1670. if (ch == '(' || ch == '[' || ch == '{' || ch == '<') {
  1671. parenth.push(p);
  1672. } else if (ch == ')' || ch == ']' || ch == '}' || ch == '>') {
  1673. if (parenth.isEmpty()) break;
  1674. const QChar *q = parenth.pop(), open(*q);
  1675. if ((ch == ')' && open != '(') || (ch == ']' && open != '[') || (ch == '}' && open != '{') || (ch == '>' && open != '<')) {
  1676. p = q;
  1677. break;
  1678. }
  1679. }
  1680. }
  1681. if (p > domainEnd) { // check, that domain ended
  1682. if (domainEnd->unicode() != '/' && domainEnd->unicode() != '?') {
  1683. matchOffset = domainEnd - start;
  1684. continue;
  1685. }
  1686. }
  1687. lnkLength = (p - start) - lnkStart;
  1688. }
  1689. }
  1690. for (; existingEntityIndex < existingEntitiesCount && result.entities[existingEntityIndex].offset() <= lnkStart; ++existingEntityIndex) {
  1691. auto &entity = result.entities[existingEntityIndex];
  1692. accumulate_max(existingEntityEnd, entity.offset() + entity.length());
  1693. newEntities.push_back(entity);
  1694. }
  1695. if (lnkStart >= existingEntityEnd) {
  1696. result.entities.push_back({ lnkType, lnkStart, lnkLength });
  1697. }
  1698. offset = matchOffset = lnkStart + lnkLength;
  1699. }
  1700. if (!newEntities.isEmpty()) {
  1701. for (; existingEntityIndex < existingEntitiesCount; ++existingEntityIndex) {
  1702. auto &entity = result.entities[existingEntityIndex];
  1703. newEntities.push_back(entity);
  1704. }
  1705. result.entities = newEntities;
  1706. }
  1707. }
  1708. void MoveStringPart(TextWithEntities &result, int to, int from, int count) {
  1709. if (!count) return;
  1710. if (to != from) {
  1711. auto start = result.text.data();
  1712. memmove(start + to, start + from, count * sizeof(QChar));
  1713. for (auto &entity : result.entities) {
  1714. if (entity.offset() >= from + count) break;
  1715. if (entity.offset() + entity.length() <= from) continue;
  1716. if (entity.offset() >= from) {
  1717. entity.extendToLeft(from - to);
  1718. }
  1719. if (entity.offset() + entity.length() <= from + count) {
  1720. entity.shrinkFromRight(from - to);
  1721. }
  1722. }
  1723. }
  1724. }
  1725. void MovePartAndGoForward(TextWithEntities &result, int &to, int &from, int count) {
  1726. if (!count) return;
  1727. MoveStringPart(result, to, from, count);
  1728. to += count;
  1729. from += count;
  1730. }
  1731. void PrepareForSending(TextWithEntities &result, int32 flags) {
  1732. ApplyServerCleaning(result);
  1733. if (flags) {
  1734. ParseEntities(result, flags);
  1735. }
  1736. Trim(result);
  1737. }
  1738. // Replace bad symbols with space and remove '\r'.
  1739. void ApplyServerCleaning(TextWithEntities &result) {
  1740. auto len = result.text.size();
  1741. // Replace tabs with two spaces.
  1742. if (auto tabs = std::count(result.text.cbegin(), result.text.cend(), '\t')) {
  1743. auto replacement = QString::fromLatin1(" ");
  1744. auto replacementLength = replacement.size();
  1745. auto shift = (replacementLength - 1);
  1746. result.text.resize(len + shift * tabs);
  1747. for (auto i = len, movedTill = len, to = result.text.size(); i > 0; --i) {
  1748. if (result.text[i - 1] == '\t') {
  1749. auto toMove = movedTill - i;
  1750. to -= toMove;
  1751. MoveStringPart(result, to, i, toMove);
  1752. to -= replacementLength;
  1753. memcpy(result.text.data() + to, replacement.constData(), replacementLength * sizeof(QChar));
  1754. movedTill = i - 1;
  1755. }
  1756. }
  1757. len = result.text.size();
  1758. }
  1759. auto to = 0;
  1760. auto from = 0;
  1761. auto start = result.text.data();
  1762. for (auto ch = start, end = start + len; ch < end; ++ch) {
  1763. if (ch->unicode() == '\r') {
  1764. MovePartAndGoForward(result, to, from, (ch - start) - from);
  1765. ++from;
  1766. } else if (IsReplacedBySpace(*ch)) {
  1767. *ch = ' ';
  1768. }
  1769. }
  1770. MovePartAndGoForward(result, to, from, len - from);
  1771. if (to < len) result.text.resize(to);
  1772. }
  1773. void Trim(TextWithEntities &result) {
  1774. auto foundNotTrimmedChar = false;
  1775. // right trim
  1776. for (auto s = result.text.data(), e = s + result.text.size(), ch = e; ch != s;) {
  1777. --ch;
  1778. if (!IsTrimmed(*ch)) {
  1779. if (ch + 1 < e) {
  1780. auto l = ch + 1 - s;
  1781. for (auto &entity : result.entities) {
  1782. entity.updateTextEnd(l);
  1783. }
  1784. result.text.resize(l);
  1785. }
  1786. foundNotTrimmedChar = true;
  1787. break;
  1788. }
  1789. }
  1790. if (!foundNotTrimmedChar) {
  1791. result = TextWithEntities();
  1792. return;
  1793. }
  1794. const auto firstMonospaceOffset = EntityInText::FirstMonospaceOffset(
  1795. result.entities,
  1796. result.text.size());
  1797. // left trim
  1798. for (auto s = result.text.data(), ch = s, e = s + result.text.size(); ch != e; ++ch) {
  1799. if (!IsTrimmed(*ch) || (ch - s) == firstMonospaceOffset) {
  1800. if (ch > s) {
  1801. auto l = ch - s;
  1802. for (auto &entity : result.entities) {
  1803. entity.shiftLeft(l);
  1804. }
  1805. result.text = result.text.mid(l);
  1806. }
  1807. break;
  1808. }
  1809. }
  1810. }
  1811. int SerializeTagsSize(const TextWithTags::Tags &tags) {
  1812. auto result = int(sizeof(qint32)); // QByteArray size
  1813. if (tags.isEmpty()) {
  1814. return result;
  1815. }
  1816. result += sizeof(qint32);
  1817. for (const auto &tag : tags) {
  1818. result += 2 * sizeof(qint32) // offset, length
  1819. + sizeof(quint32) // id.size
  1820. + tag.id.size() * sizeof(ushort);
  1821. }
  1822. return result;
  1823. }
  1824. QByteArray SerializeTags(const TextWithTags::Tags &tags) {
  1825. if (tags.isEmpty()) {
  1826. return QByteArray();
  1827. }
  1828. QByteArray tagsSerialized;
  1829. {
  1830. QDataStream stream(&tagsSerialized, QIODevice::WriteOnly);
  1831. stream.setVersion(QDataStream::Qt_5_1);
  1832. stream << qint32(tags.size());
  1833. for (const auto &tag : tags) {
  1834. stream << qint32(tag.offset) << qint32(tag.length) << tag.id;
  1835. }
  1836. }
  1837. return tagsSerialized;
  1838. }
  1839. TextWithTags::Tags DeserializeTags(QByteArray data, int textLength) {
  1840. auto result = TextWithTags::Tags();
  1841. if (data.isEmpty()) {
  1842. return result;
  1843. }
  1844. QDataStream stream(data);
  1845. stream.setVersion(QDataStream::Qt_5_1);
  1846. qint32 tagCount = 0;
  1847. stream >> tagCount;
  1848. if (stream.status() != QDataStream::Ok) {
  1849. return result;
  1850. }
  1851. if (tagCount <= 0 || tagCount > textLength) {
  1852. return result;
  1853. }
  1854. for (auto i = 0; i != tagCount; ++i) {
  1855. qint32 offset = 0, length = 0;
  1856. QString id;
  1857. stream >> offset >> length >> id;
  1858. if (stream.status() != QDataStream::Ok) {
  1859. return result;
  1860. }
  1861. if (offset < 0 || length <= 0 || offset + length > textLength) {
  1862. return result;
  1863. }
  1864. result.push_back({ offset, length, id });
  1865. }
  1866. return result;
  1867. }
  1868. QString TagsMimeType() {
  1869. return QString::fromLatin1("application/x-td-field-tags");
  1870. }
  1871. QString TagsTextMimeType() {
  1872. return QString::fromLatin1("application/x-td-field-text");
  1873. }
  1874. bool IsMentionLink(QStringView link) {
  1875. return link.startsWith(kMentionTagStart);
  1876. }
  1877. QString MentionEntityData(QStringView link) {
  1878. const auto match = qthelp::regex_match(
  1879. "^(\\d+\\.\\d+:\\d+)(/|$)",
  1880. base::StringViewMid(link, kMentionTagStart.size()));
  1881. return match ? match->captured(1) : QString();
  1882. }
  1883. bool IsSeparateTag(QStringView tag) {
  1884. return (tag == Ui::InputField::kTagCode)
  1885. || (tag == Ui::InputField::kTagPre);
  1886. }
  1887. QString JoinTag(const QList<QStringView> &list) {
  1888. if (list.isEmpty()) {
  1889. return QString();
  1890. }
  1891. auto length = (list.size() - 1);
  1892. for (const auto &entry : list) {
  1893. length += entry.size();
  1894. }
  1895. auto result = QString();
  1896. result.reserve(length);
  1897. result.append(list.front());
  1898. for (auto i = 1, count = int(list.size()); i != count; ++i) {
  1899. if (!IsSeparateTag(list[i])) {
  1900. result.append(kTagSeparator).append(list[i]);
  1901. }
  1902. }
  1903. return result;
  1904. }
  1905. QList<QStringView> SplitTags(QStringView tag) {
  1906. return tag.split(kTagSeparator);
  1907. }
  1908. QString TagWithRemoved(const QString &tag, const QString &removed) {
  1909. if (tag == removed) {
  1910. return QString();
  1911. }
  1912. auto list = SplitTags(tag);
  1913. list.erase(ranges::remove(list, QStringView(removed)), list.end());
  1914. return JoinTag(list);
  1915. }
  1916. QString TagWithAdded(const QString &tag, const QString &added) {
  1917. if (tag.isEmpty() || tag == added) {
  1918. return added;
  1919. }
  1920. auto list = SplitTags(tag);
  1921. const auto ref = QStringView(added);
  1922. if (list.contains(ref)) {
  1923. return tag;
  1924. }
  1925. list.push_back(ref);
  1926. std::sort(list.begin(), list.end());
  1927. return JoinTag(list);
  1928. }
  1929. TextWithTags::Tags SimplifyTags(TextWithTags::Tags tags) {
  1930. for (auto i = tags.begin(); i != tags.end();) {
  1931. const auto j = i + 1;
  1932. if (j == tags.end()) {
  1933. break;
  1934. } else if (j->offset > i->offset + i->length) {
  1935. ++i;
  1936. continue;
  1937. }
  1938. auto il = SplitTags(i->id);
  1939. std::sort(il.begin(), il.end());
  1940. auto jl = SplitTags(j->id);
  1941. std::sort(jl.begin(), jl.end());
  1942. if (JoinTag(il) == JoinTag(jl)) {
  1943. i->length = j->offset + j->length - i->offset;
  1944. i = tags.erase(j) - 1;
  1945. } else {
  1946. ++i;
  1947. }
  1948. }
  1949. return tags;
  1950. }
  1951. EntitiesInText ConvertTextTagsToEntities(const TextWithTags::Tags &tags) {
  1952. auto result = EntitiesInText();
  1953. if (tags.isEmpty()) {
  1954. return result;
  1955. }
  1956. constexpr auto kInMaskTypesInline = std::array{
  1957. EntityType::Bold,
  1958. EntityType::Italic,
  1959. EntityType::Underline,
  1960. EntityType::StrikeOut,
  1961. EntityType::Spoiler,
  1962. EntityType::Code,
  1963. };
  1964. constexpr auto kInMaskTypesBlock = std::array{
  1965. EntityType::Pre,
  1966. EntityType::Blockquote,
  1967. };
  1968. struct State {
  1969. QString link;
  1970. QString language;
  1971. uint32 mask : 31 = 0;
  1972. uint32 collapsed : 1 = 0;
  1973. void set(EntityType type) {
  1974. mask |= (1 << int(type));
  1975. }
  1976. void remove(EntityType type) {
  1977. mask &= ~(1 << int(type));
  1978. }
  1979. [[nodiscard]] bool has(EntityType type) const {
  1980. return (mask & (1 << int(type)));
  1981. }
  1982. };
  1983. auto offset = 0;
  1984. auto state = State();
  1985. auto notClosedEntities = std::vector<int>(); // Stack of indices.
  1986. const auto closeOne = [&] {
  1987. Expects(!notClosedEntities.empty());
  1988. auto &entity = result[notClosedEntities.back()];
  1989. const auto type = entity.type();
  1990. entity = {
  1991. type,
  1992. entity.offset(),
  1993. offset - entity.offset(),
  1994. entity.data(),
  1995. };
  1996. if (ranges::contains(kInMaskTypesInline, type)
  1997. || ranges::contains(kInMaskTypesBlock, type)) {
  1998. state.remove(entity.type());
  1999. } else {
  2000. state.link = QString();
  2001. }
  2002. notClosedEntities.pop_back();
  2003. };
  2004. const auto closeType = [&](EntityType type) {
  2005. auto closeCount = 0;
  2006. const auto notClosedCount = notClosedEntities.size();
  2007. while (closeCount < notClosedCount) {
  2008. const auto index = notClosedCount - closeCount - 1;
  2009. if (result[notClosedEntities[index]].type() == type) {
  2010. for (auto i = 0; i != closeCount + 1; ++i) {
  2011. closeOne();
  2012. }
  2013. break;
  2014. }
  2015. ++closeCount;
  2016. }
  2017. };
  2018. const auto openType = [&](EntityType type, const QString &data = {}) {
  2019. notClosedEntities.push_back(result.size());
  2020. result.push_back({ type, offset, -1, data });
  2021. };
  2022. const auto processState = [&](State nextState) {
  2023. const auto linkChanged = (nextState.link != state.link);
  2024. const auto closeLink = linkChanged && !state.link.isEmpty();
  2025. const auto closeCustomEmoji = closeLink
  2026. && Ui::InputField::IsCustomEmojiLink(state.link);
  2027. if (closeCustomEmoji) {
  2028. closeType(EntityType::CustomEmoji);
  2029. }
  2030. for (const auto type : kInMaskTypesInline) {
  2031. if (state.has(type) && !nextState.has(type)) {
  2032. closeType(type);
  2033. }
  2034. }
  2035. if (closeLink && !closeCustomEmoji) {
  2036. if (IsMentionLink(state.link)) {
  2037. closeType(EntityType::MentionName);
  2038. } else {
  2039. closeType(EntityType::CustomUrl);
  2040. }
  2041. }
  2042. for (const auto type : kInMaskTypesBlock) {
  2043. if (state.has(type) && !nextState.has(type)) {
  2044. closeType(type);
  2045. }
  2046. }
  2047. const auto openLink = linkChanged && !nextState.link.isEmpty();
  2048. const auto openCustomEmoji = openLink
  2049. && Ui::InputField::IsCustomEmojiLink(nextState.link);
  2050. for (const auto type : kInMaskTypesBlock | ranges::views::reverse) {
  2051. if (nextState.has(type) && !state.has(type)) {
  2052. openType(type, (type == EntityType::Pre)
  2053. ? nextState.language
  2054. : (type == EntityType::Blockquote && nextState.collapsed)
  2055. ? u"1"_q
  2056. : QString());
  2057. }
  2058. }
  2059. if (openLink && !openCustomEmoji) {
  2060. if (IsMentionLink(nextState.link)) {
  2061. const auto data = MentionEntityData(nextState.link);
  2062. if (!data.isEmpty()) {
  2063. openType(EntityType::MentionName, data);
  2064. }
  2065. } else {
  2066. openType(EntityType::CustomUrl, nextState.link);
  2067. }
  2068. }
  2069. for (const auto type : kInMaskTypesInline | ranges::views::reverse) {
  2070. if (nextState.has(type) && !state.has(type)) {
  2071. openType(type);
  2072. }
  2073. }
  2074. if (openCustomEmoji) {
  2075. const auto data = Ui::InputField::CustomEmojiEntityData(
  2076. nextState.link);
  2077. if (!data.isEmpty()) {
  2078. openType(EntityType::CustomEmoji, data);
  2079. }
  2080. }
  2081. state = nextState;
  2082. };
  2083. const auto stateForTag = [&](const QString &tag) {
  2084. using Tags = Ui::InputField;
  2085. auto result = State();
  2086. const auto list = SplitTags(tag);
  2087. const auto languageStart = Tags::kTagPre.size();
  2088. for (const auto &single : list) {
  2089. if (single == Tags::kTagBold) {
  2090. result.set(EntityType::Bold);
  2091. } else if (single == Tags::kTagItalic) {
  2092. result.set(EntityType::Italic);
  2093. } else if (single == Tags::kTagUnderline) {
  2094. result.set(EntityType::Underline);
  2095. } else if (single == Tags::kTagStrikeOut) {
  2096. result.set(EntityType::StrikeOut);
  2097. } else if (single == Tags::kTagCode) {
  2098. result.set(EntityType::Code);
  2099. } else if (single == Tags::kTagPre) {
  2100. result.set(EntityType::Pre);
  2101. } else if (single.size() > languageStart
  2102. && single.startsWith(Tags::kTagPre)) {
  2103. result.set(EntityType::Pre);
  2104. result.language = single.mid(languageStart).toString();
  2105. } else if (single == Tags::kTagBlockquote) {
  2106. result.set(EntityType::Blockquote);
  2107. result.collapsed = 0;
  2108. } else if (single == Tags::kTagBlockquoteCollapsed) {
  2109. result.set(EntityType::Blockquote);
  2110. result.collapsed = 1;
  2111. } else if (single == Tags::kTagSpoiler) {
  2112. result.set(EntityType::Spoiler);
  2113. } else {
  2114. result.link = single.toString();
  2115. }
  2116. }
  2117. return result;
  2118. };
  2119. for (const auto &tag : tags) {
  2120. if (tag.offset > offset) {
  2121. processState(State());
  2122. }
  2123. offset = tag.offset;
  2124. processState(stateForTag(tag.id));
  2125. offset += tag.length;
  2126. }
  2127. processState(State());
  2128. result.erase(ranges::remove_if(result, [](const EntityInText &entity) {
  2129. return (entity.length() <= 0);
  2130. }), result.end());
  2131. return result;
  2132. }
  2133. TextWithTags::Tags ConvertEntitiesToTextTags(
  2134. const EntitiesInText &entities) {
  2135. auto result = TextWithTags::Tags();
  2136. if (entities.isEmpty()) {
  2137. return result;
  2138. }
  2139. auto offset = 0;
  2140. auto current = QString();
  2141. const auto updateCurrent = [&](int nextOffset, const QString &next) {
  2142. if (next == current) {
  2143. return;
  2144. } else if (nextOffset > offset) {
  2145. if (!current.isEmpty()) {
  2146. result.push_back({ offset, nextOffset - offset, current });
  2147. }
  2148. offset = nextOffset;
  2149. }
  2150. current = next;
  2151. };
  2152. auto toRemove = std::vector<std::pair<int, QString>>();
  2153. const auto removeTill = [&](int nextOffset) {
  2154. while (!toRemove.empty() && toRemove.front().first <= nextOffset) {
  2155. updateCurrent(
  2156. toRemove.front().first,
  2157. TagWithRemoved(current, toRemove.front().second));
  2158. toRemove.erase(toRemove.begin());
  2159. }
  2160. };
  2161. for (const auto &entity : entities) {
  2162. const auto push = [&](const QString &tag) {
  2163. removeTill(entity.offset());
  2164. updateCurrent(entity.offset(), TagWithAdded(current, tag));
  2165. toRemove.push_back({ offset + entity.length(), tag });
  2166. ranges::sort(toRemove);
  2167. };
  2168. switch (entity.type()) {
  2169. case EntityType::MentionName: {
  2170. static const auto RegExp = QRegularExpression(
  2171. "^(\\d+\\.\\d+:\\d+)$"
  2172. );
  2173. const auto match = RegExp.match(entity.data());
  2174. if (match.hasMatch()) {
  2175. push(kMentionTagStart + entity.data());
  2176. }
  2177. } break;
  2178. case EntityType::CustomUrl: {
  2179. const auto url = entity.data();
  2180. if (Ui::InputField::IsValidMarkdownLink(url)
  2181. && !IsMentionLink(url)) {
  2182. push(url);
  2183. }
  2184. } break;
  2185. case EntityType::CustomEmoji: {
  2186. static const auto RegExp = QRegularExpression("^(\\d+)$");
  2187. const auto match = RegExp.match(entity.data());
  2188. if (match.hasMatch()) {
  2189. push(Ui::InputField::CustomEmojiLink(entity.data()));
  2190. }
  2191. } break;
  2192. case EntityType::Bold: push(Ui::InputField::kTagBold); break;
  2193. //case EntityType::Semibold: // Semibold is for UI parts only.
  2194. // push(Ui::InputField::kTagSemibold);
  2195. // break;
  2196. case EntityType::Italic: push(Ui::InputField::kTagItalic); break;
  2197. case EntityType::Underline:
  2198. push(Ui::InputField::kTagUnderline);
  2199. break;
  2200. case EntityType::StrikeOut:
  2201. push(Ui::InputField::kTagStrikeOut);
  2202. break;
  2203. case EntityType::Code: push(Ui::InputField::kTagCode); break;
  2204. case EntityType::Pre: {
  2205. if (!entity.data().isEmpty()) {
  2206. static const auto Language = QRegularExpression("^[a-zA-Z0-9\\-\\+]+$");
  2207. if (Language.match(entity.data()).hasMatch()) {
  2208. push(Ui::InputField::kTagPre + entity.data());
  2209. break;
  2210. }
  2211. }
  2212. push(Ui::InputField::kTagPre);
  2213. } break;
  2214. case EntityType::Blockquote:
  2215. push(entity.data().isEmpty()
  2216. ? Ui::InputField::kTagBlockquote
  2217. : Ui::InputField::kTagBlockquoteCollapsed);
  2218. break;
  2219. case EntityType::Spoiler: push(Ui::InputField::kTagSpoiler); break;
  2220. }
  2221. }
  2222. if (!toRemove.empty()) {
  2223. removeTill(toRemove.back().first);
  2224. }
  2225. return result;
  2226. }
  2227. std::unique_ptr<QMimeData> MimeDataFromText(const TextForMimeData &text) {
  2228. return MimeDataFromText(
  2229. { text.rich.text, ConvertEntitiesToTextTags(text.rich.entities) },
  2230. text.expanded);
  2231. }
  2232. std::unique_ptr<QMimeData> MimeDataFromText(TextWithTags &&text) {
  2233. const auto expanded = ExpandCustomLinks(text);
  2234. return MimeDataFromText(std::move(text), expanded);
  2235. }
  2236. void SetClipboardText(
  2237. const TextForMimeData &text,
  2238. QClipboard::Mode mode) {
  2239. if (auto data = MimeDataFromText(text)) {
  2240. QGuiApplication::clipboard()->setMimeData(data.release(), mode);
  2241. }
  2242. }
  2243. } // namespace TextUtilities
  2244. TextForMimeData TextForMimeData::WithExpandedLinks(
  2245. const TextWithEntities &text) {
  2246. auto result = TextForMimeData{ .rich = text };
  2247. if (!ranges::contains(
  2248. text.entities,
  2249. EntityType::CustomUrl,
  2250. &EntityInText::type)) {
  2251. result.expanded = text.text;
  2252. } else {
  2253. auto from = 0;
  2254. for (const auto &entity : text.entities) {
  2255. if (entity.type() != EntityType::CustomUrl) {
  2256. continue;
  2257. }
  2258. // This logic is duplicated in Ui::Text::String::toText.
  2259. const auto &data = entity.data();
  2260. if (!data.startsWith(qstr("internal:"))
  2261. && (data != UrlClickHandler::EncodeForOpening(
  2262. text.text.mid(entity.offset(), entity.length())))) {
  2263. const auto till = entity.offset() + entity.length();
  2264. if (const auto add = till - from; add > 0) {
  2265. result.expanded.append(text.text.data() + from, add);
  2266. from = till;
  2267. }
  2268. result.expanded.append(qstr(" (")).append(data).append(')');
  2269. }
  2270. }
  2271. const auto till = text.text.size();
  2272. if (const auto add = till - from; add > 0) {
  2273. result.expanded.append(text.text.data() + from, add);
  2274. from = till;
  2275. }
  2276. }
  2277. return result;
  2278. }
// Creates an entity of the given type that covers `length` characters
// starting at `offset`, with optional type-specific `data`.
// The values are stored as passed, no validation is done here.
EntityInText::EntityInText(
	EntityType type,
	int offset,
	int length,
	const QString &data)
: _type(type)
, _offset(offset)
, _length(length)
, _data(data) {
}
  2289. int EntityInText::FirstMonospaceOffset(
  2290. const EntitiesInText &entities,
  2291. int textLength) {
  2292. auto &&monospace = ranges::make_subrange(
  2293. entities.begin(),
  2294. entities.end()
  2295. ) | ranges::views::filter([](const EntityInText & entity) {
  2296. return (entity.type() == EntityType::Pre)
  2297. || (entity.type() == EntityType::Code);
  2298. });
  2299. const auto i = ranges::max_element(
  2300. monospace,
  2301. std::greater<>(),
  2302. &EntityInText::offset);
  2303. return (i == monospace.end()) ? textLength : i->offset();
  2304. }