text_bidi_algorithm.h 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013
  1. // This file is part of Desktop App Toolkit,
  2. // a set of libraries for developing nice desktop applications.
  3. //
  4. // For license and copyright information please follow this link:
  5. // https://github.com/desktop-app/legal/blob/master/LEGAL
  6. //
  7. #pragma once
  8. #include "ui/text/text.h"
  9. #include <private/qunicodetables_p.h>
  10. #include <private/qtextengine_p.h>
  11. #define BIDI_DEBUG if (1) ; else qDebug
  12. namespace Ui::Text {
  13. enum { BidiDebugEnabled = false };
  14. class BidiAlgorithm {
  15. public:
  16. template<typename T> using Vector = QVarLengthArray<T, 64>;
  17. BidiAlgorithm(const QChar *text, QScriptAnalysis *analysis, int length, bool baseDirectionIsRtl,
  18. Blocks::const_iterator startInBlocks,
  19. Blocks::const_iterator endInBlocks,
  20. int offsetInBlocks)
  21. : text(text),
  22. analysis(analysis),
  23. length(length),
  24. baseLevel(baseDirectionIsRtl ? 1 : 0),
  25. _startInBlocks(startInBlocks),
  26. _endInBlocks(endInBlocks),
  27. _currentBlock(_startInBlocks),
  28. _offsetInBlocks(offsetInBlocks)
  29. {
  30. }
  31. struct IsolatePair {
  32. int start;
  33. int end;
  34. };
  35. void initScriptAnalysisAndIsolatePairs(Vector<IsolatePair> &isolatePairs)
  36. {
  37. int isolateStack[128];
  38. int isolateLevel = 0;
  39. // load directions of string, and determine isolate pairs
  40. for (int i = 0; i < length; ++i) {
  41. int pos = i;
  42. const auto info = infoAt(i);
  43. if (info.surrogate) {
  44. ++i;
  45. analysis[i].bidiDirection = QChar::DirNSM;
  46. }
  47. const auto p = info.properties;
  48. analysis[pos].bidiDirection = QChar::Direction(p->direction);
  49. switch (QChar::Direction(p->direction)) {
  50. case QChar::DirON:
  51. // all mirrored chars are DirON
  52. if (p->mirrorDiff)
  53. analysis[pos].bidiFlags = QScriptAnalysis::BidiMirrored;
  54. break;
  55. case QChar::DirLRE:
  56. case QChar::DirRLE:
  57. case QChar::DirLRO:
  58. case QChar::DirRLO:
  59. case QChar::DirPDF:
  60. case QChar::DirBN:
  61. analysis[pos].bidiFlags = QScriptAnalysis::BidiMaybeResetToParagraphLevel|QScriptAnalysis::BidiBN;
  62. break;
  63. case QChar::DirLRI:
  64. case QChar::DirRLI:
  65. case QChar::DirFSI:
  66. if (isolateLevel < 128) {
  67. isolateStack[isolateLevel] = isolatePairs.size();
  68. isolatePairs.append({ pos, length });
  69. }
  70. ++isolateLevel;
  71. analysis[pos].bidiFlags = QScriptAnalysis::BidiMaybeResetToParagraphLevel;
  72. break;
  73. case QChar::DirPDI:
  74. if (isolateLevel > 0) {
  75. --isolateLevel;
  76. if (isolateLevel < 128)
  77. isolatePairs[isolateStack[isolateLevel]].end = pos;
  78. }
  79. Q_FALLTHROUGH();
  80. case QChar::DirWS:
  81. analysis[pos].bidiFlags = QScriptAnalysis::BidiMaybeResetToParagraphLevel;
  82. break;
  83. case QChar::DirS:
  84. case QChar::DirB:
  85. analysis[pos].bidiFlags = QScriptAnalysis::BidiResetToParagraphLevel;
  86. if (text[pos] == QChar::ParagraphSeparator) {
  87. // close all open isolates as we start a new paragraph
  88. while (isolateLevel > 0) {
  89. --isolateLevel;
  90. if (isolateLevel < 128)
  91. isolatePairs[isolateStack[isolateLevel]].end = pos;
  92. }
  93. }
  94. break;
  95. default:
  96. break;
  97. }
  98. }
  99. }
  100. struct DirectionalRun {
  101. int start;
  102. int end;
  103. int continuation;
  104. ushort level;
  105. bool isContinuation;
  106. bool hasContent;
  107. };
  108. void generateDirectionalRuns(const Vector<IsolatePair> &isolatePairs, Vector<DirectionalRun> &runs)
  109. {
  110. struct DirectionalStack {
  111. enum { MaxDepth = 125 };
  112. struct Item {
  113. ushort level;
  114. bool isOverride;
  115. bool isIsolate;
  116. int runBeforeIsolate;
  117. };
  118. Item items[128];
  119. int counter = 0;
  120. void push(Item i) {
  121. items[counter] = i;
  122. ++counter;
  123. }
  124. void pop() {
  125. --counter;
  126. }
  127. int depth() const {
  128. return counter;
  129. }
  130. const Item &top() const {
  131. return items[counter - 1];
  132. }
  133. } stack;
  134. int overflowIsolateCount = 0;
  135. int overflowEmbeddingCount = 0;
  136. int validIsolateCount = 0;
  137. ushort level = baseLevel;
  138. bool override = false;
  139. stack.push({ level, false, false, -1 });
  140. BIDI_DEBUG() << "resolving explicit levels";
  141. int runStart = 0;
  142. int continuationFrom = -1;
  143. int lastRunWithContent = -1;
  144. bool runHasContent = false;
  145. auto appendRun = [&](int runEnd) {
  146. if (runEnd < runStart)
  147. return;
  148. bool isContinuation = false;
  149. if (continuationFrom != -1) {
  150. runs[continuationFrom].continuation = runs.size();
  151. isContinuation = true;
  152. } else if (lastRunWithContent != -1 && level == runs.at(lastRunWithContent).level) {
  153. runs[lastRunWithContent].continuation = runs.size();
  154. isContinuation = true;
  155. }
  156. if (runHasContent)
  157. lastRunWithContent = runs.size();
  158. BIDI_DEBUG() << " appending run start/end" << runStart << runEnd << "level" << level;
  159. runs.append({ runStart, runEnd, -1, level, isContinuation, runHasContent });
  160. runHasContent = false;
  161. runStart = runEnd + 1;
  162. continuationFrom = -1;
  163. };
  164. int isolatePairPosition = 0;
  165. for (int i = 0; i < length; ++i) {
  166. QChar::Direction dir = analysis[i].bidiDirection;
  167. auto doEmbed = [&](bool isRtl, bool isOverride, bool isIsolate) {
  168. if (isIsolate) {
  169. if (override)
  170. analysis[i].bidiDirection = (level & 1) ? QChar::DirR : QChar::DirL;
  171. runHasContent = true;
  172. lastRunWithContent = -1;
  173. ++isolatePairPosition;
  174. }
  175. int runBeforeIsolate = runs.size();
  176. ushort newLevel = isRtl ? ((stack.top().level + 1) | 1) : ((stack.top().level + 2) & ~1);
  177. if (newLevel <= DirectionalStack::MaxDepth && !overflowEmbeddingCount && !overflowIsolateCount) {
  178. if (isIsolate)
  179. ++validIsolateCount;
  180. else
  181. runBeforeIsolate = -1;
  182. appendRun(isIsolate ? i : i - 1);
  183. BIDI_DEBUG() << "pushing new item on stack: level" << (int)newLevel << "isOverride" << isOverride << "isIsolate" << isIsolate << runBeforeIsolate;
  184. stack.push({ newLevel, isOverride, isIsolate, runBeforeIsolate });
  185. override = isOverride;
  186. level = newLevel;
  187. } else {
  188. if (isIsolate)
  189. ++overflowIsolateCount;
  190. else if (!overflowIsolateCount)
  191. ++overflowEmbeddingCount;
  192. }
  193. if (!isIsolate) {
  194. if (override)
  195. analysis[i].bidiDirection = (level & 1) ? QChar::DirR : QChar::DirL;
  196. else
  197. analysis[i].bidiDirection = QChar::DirBN;
  198. }
  199. };
  200. switch (dir) {
  201. case QChar::DirLRE:
  202. doEmbed(false, false, false);
  203. break;
  204. case QChar::DirRLE:
  205. doEmbed(true, false, false);
  206. break;
  207. case QChar::DirLRO:
  208. doEmbed(false, true, false);
  209. break;
  210. case QChar::DirRLO:
  211. doEmbed(true, true, false);
  212. break;
  213. case QChar::DirLRI:
  214. doEmbed(false, false, true);
  215. break;
  216. case QChar::DirRLI:
  217. doEmbed(true, false, true);
  218. break;
  219. case QChar::DirFSI: {
  220. bool isRtl = false;
  221. if (isolatePairPosition < isolatePairs.size()) {
  222. const auto &pair = isolatePairs.at(isolatePairPosition);
  223. Q_ASSERT(pair.start == i);
  224. isRtl = QStringView(text + pair.start + 1, pair.end - pair.start - 1).isRightToLeft();
  225. }
  226. doEmbed(isRtl, false, true);
  227. break;
  228. }
  229. case QChar::DirPDF:
  230. if (override)
  231. analysis[i].bidiDirection = (level & 1) ? QChar::DirR : QChar::DirL;
  232. else
  233. analysis[i].bidiDirection = QChar::DirBN;
  234. if (overflowIsolateCount) {
  235. ; // do nothing
  236. } else if (overflowEmbeddingCount) {
  237. --overflowEmbeddingCount;
  238. } else if (!stack.top().isIsolate && stack.depth() >= 2) {
  239. appendRun(i);
  240. stack.pop();
  241. override = stack.top().isOverride;
  242. level = stack.top().level;
  243. BIDI_DEBUG() << "popped PDF from stack, level now" << (int)stack.top().level;
  244. }
  245. break;
  246. case QChar::DirPDI:
  247. runHasContent = true;
  248. if (overflowIsolateCount) {
  249. --overflowIsolateCount;
  250. } else if (validIsolateCount == 0) {
  251. ; // do nothing
  252. } else {
  253. appendRun(i - 1);
  254. overflowEmbeddingCount = 0;
  255. while (!stack.top().isIsolate)
  256. stack.pop();
  257. continuationFrom = stack.top().runBeforeIsolate;
  258. BIDI_DEBUG() << "popped PDI from stack, level now" << (int)stack.top().level << "continuation from" << continuationFrom;
  259. stack.pop();
  260. override = stack.top().isOverride;
  261. level = stack.top().level;
  262. lastRunWithContent = -1;
  263. --validIsolateCount;
  264. }
  265. if (override)
  266. analysis[i].bidiDirection = (level & 1) ? QChar::DirR : QChar::DirL;
  267. break;
  268. case QChar::DirB:
  269. // paragraph separator, go down to base direction, reset all state
  270. if (text[i].unicode() == QChar::ParagraphSeparator) {
  271. appendRun(i - 1);
  272. while (stack.counter > 1) {
  273. // there might be remaining isolates on the stack that are missing a PDI. Those need to get
  274. // a continuation indicating to take the eos from the end of the string (ie. the paragraph level)
  275. const auto &t = stack.top();
  276. if (t.isIsolate) {
  277. runs[t.runBeforeIsolate].continuation = -2;
  278. }
  279. --stack.counter;
  280. }
  281. continuationFrom = -1;
  282. lastRunWithContent = -1;
  283. validIsolateCount = 0;
  284. overflowIsolateCount = 0;
  285. overflowEmbeddingCount = 0;
  286. level = baseLevel;
  287. }
  288. break;
  289. default:
  290. runHasContent = true;
  291. Q_FALLTHROUGH();
  292. case QChar::DirBN:
  293. if (override)
  294. analysis[i].bidiDirection = (level & 1) ? QChar::DirR : QChar::DirL;
  295. break;
  296. }
  297. }
  298. appendRun(length - 1);
  299. while (stack.counter > 1) {
  300. // there might be remaining isolates on the stack that are missing a PDI. Those need to get
  301. // a continuation indicating to take the eos from the end of the string (ie. the paragraph level)
  302. const auto &t = stack.top();
  303. if (t.isIsolate) {
  304. runs[t.runBeforeIsolate].continuation = -2;
  305. }
  306. --stack.counter;
  307. }
  308. }
  309. void resolveExplicitLevels(Vector<DirectionalRun> &runs)
  310. {
  311. Vector<IsolatePair> isolatePairs;
  312. initScriptAnalysisAndIsolatePairs(isolatePairs);
  313. generateDirectionalRuns(isolatePairs, runs);
  314. }
  315. struct IsolatedRunSequenceIterator {
  316. struct Position {
  317. int current = -1;
  318. int pos = -1;
  319. Position() = default;
  320. Position(int current, int pos) : current(current), pos(pos) {}
  321. bool isValid() const { return pos != -1; }
  322. void clear() { pos = -1; }
  323. };
  324. IsolatedRunSequenceIterator(const Vector<DirectionalRun> &runs, int i)
  325. : runs(runs),
  326. current(i)
  327. {
  328. pos = runs.at(current).start;
  329. }
  330. int operator *() const { return pos; }
  331. bool atEnd() const { return pos < 0; }
  332. void operator++() {
  333. ++pos;
  334. if (pos > runs.at(current).end) {
  335. current = runs.at(current).continuation;
  336. if (current > -1)
  337. pos = runs.at(current).start;
  338. else
  339. pos = -1;
  340. }
  341. }
  342. void setPosition(Position p) {
  343. current = p.current;
  344. pos = p.pos;
  345. }
  346. Position position() const {
  347. return Position(current, pos);
  348. }
  349. bool operator !=(int position) const {
  350. return pos != position;
  351. }
  352. const Vector<DirectionalRun> &runs;
  353. int current;
  354. int pos;
  355. };
  356. void resolveW1W2W3(const Vector<DirectionalRun> &runs, int i, QChar::Direction sos)
  357. {
  358. QChar::Direction last = sos;
  359. QChar::Direction lastStrong = sos;
  360. IsolatedRunSequenceIterator it(runs, i);
  361. while (!it.atEnd()) {
  362. int pos = *it;
  363. // Rule W1: Resolve NSM
  364. QChar::Direction current = analysis[pos].bidiDirection;
  365. if (current == QChar::DirNSM) {
  366. current = last;
  367. analysis[pos].bidiDirection = current;
  368. } else if (current >= QChar::DirLRI) {
  369. last = QChar::DirON;
  370. } else if (current == QChar::DirBN) {
  371. current = last;
  372. } else {
  373. // there shouldn't be any explicit embedding marks here
  374. Q_ASSERT(current != QChar::DirLRE);
  375. Q_ASSERT(current != QChar::DirRLE);
  376. Q_ASSERT(current != QChar::DirLRO);
  377. Q_ASSERT(current != QChar::DirRLO);
  378. Q_ASSERT(current != QChar::DirPDF);
  379. last = current;
  380. }
  381. // Rule W2
  382. if (current == QChar::DirEN && lastStrong == QChar::DirAL) {
  383. current = QChar::DirAN;
  384. analysis[pos].bidiDirection = current;
  385. }
  386. // remember last strong char for rule W2
  387. if (current == QChar::DirL || current == QChar::DirR) {
  388. lastStrong = current;
  389. } else if (current == QChar::DirAL) {
  390. // Rule W3
  391. lastStrong = current;
  392. analysis[pos].bidiDirection = QChar::DirR;
  393. }
  394. last = current;
  395. ++it;
  396. }
  397. }
  398. void resolveW4(const Vector<DirectionalRun> &runs, int i, QChar::Direction sos)
  399. {
  400. // Rule W4
  401. QChar::Direction secondLast = sos;
  402. IsolatedRunSequenceIterator it(runs, i);
  403. int lastPos = *it;
  404. QChar::Direction last = analysis[lastPos].bidiDirection;
  405. // BIDI_DEBUG() << "Applying rule W4/W5";
  406. ++it;
  407. while (!it.atEnd()) {
  408. int pos = *it;
  409. QChar::Direction current = analysis[pos].bidiDirection;
  410. if (current == QChar::DirBN) {
  411. ++it;
  412. continue;
  413. }
  414. // BIDI_DEBUG() << pos << secondLast << last << current;
  415. if (last == QChar::DirES && current == QChar::DirEN && secondLast == QChar::DirEN) {
  416. last = QChar::DirEN;
  417. analysis[lastPos].bidiDirection = last;
  418. } else if (last == QChar::DirCS) {
  419. if (current == QChar::DirEN && secondLast == QChar::DirEN) {
  420. last = QChar::DirEN;
  421. analysis[lastPos].bidiDirection = last;
  422. } else if (current == QChar::DirAN && secondLast == QChar::DirAN) {
  423. last = QChar::DirAN;
  424. analysis[lastPos].bidiDirection = last;
  425. }
  426. }
  427. secondLast = last;
  428. last = current;
  429. lastPos = pos;
  430. ++it;
  431. }
  432. }
  433. void resolveW5(const Vector<DirectionalRun> &runs, int i)
  434. {
  435. // Rule W5
  436. IsolatedRunSequenceIterator::Position lastETPosition;
  437. IsolatedRunSequenceIterator it(runs, i);
  438. int lastPos = *it;
  439. QChar::Direction last = analysis[lastPos].bidiDirection;
  440. if (last == QChar::DirET || last == QChar::DirBN)
  441. lastETPosition = it.position();
  442. ++it;
  443. while (!it.atEnd()) {
  444. int pos = *it;
  445. QChar::Direction current = analysis[pos].bidiDirection;
  446. if (current == QChar::DirBN) {
  447. ++it;
  448. continue;
  449. }
  450. if (current == QChar::DirET) {
  451. if (last == QChar::DirEN) {
  452. current = QChar::DirEN;
  453. analysis[pos].bidiDirection = current;
  454. } else if (!lastETPosition.isValid()) {
  455. lastETPosition = it.position();
  456. }
  457. } else if (lastETPosition.isValid()) {
  458. if (current == QChar::DirEN) {
  459. it.setPosition(lastETPosition);
  460. while (it != pos) {
  461. int pos = *it;
  462. analysis[pos].bidiDirection = QChar::DirEN;
  463. ++it;
  464. }
  465. }
  466. lastETPosition.clear();
  467. }
  468. last = current;
  469. lastPos = pos;
  470. ++it;
  471. }
  472. }
  473. void resolveW6W7(const Vector<DirectionalRun> &runs, int i, QChar::Direction sos)
  474. {
  475. QChar::Direction lastStrong = sos;
  476. IsolatedRunSequenceIterator it(runs, i);
  477. while (!it.atEnd()) {
  478. int pos = *it;
  479. // Rule W6
  480. QChar::Direction current = analysis[pos].bidiDirection;
  481. if (current == QChar::DirBN) {
  482. ++it;
  483. continue;
  484. }
  485. if (current == QChar::DirET || current == QChar::DirES || current == QChar::DirCS) {
  486. analysis[pos].bidiDirection = QChar::DirON;
  487. }
  488. // Rule W7
  489. else if (current == QChar::DirL || current == QChar::DirR) {
  490. lastStrong = current;
  491. } else if (current == QChar::DirEN && lastStrong == QChar::DirL) {
  492. analysis[pos].bidiDirection = lastStrong;
  493. }
  494. ++it;
  495. }
  496. }
  497. struct BracketPair {
  498. int first;
  499. int second;
  500. bool isValid() const { return second > 0; }
  501. QChar::Direction containedDirection(const QScriptAnalysis *analysis, QChar::Direction embeddingDir) const {
  502. int isolateCounter = 0;
  503. QChar::Direction containedDir = QChar::DirON;
  504. for (int i = first + 1; i < second; ++i) {
  505. QChar::Direction dir = analysis[i].bidiDirection;
  506. if (isolateCounter) {
  507. if (dir == QChar::DirPDI)
  508. --isolateCounter;
  509. continue;
  510. }
  511. if (dir == QChar::DirL) {
  512. containedDir = dir;
  513. if (embeddingDir == dir)
  514. break;
  515. } else if (dir == QChar::DirR || dir == QChar::DirAN || dir == QChar::DirEN) {
  516. containedDir = QChar::DirR;
  517. if (embeddingDir == QChar::DirR)
  518. break;
  519. } else if (dir == QChar::DirLRI || dir == QChar::DirRLI || dir == QChar::DirFSI)
  520. ++isolateCounter;
  521. }
  522. BIDI_DEBUG() << " contained dir for backet pair" << first << "/" << second << "is" << containedDir;
  523. return containedDir;
  524. }
  525. };
  526. struct BracketStack {
  527. struct Item {
  528. Item() = default;
  529. Item(uint pairedBracked, int position) : pairedBracked(pairedBracked), position(position) {}
  530. uint pairedBracked = 0;
  531. int position = 0;
  532. };
  533. void push(uint closingUnicode, int pos) {
  534. if (position < MaxDepth)
  535. stack[position] = Item(closingUnicode, pos);
  536. ++position;
  537. }
  538. int match(uint unicode) {
  539. Q_ASSERT(!overflowed());
  540. int p = position;
  541. while (--p >= 0) {
  542. if (stack[p].pairedBracked == unicode ||
  543. // U+3009 and U+2329 are canonical equivalents of each other. Fortunately it's the only pair in Unicode 10
  544. (stack[p].pairedBracked == 0x3009 && unicode == 0x232a) ||
  545. (stack[p].pairedBracked == 0x232a && unicode == 0x3009)) {
  546. position = p;
  547. return stack[p].position;
  548. }
  549. }
  550. return -1;
  551. }
  552. enum { MaxDepth = 63 };
  553. Item stack[MaxDepth];
  554. int position = 0;
  555. bool overflowed() const { return position > MaxDepth; }
  556. };
  557. void resolveN0(const Vector<DirectionalRun> &runs, int i, QChar::Direction sos)
  558. {
  559. ushort level = runs.at(i).level;
  560. Vector<BracketPair> bracketPairs;
  561. {
  562. BracketStack bracketStack;
  563. IsolatedRunSequenceIterator it(runs, i);
  564. while (!it.atEnd()) {
  565. int pos = *it;
  566. QChar::Direction dir = analysis[pos].bidiDirection;
  567. if (dir == QChar::DirON) {
  568. const QUnicodeTables::Properties *p = infoAt(pos).properties;
  569. if (p->mirrorDiff) {
  570. // either opening or closing bracket
  571. if (p->category == QChar::Punctuation_Open) {
  572. // opening bracked
  573. uint closingBracked = text[pos].unicode() + p->mirrorDiff;
  574. bracketStack.push(closingBracked, bracketPairs.size());
  575. if (bracketStack.overflowed()) {
  576. bracketPairs.clear();
  577. break;
  578. }
  579. bracketPairs.append({ pos, -1 });
  580. } else if (p->category == QChar::Punctuation_Close) {
  581. int pairPos = bracketStack.match(text[pos].unicode());
  582. if (pairPos != -1)
  583. bracketPairs[pairPos].second = pos;
  584. }
  585. }
  586. }
  587. ++it;
  588. }
  589. }
  590. if (BidiDebugEnabled && bracketPairs.size()) {
  591. BIDI_DEBUG() << "matched bracket pairs:";
  592. for (int i = 0; i < bracketPairs.size(); ++i)
  593. BIDI_DEBUG() << " " << bracketPairs.at(i).first << bracketPairs.at(i).second;
  594. }
  595. QChar::Direction lastStrong = sos;
  596. IsolatedRunSequenceIterator it(runs, i);
  597. QChar::Direction embeddingDir = (level & 1) ? QChar::DirR : QChar::DirL;
  598. for (int i = 0; i < bracketPairs.size(); ++i) {
  599. const auto &pair = bracketPairs.at(i);
  600. if (!pair.isValid())
  601. continue;
  602. QChar::Direction containedDir = pair.containedDirection(analysis, embeddingDir);
  603. if (containedDir == QChar::DirON) {
  604. BIDI_DEBUG() << " 3: resolve bracket pair" << i << "to DirON";
  605. continue;
  606. } else if (containedDir == embeddingDir) {
  607. analysis[pair.first].bidiDirection = embeddingDir;
  608. analysis[pair.second].bidiDirection = embeddingDir;
  609. BIDI_DEBUG() << " 1: resolve bracket pair" << i << "to" << embeddingDir;
  610. } else {
  611. // case c.
  612. while (it.pos < pair.first) {
  613. int pos = *it;
  614. switch (analysis[pos].bidiDirection) {
  615. case QChar::DirR:
  616. case QChar::DirEN:
  617. case QChar::DirAN:
  618. lastStrong = QChar::DirR;
  619. break;
  620. case QChar::DirL:
  621. lastStrong = QChar::DirL;
  622. break;
  623. default:
  624. break;
  625. }
  626. ++it;
  627. }
  628. analysis[pair.first].bidiDirection = lastStrong;
  629. analysis[pair.second].bidiDirection = lastStrong;
  630. BIDI_DEBUG() << " 2: resolve bracket pair" << i << "to" << lastStrong;
  631. }
  632. for (int i = pair.second + 1; i < length; ++i) {
  633. if (infoAt(i).properties->direction == QChar::DirNSM)
  634. analysis[i].bidiDirection = analysis[pair.second].bidiDirection;
  635. else
  636. break;
  637. }
  638. }
  639. }
  640. void resolveN1N2(const Vector<DirectionalRun> &runs, int i, QChar::Direction sos, QChar::Direction eos)
  641. {
  642. // Rule N1 & N2
  643. QChar::Direction lastStrong = sos;
  644. IsolatedRunSequenceIterator::Position niPos;
  645. IsolatedRunSequenceIterator it(runs, i);
  646. // QChar::Direction last = QChar::DirON;
  647. while (1) {
  648. int pos = *it;
  649. QChar::Direction current = pos >= 0 ? analysis[pos].bidiDirection : eos;
  650. QChar::Direction currentStrong = current;
  651. switch (current) {
  652. case QChar::DirEN:
  653. case QChar::DirAN:
  654. currentStrong = QChar::DirR;
  655. Q_FALLTHROUGH();
  656. case QChar::DirL:
  657. case QChar::DirR:
  658. if (niPos.isValid()) {
  659. QChar::Direction dir = currentStrong;
  660. if (lastStrong != currentStrong)
  661. dir = (runs.at(i).level) & 1 ? QChar::DirR : QChar::DirL;
  662. it.setPosition(niPos);
  663. while (*it != pos) {
  664. if (analysis[*it].bidiDirection != QChar::DirBN)
  665. analysis[*it].bidiDirection = dir;
  666. ++it;
  667. }
  668. niPos.clear();
  669. }
  670. lastStrong = currentStrong;
  671. break;
  672. case QChar::DirBN:
  673. case QChar::DirS:
  674. case QChar::DirWS:
  675. case QChar::DirON:
  676. case QChar::DirFSI:
  677. case QChar::DirLRI:
  678. case QChar::DirRLI:
  679. case QChar::DirPDI:
  680. case QChar::DirB:
  681. if (!niPos.isValid())
  682. niPos = it.position();
  683. break;
  684. default:
  685. Q_UNREACHABLE();
  686. }
  687. if (it.atEnd())
  688. break;
  689. // last = current;
  690. ++it;
  691. }
  692. }
  693. void resolveImplicitLevelsForIsolatedRun(const Vector<DirectionalRun> &runs, int i)
  694. {
  695. // Rule X10
  696. int level = runs.at(i).level;
  697. int before = i - 1;
  698. while (before >= 0 && !runs.at(before).hasContent)
  699. --before;
  700. int level_before = (before >= 0) ? runs.at(before).level : baseLevel;
  701. int after = i;
  702. while (runs.at(after).continuation >= 0)
  703. after = runs.at(after).continuation;
  704. if (runs.at(after).continuation == -2) {
  705. after = runs.size();
  706. } else {
  707. ++after;
  708. while (after < runs.size() && !runs.at(after).hasContent)
  709. ++after;
  710. }
  711. int level_after = (after == runs.size()) ? baseLevel : runs.at(after).level;
  712. QChar::Direction sos = (qMax(level_before, level) & 1) ? QChar::DirR : QChar::DirL;
  713. QChar::Direction eos = (qMax(level_after, level) & 1) ? QChar::DirR : QChar::DirL;
  714. if (BidiDebugEnabled) {
  715. BIDI_DEBUG() << "Isolated run starting at" << i << "sos/eos" << sos << eos;
  716. BIDI_DEBUG() << "before implicit level processing:";
  717. IsolatedRunSequenceIterator it(runs, i);
  718. while (!it.atEnd()) {
  719. BIDI_DEBUG() << " " << *it << Qt::hex << text[*it].unicode() << analysis[*it].bidiDirection;
  720. ++it;
  721. }
  722. }
  723. resolveW1W2W3(runs, i, sos);
  724. resolveW4(runs, i, sos);
  725. resolveW5(runs, i);
  726. if (BidiDebugEnabled) {
  727. BIDI_DEBUG() << "after W4/W5";
  728. IsolatedRunSequenceIterator it(runs, i);
  729. while (!it.atEnd()) {
  730. BIDI_DEBUG() << " " << *it << Qt::hex << text[*it].unicode() << analysis[*it].bidiDirection;
  731. ++it;
  732. }
  733. }
  734. resolveW6W7(runs, i, sos);
  735. // Resolve neutral types
  736. // Rule N0
  737. resolveN0(runs, i, sos);
  738. resolveN1N2(runs, i, sos, eos);
  739. BIDI_DEBUG() << "setting levels (run at" << level << ")";
  740. // Rules I1 & I2: set correct levels
  741. {
  742. ushort level = runs.at(i).level;
  743. IsolatedRunSequenceIterator it(runs, i);
  744. while (!it.atEnd()) {
  745. int pos = *it;
  746. QChar::Direction current = analysis[pos].bidiDirection;
  747. switch (current) {
  748. case QChar::DirBN:
  749. break;
  750. case QChar::DirL:
  751. analysis[pos].bidiLevel = (level + 1) & ~1;
  752. break;
  753. case QChar::DirR:
  754. analysis[pos].bidiLevel = level | 1;
  755. break;
  756. case QChar::DirAN:
  757. case QChar::DirEN:
  758. analysis[pos].bidiLevel = (level + 2) & ~1;
  759. break;
  760. default:
  761. Q_UNREACHABLE();
  762. }
  763. BIDI_DEBUG() << " " << pos << current << analysis[pos].bidiLevel;
  764. ++it;
  765. }
  766. }
  767. }
  768. void resolveImplicitLevels(const Vector<DirectionalRun> &runs)
  769. {
  770. for (int i = 0; i < runs.size(); ++i) {
  771. if (runs.at(i).isContinuation)
  772. continue;
  773. resolveImplicitLevelsForIsolatedRun(runs, i);
  774. }
  775. }
  776. bool checkForBidi() const
  777. {
  778. if (baseLevel != 0)
  779. return true;
  780. for (int i = 0; i < length; ++i) {
  781. if (text[i].unicode() >= 0x590) {
  782. switch (infoAt(i).properties->direction) {
  783. case QChar::DirR: case QChar::DirAN:
  784. case QChar::DirLRE: case QChar::DirLRO: case QChar::DirAL:
  785. case QChar::DirRLE: case QChar::DirRLO: case QChar::DirPDF:
  786. case QChar::DirLRI: case QChar::DirRLI: case QChar::DirFSI: case QChar::DirPDI:
  787. return true;
  788. default:
  789. break;
  790. }
  791. }
  792. }
  793. return false;
  794. }
  795. bool process()
  796. {
  797. memset(analysis, 0, length * sizeof(QScriptAnalysis));
  798. bool hasBidi = checkForBidi();
  799. if (!hasBidi)
  800. return false;
  801. if (BidiDebugEnabled) {
  802. BIDI_DEBUG() << ">>>> start bidi, text length" << length;
  803. for (int i = 0; i < length; ++i)
  804. BIDI_DEBUG() << Qt::hex << " (" << i << ")" << text[i].unicode() << text[i].direction();
  805. }
  806. {
  807. Vector<DirectionalRun> runs;
  808. resolveExplicitLevels(runs);
  809. if (BidiDebugEnabled) {
  810. BIDI_DEBUG() << "resolved explicit levels, nruns" << runs.size();
  811. for (int i = 0; i < runs.size(); ++i)
  812. BIDI_DEBUG() << " " << i << "start/end" << runs.at(i).start << runs.at(i).end << "level" << (int)runs.at(i).level << "continuation" << runs.at(i).continuation;
  813. }
  814. // now we have a list of isolated run sequences inside the vector of runs, that can be fed
  815. // through the implicit level resolving
  816. resolveImplicitLevels(runs);
  817. }
  818. BIDI_DEBUG() << "Rule L1:";
  819. // Rule L1:
  820. bool resetLevel = true;
  821. for (int i = length - 1; i >= 0; --i) {
  822. if (analysis[i].bidiFlags & QScriptAnalysis::BidiResetToParagraphLevel) {
  823. BIDI_DEBUG() << "resetting pos" << i << "to baselevel";
  824. analysis[i].bidiLevel = baseLevel;
  825. resetLevel = true;
  826. } else if (resetLevel && analysis[i].bidiFlags & QScriptAnalysis::BidiMaybeResetToParagraphLevel) {
  827. BIDI_DEBUG() << "resetting pos" << i << "to baselevel (maybereset flag)";
  828. analysis[i].bidiLevel = baseLevel;
  829. } else {
  830. resetLevel = false;
  831. }
  832. }
  833. // set directions for BN to the minimum of adjacent chars
  834. // This makes is possible to be conformant with the Bidi algorithm even though we don't
  835. // remove BN and explicit embedding chars from the stream of characters to reorder
  836. int lastLevel = baseLevel;
  837. int lastBNPos = -1;
  838. for (int i = 0; i < length; ++i) {
  839. if (analysis[i].bidiFlags & QScriptAnalysis::BidiBN) {
  840. if (lastBNPos < 0)
  841. lastBNPos = i;
  842. analysis[i].bidiLevel = lastLevel;
  843. } else {
  844. int l = analysis[i].bidiLevel;
  845. if (lastBNPos >= 0) {
  846. if (l < lastLevel) {
  847. while (lastBNPos < i) {
  848. analysis[lastBNPos].bidiLevel = l;
  849. ++lastBNPos;
  850. }
  851. }
  852. lastBNPos = -1;
  853. }
  854. lastLevel = l;
  855. }
  856. }
  857. if (lastBNPos >= 0 && baseLevel < lastLevel) {
  858. while (lastBNPos < length) {
  859. analysis[lastBNPos].bidiLevel = baseLevel;
  860. ++lastBNPos;
  861. }
  862. }
  863. if (BidiDebugEnabled) {
  864. BIDI_DEBUG() << "final resolved levels:";
  865. for (int i = 0; i < length; ++i)
  866. BIDI_DEBUG() << " " << i << Qt::hex << text[i].unicode() << Qt::dec << (int)analysis[i].bidiLevel;
  867. }
  868. return true;
  869. }
  870. const QChar *text;
  871. QScriptAnalysis *analysis;
  872. int length;
  873. char baseLevel;
  874. Blocks::const_iterator _startInBlocks;
  875. Blocks::const_iterator _endInBlocks;
  876. mutable Blocks::const_iterator _currentBlock;
  877. int _offsetInBlocks;
  878. struct Info {
  879. const QUnicodeTables::Properties *properties = nullptr;
  880. bool surrogate = false;
  881. };
  882. [[nodiscard]] Info infoAt(int i) const {
  883. if (_currentBlock != _startInBlocks
  884. && (*_currentBlock)->position() > _offsetInBlocks + i) {
  885. _currentBlock = _startInBlocks;
  886. }
  887. auto next = _currentBlock + 1;
  888. while (next != _endInBlocks
  889. && (*next)->position() <= _offsetInBlocks + i) {
  890. _currentBlock = next;
  891. ++next;
  892. }
  893. const auto type = (*_currentBlock)->type();
  894. const auto object = (type == TextBlockType::Emoji)
  895. || (type == TextBlockType::CustomEmoji)
  896. || (type == TextBlockType::Skip);
  897. constexpr auto kQt5 = (QT_VERSION < QT_VERSION_CHECK(6, 0, 0));
  898. using wide = std::conditional_t<kQt5, uint, char32_t>;
  899. using narrow = std::conditional_t<kQt5, ushort, char16_t>;
  900. auto uc = wide(text[i].unicode());
  901. if (QChar::isHighSurrogate(uc) && i < length - 1 && text[i + 1].isLowSurrogate()) {
  902. uc = QChar::surrogateToUcs4(ushort(uc), text[i + 1].unicode());
  903. return {
  904. .properties = QUnicodeTables::properties(object
  905. ? wide(QChar::ObjectReplacementCharacter)
  906. : uc),
  907. .surrogate = true,
  908. };
  909. }
  910. return {
  911. .properties = QUnicodeTables::properties(object
  912. ? narrow(QChar::ObjectReplacementCharacter)
  913. : narrow(uc)),
  914. .surrogate = false,
  915. };
  916. }
  917. };
  918. } // namespace Ui::Text
  919. #undef BIDI_DEBUG