Skip to content

Commit ab88304

Browse files
authored
Added MathJax rendering (#4)
## Summary: With the recent need for Math rendering in Khan Academy, we need to pick up the MathJax rendering again. This adds MathJax rendering to the conversation whenever we detect that a conversation contains inline MathJax. Note that we also need to consider the slot-filling features - we do this by setting an attribute (`data-skip-select`) which, when encountered, allows dynamic MathJax rendering to happen within the node without affecting the highlighting of the elements. Issue: https://khanacademy.atlassian.net/browse/DI-533 ## Test plan: `yarn test` `yarn start` and see the equation rendered. Note that we also added the slots to demonstrate the different granularities of the tags ![image](https://github.com/user-attachments/assets/02d35c6b-20b6-4c33-ac29-686d599d6efa) With the highlight slot (unfortunately we cannot highlight the MathJax expression) ![image](https://github.com/user-attachments/assets/a6fedf35-a80c-4868-b948-dcb989884cd1) And also test out dynamic rendering by using our local label studio repo with multiple entries. ## E2E tests Although the official E2E test is broken (it seems to depend on external resources for an audio test, which times out), we can run those E2E tests manually to confirm that the `rich-text` component is not broken. ``` % npx codeceptjs run tests/rich-text/*.js CodeceptJS v3.3.3 #StandWithUkraine Using test root "/Users/borislau/khan/label-studio-frontend/e2e" Richtext basic functional -- Warning: Timeout was set to 2400secs. Global timeout should be specified in seconds. 
✔ Creating, removing and restoring regions in 8394ms ✔ Rich text content consistency in 25356ms OK | 2 passed // 35s ``` Author: dat-boris Reviewers: github-actions[bot], dat-boris, jimmykodes, wwells, lizfaubell Required Reviewers: Approved By: jimmykodes, wwells, wwells, lizfaubell Checks: ⌛ Tests / E2E Tests, ✅ Build JS Bundle / Build Coverage, ✅ Build JS Bundle / Build, ✅ Tests / Unit Tests, ✅ Lint / Run ESLint Pull Request URL: #4
1 parent 06fbda8 commit ab88304

File tree

11 files changed

+276
-18
lines changed

11 files changed

+276
-18
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"@types/react-beautiful-dnd": "^13.1.3",
9191
"babel-plugin-istanbul": "^6.1.1",
9292
"babel-preset-react-app": "^9.1.1",
93+
"better-react-mathjax": "^2.0.2",
9394
"d3": "^5.16.0",
9495
"d3-color": "3.1.0",
9596
"loader-utils": "2.0.4",
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
<View>
22
<HyperTextLabels name="ner" toName="text">
3-
<Label value="Person"></Label>
4-
<Label value="Organization"></Label>
5-
<Label value="Date"></Label>
3+
<Label value="Paragraph" granularity="paragraph"></Label>
4+
<Label value="Word" granularity="word"></Label>
5+
<Label value="Div" granularity="div"></Label>
6+
<Label value="Interaction" granularity="parent_div"></Label>
67
</HyperTextLabels>
78
<TableText name="text" value="$text"></TableText>
89
</View>

src/examples/rich_text_table/tasks.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[
22
{
33
"data": {
4-
"text": "[[\"Asking question?<a href=\\\"javascript:alert('pwned')\\\">click</a>\", \"Answer! Look at some LaTex\\n\\n\\\\(\\\\dfrac{6}{2k - 6} = \\\\dfrac{1}{3}\\\\)\\n\\n\"], [\"Asking more question?\", \"More LaTex!\\n\\n\\\\(\\\\dfrac{6}{2k - 6} = \\\\dfrac{1}{3}\\\\)\\n\\n\"]]"
4+
"text": "[[\"Asking question?<a href=\\\"javascript:alert('pwned')\\\">click</a>\", \"Answer! Look at some LaTex\\n\\n\\\\(\\\\dfrac{1}{2k - 6} = \\\\dfrac{1}{3}\\\\) and \\\\(\\\\dfrac{1.5}{2k - 6} = \\\\dfrac{1}{3}\\\\)\\n\\n\"], [\"Asking more question in LaTex?\\\\(\\\\dfrac{2}{2k - 6} = \\\\dfrac{1}{3}\\\\)\", \"More LaTex!\\n\\n\\\\(\\\\dfrac{3}{2k - 6} = \\\\dfrac{1}{3}\\\\)\\n\\n\"]]"
55
},
66
"predictions": [
77
{

src/tags/control/Label.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ const TagAttrs = types.model({
6060
size: types.optional(types.string, 'medium'),
6161
background: types.optional(customTypes.color, Constants.LABEL_BACKGROUND),
6262
selectedcolor: types.optional(customTypes.color, '#ffffff'),
63-
granularity: types.maybeNull(types.enumeration(['symbol', 'word', 'sentence', 'paragraph'])),
63+
granularity: types.maybeNull(types.enumeration(['symbol', 'word', 'sentence', 'paragraph', 'div', 'parent_div'])),
6464
groupcancontain: types.maybeNull(types.string),
6565
// childrencheck: types.optional(types.enumeration(["any", "all"]), "any")
6666
...(isFF(FF_DEV_2128) ? { html: types.maybeNull(types.string) } : {}),
@@ -174,7 +174,7 @@ const Model = types.model({
174174
if (labels.type === 'labels') return true; // universal labels are fine to select
175175
if (labels.type.includes(region.type.replace(/region$/, ''))) return true; // region type is in label type
176176
if (labels.type.includes(region.results[0].type)) return true; // any result type of the region is in label type
177-
177+
178178
return false;
179179
});
180180

src/tags/object/RichText/RichText.styl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,14 @@
8080
word-wrap: break-word
8181
margin-top: 16px
8282

83-
&:first-child
83+
&_qa_question
84+
// User questions
8485
margin-left: auto
8586

87+
&_qa_answer
88+
// Khanmigo answer
89+
margin-right: 10%
90+
8691
&__error-box
8792
padding 8px 16px
8893
background-color: pink

src/tags/object/RichText/domManager.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,16 @@ export default class DomManager {
633633
while (currentNode) {
634634
const isText = currentNode.nodeType === Node.TEXT_NODE;
635635
const isBR = currentNode.nodeName === 'BR';
636+
const isSkipSelect = currentNode.nodeType === Node.ELEMENT_NODE && currentNode.getAttribute('data-skip-select');
636637

638+
if (isSkipSelect) {
639+
const ignoreContainer = currentNode;
640+
641+
// Ignore all nodes within the container
642+
while (ignoreContainer.contains(currentNode)) {
643+
currentNode = this.nextStep();
644+
}
645+
}
637646
if (isText) {
638647
domData.addTextElement(currentNode as Text, this.currentPath);
639648
} else if (isBR) {

src/tags/object/RichText/model.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ const WARNING_MESSAGES = {
4949
* @param {string} [highlightColor] - hex string with highlight color, if not provided uses the labels color
5050
* @param {boolean} [showLabels=true] - whether or not to show labels next to the region
5151
* @param {none|base64|base64unicode} [encoding] - decode value from an encoded string
52-
* @param {symbol|word|sentence|paragraph} [granularity] - control region selection granularity
52+
 * @param {symbol|word|sentence|paragraph|div|parent_div} [granularity] - control region selection granularity
5353
*/
5454
const TagAttrs = types.model('RichTextModel', {
5555
value: types.maybeNull(types.string),
@@ -74,7 +74,7 @@ const TagAttrs = types.model('RichTextModel', {
7474

7575
encoding: types.optional(types.enumeration(['none', 'base64', 'base64unicode']), 'none'),
7676

77-
granularity: types.optional(types.enumeration(['symbol', 'word', 'sentence', 'paragraph']), 'symbol'),
77+
granularity: types.optional(types.enumeration(['symbol', 'word', 'sentence', 'paragraph', 'div', 'parent_div']), 'symbol'),
7878
});
7979

8080
const Model = types

src/tags/object/RichText/table.js

Lines changed: 109 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,31 @@
1818

1919
import React from 'react';
2020
import { types } from 'mobx-state-tree';
21+
import { MathJax, MathJaxContext } from 'better-react-mathjax';
2122
import { cn } from '../../../utils/bem';
2223

2324
import './RichText.styl';
2425
import { RichTextModel } from './model';
2526
import { HtxRichText } from './view';
2627

28+
// To allow us to render MathJax expressions, we need to use a marker
29+
// However we also want to keep the original \(expressions\), so we swap into
30+
// a different marker instead.
31+
// We use a different marker than what is used in Khanmigo.
32+
const MATHJAX_MARKER = '$';
33+
34+
// Extract math from a conversation string, alternating between non-math and
// math segments. Khanmigo uses "\(.*?\)" as the marker for inline math.
//
// For example, "What is \(2 + 2\)?" will split into ["What is ", "2 + 2", "?"]:
// even indices hold plain text, odd indices hold the math expression without
// its delimiters. A string without any math yields a single-element array.
//
// Non-string input (e.g. a conversation row with a missing question or
// answer) yields an empty array instead of throwing, so callers can keep
// relying on `length > 1` to mean "this text contains math".
const parseConvoWithMath = (str) => {
  if (typeof str !== 'string') return [];

  // About the capture group: when the separator passed to str.split contains
  // a capturing group, the captured text is spliced into the resulting array
  // between the surrounding pieces, which gives us the alternation for free.
  const mathRegex = /\\\((.*?)\\\)/g;

  return str.split(mathRegex);
};
45+
2746
const renderTableValue = (val) => {
2847
let conversations = [];
2948

@@ -38,27 +57,114 @@ const renderTableValue = (val) => {
3857
return <div className={errClass}>{errMsg}</div>;
3958
}
4059

41-
const itemClass = cn('richtext', { elem: 'table-item' });
60+
const questionItemClass = cn('richtext', { elem: 'table-item', mod: { qa : 'question' } });
61+
const answerItemClass = cn('richtext', { elem: 'table-item', mod: { qa : 'answer' } });
62+
let hasMath = false;
4263

4364
const rowElems = conversations.map((conversation, index) => {
4465
const question = conversation[0];
4566
const answer = conversation[1];
67+
const mathQuestions = parseConvoWithMath(question);
68+
const mathAnswers = parseConvoWithMath(answer);
69+
let mathQuestionComponent = null;
70+
let mathAnswerComponent = null;
71+
72+
// Render an alternate list between Math and non-math expressions
73+
// The list alternates between non-Math and Math expressions from
74+
// `parseConvoWithMath`
75+
const renderAllMathJax = (convoAndMathList) => (
76+
convoAndMathList.map((convo, i) => {
77+
if (i % 2 === 0) {
78+
// Non math
79+
return <span key={`eq=${i}`}>{convo}</span>;
80+
} else {
81+
// So for Math, we need to create a span as we want 2 piece of dom:
82+
// 1. The hidden raw MathJax expression, to allow slot Label to work
83+
// 2. A marked MathJax expression that allows <MathJax/> to render
84+
return (
85+
<span key={`eq-${i}`}>
86+
<span style={{ 'display': 'none' }}>{'\\(' + convo + '\\)'}</span>
87+
<span data-skip-select='1'>{MATHJAX_MARKER + convo + MATHJAX_MARKER}</span>
88+
</span>
89+
);
90+
}
91+
})
92+
);
93+
94+
if (mathQuestions.length > 1) {
95+
mathQuestionComponent = (
96+
<div className={questionItemClass}>
97+
<MathJax dynamic>{renderAllMathJax(mathQuestions)}</MathJax>
98+
</div>
99+
);
100+
hasMath = true;
101+
} else if (question) {
102+
mathQuestionComponent = <div className={questionItemClass}>{question}</div>;
103+
}
104+
if (mathAnswers.length > 1) {
105+
mathAnswerComponent = (
106+
<div className={answerItemClass}>
107+
<MathJax dynamic>{renderAllMathJax(mathAnswers)}</MathJax>
108+
</div>
109+
);
110+
hasMath = true;
111+
} else if (answer){
112+
mathAnswerComponent = <div className={answerItemClass}>{answer}</div>;
113+
}
46114

47115
return (
48116
<div key={`conversation-${index}`}>
49-
<div className={itemClass}>{question}</div>
50-
<div className={itemClass}>{answer}</div>
117+
{mathQuestionComponent}
118+
{mathAnswerComponent}
51119
</div>
52120
);
53121
});
54122

123+
if (hasMath) {
124+
const mathJaxConfig = {
125+
tex: {
126+
inlineMath: [[MATHJAX_MARKER, MATHJAX_MARKER]],
127+
},
128+
};
129+
130+
return (
131+
<MathJaxContext config={mathJaxConfig}>
132+
{rowElems}
133+
</MathJaxContext>
134+
);
135+
}
55136
return <div>{rowElems}</div>;
56137
};
57138

139+
// We need to trigger MathJax typeset after the component is mounted
// See https://docs.mathjax.org/en/latest/advanced/typeset.html
// As the document above suggests, we need to ensure that only one typeset
// call is running at a time. `typesetPromise` tracks the in-flight call.
let typesetPromise = null;

// Set when a typeset was requested while another one was still running, so
// that content mounted in the meantime gets one follow-up pass instead of
// being dropped.
let typesetQueued = false;

// Run a single MathJax typeset pass, or queue one if a pass is in flight.
const runMathJaxTypeset = () => {
  const mathJax = typeof window === 'undefined' ? undefined : window.MathJax;

  // This means that this is first load, and we can wait for the
  // <MathJaxContext/> component to be ready and typeset, instead of doing
  // this dynamically.
  if (typeof mathJax?.typesetPromise !== 'function') return;

  // A typeset is already running: remember that another pass is needed.
  if (typesetPromise) {
    typesetQueued = true;
    return;
  }

  typesetPromise = Promise.resolve()
    .then(() => mathJax.typesetPromise())
    .catch((err) => {
      // A failed typeset must not leave an unhandled rejection behind, nor
      // block later typeset requests.
      console.warn('MathJax typeset failed.', err);
    })
    .then(() => {
      typesetPromise = null;

      if (typesetQueued) {
        typesetQueued = false;
        runMathJaxTypeset();
      }
    });
};

// Mount callback: schedules a typeset pass 100ms after it is called.
const triggerMathJaxTypeset = () => {
  setTimeout(runMathJaxTypeset, 100);
};
162+
58163
/**
 * Build the TableText view by calling the shared HtxRichText factory with
 * the table renderer and the MathJax typeset mount callback, always inline.
 */
export const TableText = () => {
  const tableOptions = {
    isText: false,
    valueToComponent: renderTableValue,
    didMountCallback: triggerMathJaxTypeset,
    alwaysInline: true,
  };

  return HtxRichText(tableOptions);
};

src/tags/object/RichText/view.js

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class RichTextPieceView extends Component {
237237
*/
238238
_determineRegion(element) {
239239
const spanSelector = isFF(FF_LSDV_4620_3) ? this._regionVisibleSpanSelector : this._regionSpanSelector;
240-
240+
241241
if (matchesSelector(element, spanSelector)) {
242242
const span = element.tagName === 'SPAN' && (!isFF(FF_LSDV_4620_3) || element.matches(spanSelector)) ? element : element.closest(spanSelector);
243243
const { item } = this.props;
@@ -247,7 +247,7 @@ class RichTextPieceView extends Component {
247247
}
248248

249249
componentDidMount() {
250-
const { item, alwaysInline } = this.props;
250+
const { item, alwaysInline, didMountCallback } = this.props;
251251

252252
if (!isFF(FF_LSDV_4620_3)) {
253253
item.setNeedsUpdateCallbacks(
@@ -256,6 +256,10 @@ class RichTextPieceView extends Component {
256256
);
257257
}
258258

259+
if (didMountCallback) {
260+
didMountCallback(item);
261+
}
262+
259263
if (!(alwaysInline || item.inline)) {
260264
this.dispose = observe(item, '_isReady', this.updateLoadingVisibility, true);
261265
}
@@ -484,9 +488,12 @@ const storeInjector = inject('store');
484488
const RPTV = storeInjector(observer(RichTextPieceView));
485489

486490
export const HtxRichText = (
487-
{ isText = false, valueToComponent = null, alwaysInline = false } = {},
491+
{ isText = false, valueToComponent = null, alwaysInline = false, didMountCallback = null } = {},
488492
) => {
489493
return storeInjector(observer(props => {
490-
return <RPTV {...props} isText={isText} valueToComponent={valueToComponent} alwaysInline={alwaysInline}/>;
494+
return (
495+
<RPTV {...props} isText={isText} alwaysInline={alwaysInline}
496+
valueToComponent={valueToComponent} didMountCallback={didMountCallback} />
497+
);
491498
}));
492499
};

src/utils/selection-tools.js

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,55 @@ const closestBoundarySelection = (selection, boundary) => {
170170
return selection;
171171
};
172172

173+
/**
 * Expand the selection so that it spans a whole ancestor element.
 *
 * Starting from the selection's start container, climbs to the `depth`-th
 * ancestor whose tag name matches `tagName`, then selects from the first to
 * the last text node inside that ancestor. Text nodes are used as the
 * boundaries because the selection is later converted to text offsets.
 * Anything inside an element carrying `data-skip-select` is ignored.
 *
 * The selection is left untouched when no matching ancestor exists or when
 * the ancestor contains no selectable text node.
 *
 * @param {Selection} selection - selection to modify in place
 * @param {string} tagName - tag name of the ancestor to expand to (case-insensitive)
 * @param {number} [depth=1] - how many matching ancestors to climb
 */
const changeBoundaryToElement = (selection, tagName, depth = 1) => {
  const { startContainer } = destructSelection(selection);

  // find parent of startContainer with tagName
  const upperCaseTagName = tagName.toUpperCase();
  let parent = startContainer;

  // Stop climbing as soon as we run out of ancestors, so that a `depth`
  // larger than the number of matching ancestors does not dereference null.
  for (let i = 0; parent && i < depth; i++) {
    parent = parent.parentNode;
    while (parent && parent.tagName !== upperCaseTagName) {
      parent = parent.parentNode;
    }
  }
  if (!parent) {
    return;
  }

  // Elements are visited only so that skip containers can be detected;
  // the selection boundaries themselves are always text nodes.
  const walker = parent.ownerDocument.createTreeWalker(parent, NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT);
  let firstTextChild = null;
  let lastTextChild = null;
  let currentNode = walker.nextNode();

  while (currentNode) {
    // skip any elements within `data-skip-select`, which might be added by
    // other dynamic rendering (e.g. MathJax)
    if (currentNode.nodeType === Node.ELEMENT_NODE && currentNode.getAttribute('data-skip-select')) {
      const skipContainer = currentNode;

      // `contains(null)` is false, so this also ends at the end of the walk.
      while (skipContainer.contains(currentNode)) {
        currentNode = walker.nextNode();
      }
      continue;
    }

    if (currentNode.nodeType === Node.TEXT_NODE) {
      if (firstTextChild === null) firstTextChild = currentNode;
      lastTextChild = currentNode;
    }

    currentNode = walker.nextNode();
  }

  // Nothing selectable inside the element: keep the selection as it is.
  if (firstTextChild === null || lastTextChild === null) {
    return;
  }

  selection.setPosition(firstTextChild);
  selection.extend(lastTextChild, lastTextChild.length);
};
221+
173222
const boundarySelection = (selection, boundary) => {
174223
const wordBoundary = boundary !== 'symbol';
175224
const {
@@ -262,13 +311,19 @@ const applyTextGranularity = (selection, granularity) => {
262311
case 'paragraph':
263312
boundarySelection(selection, 'paragraphboundary');
264313
return;
314+
case 'div':
315+
changeBoundaryToElement(selection, 'div');
316+
return;
317+
case 'parent_div':
318+
changeBoundaryToElement(selection, 'div', 2);
319+
return;
265320
case 'charater':
266321
case 'symbol':
267322
default:
268323
return;
269324
}
270-
} catch {
271-
console.warn('Probably, you\'re using browser that doesn\'t support granularity.');
325+
} catch (e) {
326+
console.warn('Probably, you\'re using browser that doesn\'t support granularity.', e);
272327
}
273328
};
274329

@@ -711,6 +766,24 @@ const findGlobalOffset = (node, position, root) => {
711766
const isText = currentNode.nodeType === Node.TEXT_NODE;
712767
const isBR = currentNode.nodeName === 'BR';
713768

769+
// if the current node have skip_select attribute, we should skip it
770+
const isSkipSelect = currentNode.nodeType === Node.ELEMENT_NODE && currentNode.getAttribute('data-skip-select');
771+
772+
// Skip MathJax generated nodes, jump to next node (i.e. lastChild's next)
773+
if (isSkipSelect) {
774+
const ignoreContainer = currentNode;
775+
776+
// Keep checking the next of lastChild is not part of the container
777+
// Note this will end if currentNode = null, which is what we want.
778+
while (ignoreContainer.contains(currentNode)) {
779+
currentNode = walker.nextNode();
780+
// Note: the nodeReached can be within the ignore container, so we need
781+
// to check here.
782+
nodeReached = nodeReached || node === currentNode;
783+
}
784+
continue;
785+
}
786+
714787
// Stop iteration
715788
// Break if we passed target node and current node
716789
// is not target, nor child of a target

0 commit comments

Comments
 (0)