Skip to content

Commit

Permalink
Merge branch 'fixRecomOrder'
Browse files Browse the repository at this point in the history
  • Loading branch information
finia2NA committed May 29, 2024
2 parents 77d9547 + bd43156 commit ed269b5
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 52 deletions.
5 changes: 1 addition & 4 deletions src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,9 @@ function App() {
setKanjiRoot(relevantRoot);

// Get order in string form
const orderString = getRecommendedOrder(relevantRoot).map(x => x.name).join('');
const orderString = getRecommendedOrder(relevantRoot).map(x => x.name).filter(x => !knownKanjiString.includes(x)).join('');
setRecommendedOrder(orderString);




// Cleanup function
return () => {
// Perform any necessary cleanup here
Expand Down
11 changes: 10 additions & 1 deletion src/components/KanjiGraph.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,16 @@ const KanjiGraph = ({ kanjiNodeList }: KanjiGraphProps) => {
target: "n-" + child.name,
// label: "Edge " + node.name + "-" + child.name
};
edges.push(currentEdge);

const existingEdge = edges.find(edge =>
edge.id === currentEdge.id &&
edge.source === currentEdge.source &&
edge.target === currentEdge.target
);

if (!existingEdge) {
edges.push(currentEdge);
}
}
}

Expand Down
138 changes: 91 additions & 47 deletions src/logic/kanjiorder.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import { DisplayNode } from '../components/KanjiGraph';
import topology from '../assets/topology.json';

//---------------------------------------------------------------------
// DATA STRUCTURES

// This is the structure of the JSON file
// The topology is a dictionary syntactically,
// semantically it is a DAG modified to have just one root ("0")
// semantically it is a DAG I have modified to have just one root ("0")
type Topology = Record<string, string[]>;

// RADICALS:
// This is from https://www.localizingjapan.com/blog/2012/01/20/regular-expressions-for-japanese-text/ .
// Not using their regex because they match Kangxi unicode block, while I operate in CJK Unified Ideographs
// When normalized, some of these radicals turn into Kanji. I have done my best to remove them.

// This is the original
// const radicalList = new Set("⺀⺀⺁⺂⺃⺄⺅⺆⺇⺈⺉⺊⺋⺌⺍⺎⺏⺐⺑⺒⺓⺔⺕⺖⺗⺘⺙⺚⺛⺜⺝⺞⺟⺠⺡⺢⺣⺤⺥⺦⺧⺨⺩⺪⺫⺬⺭⺮⺯⺰⺱⺲⺳⺴⺵⺶⺷⺸⺹⺺⺻⺼⺽⺾⺿⻀⻁⻂⻃⻄⻅⻆⻇⻈⻉⻊⻋⻌⻍⻎⻏⻐⻑⻒⻓⻔⻕⻖⻗⻘⻙⻚⻛⻜⻝⻞⻟⻠⻡⻢⻣⻤⻥⻦⻧⻨⻩⻪⻫⻬⻭⻮⻯⻰⻱⻲⻳⼀⼁⼂⼃⼄⼅⼆⼇⼈⼉⼊⼋⼌⼍⼎⼏⼐⼑⼒⼓⼔⼕⼖⼗⼘⼙⼚⼛⼜⼝⼞⼟⼠⼡⼢⼣⼤⼥⼦⼧⼨⼩⼪⼫⼬⼭⼮⼯⼰⼱⼲⼳⼴⼵⼶⼷⼸⼹⼺⼻⼼⼽⼾⼿⽀⽁⽂⽃⽄⽅⽆⽇⽈⽉⽊⽋⽌⽍⽎⽏⽐⽑⽒⽓⽔⽕⽖⽗⽘⽙⽚⽛⽜⽝⽞⽟⽠⽡⽢⽣⽤⽥⽦⽧⽨⽩⽪⽫⽬⽭⽮⽯⽰⽱⽲⽳⽴⽵⽶⽷⽸⽹⽺⽻⽼⽽⽾⽿⾀⾁⾂⾃⾄⾅⾆⾇⾈⾉⾊⾋⾌⾍⾎⾏⾐⾑⾒⾓⾔⾕⾖⾗⾘⾙⾚⾛⾜⾝⾞⾟⾠⾡⾢⾣⾤⾥⾦⾧⾨⾩⾪⾫⾬⾭⾮⾯⾰⾱⾲⾳⾴⾵⾶⾷⾸⾹⾺⾻⾼⾽⾾⾿⿀⿁⿂⿃⿄⿅⿆⿇⿈⿉⿊⿋⿌⿍⿎⿏⿐⿑⿒⿓⿔⿕".normalize("NFKC").split(''));

// This is the one I have cleaned
const radicalSet = new Set("⺀⺁⺂⺃⺄⺅⺆⺇⺈⺉⺊⺋⺌⺍⺎⺏⺐⺑⺒⺓⺔⺕⺖⺗⺘⺙⺚⺛⺜⺝⺞⺡⺢⺣⺤⺥⺦⺧⺨⺩⺪⺫⺬⺭⺮⺯⺰⺱⺲⺳⺴⺵⺶⺷⺸⺹⺺⺻⺼⺽⺾⺿⻀⻂⻃⻅⻇⻈⻉⻊⻋⻌⻍⻎⻏⻐⻒⻓⻔⻕⻖⻗⻙⻚⻛⻜⻝⻞⻟⻠⻡⻢⻣⻥⻦⻧⻪⻫⻬⻭⻮⻰⻱龟丨丶丿亅亠儿冂冖冫几凵勹匕匚匸卜卩厂厶夂夊宀寸小尢尸屮巛巾幺广廴廾弋弓彐彡彳戈戶攴斤无曰歹殳毋气爻爿片牙瓦疋疒癶禸禾糸缶网而耒肉自至臼舛艮艸襾豸辵釆隶隹韋韭髟鬥鬯鬲鹵麥黍黑黹黽鼎鼠齊齒龜龠".normalize("NFKC").split(''));

export class KanjiNode {
// DAG properties
Expand All @@ -17,17 +24,21 @@ export class KanjiNode {
parents: KanjiNode[];

// for kanji
isRadical: boolean;
priority: number | undefined; // lower is more important
isRelevant: boolean = false;
isKnown: boolean = false;
priority: number | undefined; // lower is more important

// For display
displayNode?: DisplayNode;

constructor(name: string) {
this.name = name;
// In unicode, some kanji have multiple representations, so we normalize them to a standard one.
this.name = name.normalize("NFKC");
this.children = [];
this.parents = [];

this.isRadical = radicalSet.has(this.name);
}

addChild(node: KanjiNode): void {
Expand All @@ -38,30 +49,31 @@ export class KanjiNode {
this.parents.push(node);
}

// equals(other: Node): boolean {
// if (this === other) return true;
// if (other == null || this.constructor !== other.constructor) return false;

// return (
// this.name === other.name &&
// this.isRelevant === other.isRelevant &&
// this.isKnown === other.isKnown &&
// this.priority === other.priority &&
// this.children.length === other.children.length &&
// this.parents.length === other.parents.length &&
// this.children.every((child, idx) => child.equals(other.children[idx])) &&
// this.parents.every((parent, idx) => parent.equals(other.parents[idx]))
// );
// }
/**
* Returns a shallow copy of the node (no children or parents are copied, just the properties)
* @returns
*/
shallowCopy(): KanjiNode {
const newNode = new KanjiNode(this.name);
newNode.isRadical = this.isRadical;
newNode.isRelevant = this.isRelevant;
newNode.isKnown = this.isKnown;
newNode.priority = this.priority;
return newNode;
}
}

//---------------------------------------------------------------------
// FUNCTIONS TO CREATE THE GRAPH

function findOrCreateNode(name: string, thelist: KanjiNode[]): KanjiNode {
function findOrCreateNode(name: string, thelist: KanjiNode[], properties?: KanjiNode): KanjiNode {
let node = thelist.find(x => x.name === name);
if (!node) {
node = new KanjiNode(name);
if (properties) {
node = properties.shallowCopy();
} else {
node = new KanjiNode(name);
}
thelist.push(node);
}
return node;
Expand Down Expand Up @@ -91,6 +103,7 @@ function buildDAG(): [KanjiNode[], KanjiNode] {
}

// New subgraph functions

/**
* This function takes the whole kanji topology as a graph and node list and returns a subgraph of the kanji to know.
* The subgraph is a DAG with the same structure as the original, but with only the relevant nodes.
Expand All @@ -106,8 +119,9 @@ function getTargetSubgraph(allRoot: KanjiNode, nodeList: KanjiNode[], targetKanj
const paint = (node: KanjiNode | undefined) => {
// In this case, either the node does not exist or this subgraph has already been painted
if (!node || node.isRelevant) return;
// recursion 🙏

node.isRelevant = true;
// recursion 🙏
for (const parent of node.parents) {
paint(parent);
}
Expand All @@ -119,25 +133,24 @@ function getTargetSubgraph(allRoot: KanjiNode, nodeList: KanjiNode[], targetKanj
}

// Step 2: create a new graph with only the relevant nodes
const relevantWalker = (node: KanjiNode): KanjiNode => {
const relevantWalker = (startNode: KanjiNode, foundList: KanjiNode[]): KanjiNode => {
// copy the node properties
const currentNewNode = new KanjiNode(node.name);
currentNewNode.isRelevant = node.isRelevant;
currentNewNode.isKnown = node.isKnown;
currentNewNode.priority = node.priority;
// debugger;
const currentNewNode = findOrCreateNode(startNode.name, foundList, startNode);

// add only the relevant children
for (const child of node.children) {
for (const child of startNode.children) {
if (child.isRelevant) {
const relevantChildrenRoot = relevantWalker(child);
const relevantChildrenRoot = relevantWalker(child, foundList);
currentNewNode.addChild(relevantChildrenRoot);
relevantChildrenRoot.addParent(currentNewNode);
}
}

return currentNewNode
}
const relevantRootNode = relevantWalker(allRoot);
const relevantKanji: KanjiNode[] = [];
const relevantRootNode = relevantWalker(allRoot, relevantKanji);

// Step 2a: clean up the big graph
for (const node of nodeList) {
Expand All @@ -150,8 +163,6 @@ function getTargetSubgraph(allRoot: KanjiNode, nodeList: KanjiNode[], targetKanj
markKnown(relevantRootNode, knownKanji);

// Step 4: Sort the children and parent arrays by priority
// This way, a left-to right dfs will give a natural order to learn the kanji where
// the most important ones are first
const prioWalker = (node: KanjiNode) => {
node.children.sort((a, b) => (a.priority || 0) - (b.priority || 0));
node.parents.sort((a, b) => (a.priority || 0) - (b.priority || 0));
Expand Down Expand Up @@ -205,20 +216,43 @@ function markKnown(rootNode: KanjiNode, knownList: string[]) {
// ---------------------------------------------------------------------
// Order Recommendation

// This function could be used to check if all parents of a node are in the list, but
// it is not used since I instead check at insert time.
// function isReachable(target: KanjiNode, included: KanjiNode[]): boolean {
// // Special case: if the target is a root, it is always reachable
// if (target.parents.length === 0) {
// return true;
// }

// // Node is reachable if all parents are included
// for (const parent of target.parents) {
// if (!included.includes(parent)) {
// return false;
// }
// }
// return true;
// }

/**
* Generates an order to study the kanji, where
* 1. Constituent parts are always learned before the kanji that use them
* 2. The most frequent kanji are learned first
* @param rootNode
* @returns
*/
// NOTE: if a high-priority kanji is hidden behind a low-priority radical, it suffers in this
// implementation. This could be improved by calculating a "value" for each node based on the
// collective priority of all its (multi-level) children.
export function getRecommendedOrder(rootNode: KanjiNode): KanjiNode[] {
const currentList = [rootNode];
// List of directly reachable nodes, sorted by priority, advantage given to Kanji over radicals
const possibleCandidates = [rootNode];
// The order to return
const returnList: KanjiNode[] = [];

while (currentList.length > 0) {
const candidate = currentList.shift();
if (!candidate) break; // This should never happen but the compiler wants this
while (possibleCandidates.length > 0) {
const candidate = possibleCandidates.shift();
// This should never happen but the compiler wants this line
if (!candidate) throw new Error("unexpectedly didn't receive a candidate");

// See if the candidate is already in the return list
// (Since the graph is a DAG, a node can be the child of multiple parents)
Expand All @@ -228,28 +262,38 @@ export function getRecommendedOrder(rootNode: KanjiNode): KanjiNode[] {
// If the candidate is not in the return list, add it
returnList.push(candidate as KanjiNode);

// Insert the children into current if all parents are already in the list
// Insert the children into possibleCandidates if all parents are already in the list
for (const child of candidate.children) {
// If the child has all its parents in the list, we can add it
if (child.parents.every(x => returnList.includes(x))) {
currentList.push(child);
possibleCandidates.push(child);
}
}

// Sort the current list by priority
// NOTE: would be better if we inserted at the correct position instead of pushing and sorting,
// but we can optimize this later
currentList.sort((a, b) => (a.priority || 0) - (b.priority || 0));
// Sort the candidate list so that non-radicals (kanji) come before radicals, then by priority.
// This way:
// 1. All currently available kanji will be learned before new radicals
// 2. The priority will be respected
// NOTE: It would be better if we inserted at the correct position instead of pushing to the end
// and sorting, but I can optimize this later
possibleCandidates.sort((a, b) => {
// Sort by isRadical first
if (a.isRadical && !b.isRadical) {
return 1; // a is radical, b is not radical, so a should come after b
} else if (!a.isRadical && b.isRadical) {
return -1; // a is not radical, b is radical, so a should come before b
}
// Sort by priority if isRadical is the same
return (a.priority || 0) - (b.priority || 0);
});
}

return returnList;
}


// //---------------------------------------------------------------------
// // MAIN FUNCTION

// Do this only once
// Build the DAG only once since it takes time
const [allList, allRoot] = buildDAG();

export default function getKanjiOrder(kanjis: string, known: string = ""): [KanjiNode[], KanjiNode] {
Expand Down

0 comments on commit ed269b5

Please sign in to comment.