-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 8 changed files with 106 additions and 109 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,30 @@ | ||
import cedict from '../../data/cedict.json' | ||
import ChineseWord from '../types/ChineseWord' | ||
import cedict from '../../data/cedict.json'; | ||
import ChineseWord from '../types/ChineseWord'; | ||
|
||
class Cedict { | ||
|
||
static getCedict (): ChineseWord[] { | ||
return cedict | ||
static getCedict(): ChineseWord[] { | ||
return cedict; | ||
} | ||
|
||
static getByTraditional (character: string): ChineseWord | undefined { | ||
return Cedict.getCedict().find((word) => word.traditional === character) | ||
} | ||
|
||
static getBySimplified (character: string): ChineseWord | undefined { | ||
return Cedict.getCedict().find((word) => word.simplified === character) | ||
|
||
static getByTraditional(character: string): ChineseWord | undefined { | ||
return Cedict.getCedict().find((word) => word.traditional === character); | ||
} | ||
static getByPinyin (character: string): ChineseWord | undefined { | ||
return Cedict.getCedict().find((word) => word.pinyin === character) | ||
|
||
static getBySimplified(character: string): ChineseWord | undefined { | ||
return Cedict.getCedict().find((word) => word.simplified === character); | ||
} | ||
static getByEnglish (sentence: string): ChineseWord[] | undefined { | ||
return Cedict.getCedict().filter((word) => word.english.includes(sentence)) | ||
|
||
static getByPinyin(character: string): ChineseWord | undefined { | ||
return Cedict.getCedict().find((word) => word.pinyin === character); | ||
} | ||
static allOccurenceOfTraditional (traditional: string): ChineseWord[] | undefined { | ||
return Cedict.getCedict().filter((word) => word.traditional.includes(traditional)) | ||
|
||
static getByEnglish(sentence: string): ChineseWord[] | undefined { | ||
return Cedict.getCedict().filter((word) => word.english.includes(sentence)); | ||
} | ||
|
||
|
||
static allOccurenceOfTraditional(traditional: string): ChineseWord[] | undefined { | ||
return Cedict.getCedict().filter((word) => word.traditional.includes(traditional)); | ||
} | ||
} | ||
|
||
export default Cedict | ||
export default Cedict; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,74 @@ | ||
import extract from 'extract-zip' | ||
import * as fs from 'fs' | ||
import * as superagent from 'superagent' | ||
import ChineseWord from './types/ChineseWord' | ||
import extract from 'extract-zip'; | ||
import * as fs from 'fs'; | ||
import * as superagent from 'superagent'; | ||
import ChineseWord from './types/ChineseWord'; | ||
|
||
const urlOfZip = 'https://www.mdbg.net/chinese/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip' | ||
const pathOfData = 'data' | ||
const pathOfCedict = `${pathOfData}/cedict_ts.u8` | ||
const pathOfZip = `${pathOfData}/cedict_1_0_ts_utf-8_mdbg.zip` | ||
const pathOfJSON = `${pathOfData}/cedict.json` | ||
const urlOfZip = 'https://www.mdbg.net/chinese/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip'; | ||
const pathOfData = 'data'; | ||
const pathOfCedict = `${pathOfData}/cedict_ts.u8`; | ||
const pathOfZip = `${pathOfData}/cedict_1_0_ts_utf-8_mdbg.zip`; | ||
const pathOfJSON = `${pathOfData}/cedict.json`; | ||
|
||
/** | ||
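 * Parses one line of the CEDICT file extracted from the zip into a ChineseWord; returns null for empty lines, lines starting with '#', and lines that cannot be parsed. | ||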
*/ | ||
const parseLine = (line: string): ChineseWord | null => { | ||
if (!line || line === '' || line.startsWith('#')) return null | ||
const splitedLine = line.split(/\/(.*)/s) | ||
if (splitedLine.length <= 0) return null | ||
if (!line || line === '' || line.startsWith('#')) return null; | ||
const splitedLine = line.split(/\/(.*)/s); | ||
if (splitedLine.length <= 0) return null; | ||
|
||
try { | ||
const english = splitedLine[1] | ||
const charAndPinyin = splitedLine[0].split('[') | ||
const characters = charAndPinyin[0].split(' ') | ||
const traditional = characters[0] | ||
const simplified = characters[1] | ||
let pinyin = charAndPinyin[1] | ||
pinyin = pinyin.split(' ')[0] as unknown as string | ||
pinyin = pinyin.split(']')[0] as unknown as string | ||
const english = splitedLine[1]; | ||
const charAndPinyin = splitedLine[0].split('['); | ||
const characters = charAndPinyin[0].split(' '); | ||
const traditional = characters[0]; | ||
const simplified = characters[1]; | ||
let pinyin = charAndPinyin[1]; | ||
pinyin = pinyin.split(' ')[0] as unknown as string; | ||
pinyin = pinyin.split(']')[0] as unknown as string; | ||
|
||
return { traditional, simplified, pinyin, english } | ||
return { traditional, simplified, pinyin, english }; | ||
} catch (e: unknown) { | ||
return null | ||
return null; | ||
} | ||
} | ||
}; | ||
|
||
const readFile = (): string[] => { | ||
const file = fs.readFileSync(pathOfCedict, 'utf8') | ||
return file.split('\n') | ||
} | ||
const file = fs.readFileSync(pathOfCedict, 'utf8'); | ||
return file.split('\n'); | ||
}; | ||
|
||
const parsedArray = (): ChineseWord[] => { | ||
const listOfChineseWord: ChineseWord[] = [] | ||
const listOfChineseWord: ChineseWord[] = []; | ||
for (const line of readFile()) { | ||
const word = parseLine(line) | ||
const word = parseLine(line); | ||
if (word !== null) { | ||
listOfChineseWord.push(word) | ||
listOfChineseWord.push(word); | ||
} | ||
} | ||
return listOfChineseWord | ||
} | ||
return listOfChineseWord; | ||
}; | ||
|
||
const serializeAndSave = (object: any) => { | ||
const strignify = JSON.stringify(object) | ||
fs.writeFileSync(pathOfJSON, strignify) | ||
} | ||
const strignify = JSON.stringify(object); | ||
fs.writeFileSync(pathOfJSON, strignify); | ||
}; | ||
|
||
const downloadUnzipAndFormat = async () => { | ||
return superagent | ||
.get(urlOfZip) | ||
.on('error', () => { | ||
return null | ||
return null; | ||
}) | ||
.pipe(fs.createWriteStream(pathOfZip)) | ||
.on('finish', async () => { | ||
await extract(`${process.cwd()}/${pathOfZip}`, { dir: `${process.cwd()}/${pathOfData}/` }) | ||
serializeAndSave(parsedArray()) | ||
}) | ||
} | ||
await extract(`${process.cwd()}/${pathOfZip}`, { dir: `${process.cwd()}/${pathOfData}/` }); | ||
serializeAndSave(parsedArray()); | ||
}); | ||
}; | ||
|
||
const main = async () => { | ||
downloadUnzipAndFormat() | ||
} | ||
downloadUnzipAndFormat(); | ||
}; | ||
|
||
main() | ||
main(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
import ChineseWord from './types/ChineseWord' | ||
import Cedict from './classes/Cedict' | ||
import ChineseWord from './types/ChineseWord'; | ||
import Cedict from './classes/Cedict'; | ||
|
||
export { ChineseWord } | ||
export { ChineseWord }; | ||
|
||
export default Cedict | ||
export default Cedict; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
declare module '*cedict.json' { | ||
const value: any | ||
export default value | ||
const value: any; | ||
export default value; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,41 @@ | ||
import ChineseWord from '../types/ChineseWord' | ||
import Cedict from '../classes/Cedict' | ||
import ChineseWord from '../types/ChineseWord'; | ||
import Cedict from '../classes/Cedict'; | ||
|
||
describe('Check property return of dictionary function', () => { | ||
it('getByTraditional should have ChineseWord property', () => { | ||
const expected = Cedict.getByTraditional('一') | ||
const expected = Cedict.getByTraditional('一'); | ||
|
||
expect(expected).toHaveProperty('traditional') | ||
expect(expected).toHaveProperty('simplified') | ||
expect(expected).toHaveProperty('pinyin') | ||
expect(expected).toHaveProperty('english') | ||
}) | ||
expect(expected).toHaveProperty('traditional'); | ||
expect(expected).toHaveProperty('simplified'); | ||
expect(expected).toHaveProperty('pinyin'); | ||
expect(expected).toHaveProperty('english'); | ||
}); | ||
|
||
it('get should have ChineseWord property', () => { | ||
const expected = Cedict.getBySimplified('一') | ||
const expected = Cedict.getBySimplified('一'); | ||
|
||
expect(expected).toHaveProperty('traditional') | ||
expect(expected).toHaveProperty('simplified') | ||
expect(expected).toHaveProperty('pinyin') | ||
expect(expected).toHaveProperty('english') | ||
}) | ||
expect(expected).toHaveProperty('traditional'); | ||
expect(expected).toHaveProperty('simplified'); | ||
expect(expected).toHaveProperty('pinyin'); | ||
expect(expected).toHaveProperty('english'); | ||
}); | ||
|
||
it('getByEnglsih should have ChineseWord property', () => { | ||
const englishArray = Cedict.getByEnglish('one') as ChineseWord[] | ||
const expected = englishArray[0] | ||
const englishArray = Cedict.getByEnglish('one') as ChineseWord[]; | ||
const expected = englishArray[0]; | ||
|
||
expect(expected).toHaveProperty('traditional') | ||
expect(expected).toHaveProperty('simplified') | ||
expect(expected).toHaveProperty('pinyin') | ||
expect(expected).toHaveProperty('english') | ||
}) | ||
expect(expected).toHaveProperty('traditional'); | ||
expect(expected).toHaveProperty('simplified'); | ||
expect(expected).toHaveProperty('pinyin'); | ||
expect(expected).toHaveProperty('english'); | ||
}); | ||
|
||
it('getByPinyin should have ChineseWord property', () => { | ||
const expected = Cedict.getByPinyin('yi1') as ChineseWord | ||
|
||
expect(expected).toHaveProperty('traditional') | ||
expect(expected).toHaveProperty('simplified') | ||
expect(expected).toHaveProperty('pinyin') | ||
expect(expected).toHaveProperty('english') | ||
}) | ||
}) | ||
const expected = Cedict.getByPinyin('yi1') as ChineseWord; | ||
|
||
expect(expected).toHaveProperty('traditional'); | ||
expect(expected).toHaveProperty('simplified'); | ||
expect(expected).toHaveProperty('pinyin'); | ||
expect(expected).toHaveProperty('english'); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters