Skip to content

Commit

Permalink
2.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Tykok committed Feb 23, 2023
1 parent 97df8c7 commit 235052b
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 109 deletions.
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@tykok/cedict-dictionary",
"version": "1.5.0",
"version": "2.0.0",
"description": "The Cedict dictionary get and format the cedict to a JSON file",
"main": "lib/index.js",
"files": [
Expand Down
43 changes: 20 additions & 23 deletions src/classes/Cedict.ts
Original file line number Diff line number Diff line change
@@ -1,33 +1,30 @@
import cedict from '../../data/cedict.json'
import ChineseWord from '../types/ChineseWord'
import cedict from '../../data/cedict.json';
import ChineseWord from '../types/ChineseWord';

/**
* Static lookup helpers over the dictionary entries imported from
* `data/cedict.json`.
*
* NOTE(review): this span is a rendered commit diff, so every method shows up
* twice (pre-commit and post-commit form); the two forms differ only in
* spacing and semicolons, and the notes below hold for both.
*
* - `getCedict`: returns the whole imported entry list.
* - `getByTraditional` / `getBySimplified` / `getByPinyin`: return the first
*   entry whose corresponding field is strictly equal to the argument, or
*   `undefined` when no entry matches.
* - `getByEnglish`: returns every entry whose `english` text contains
*   `sentence` as a substring.
* - `allOccurenceOfTraditional`: returns every entry whose `traditional` text
*   contains `traditional` as a substring.
*
* The two `filter`-based methods always produce an array (empty when nothing
* matches), even though their declared return type also allows `undefined`.
*/
class Cedict {

static getCedict (): ChineseWord[] {
return cedict
static getCedict(): ChineseWord[] {
return cedict;
}

static getByTraditional (character: string): ChineseWord | undefined {
return Cedict.getCedict().find((word) => word.traditional === character)
}

static getBySimplified (character: string): ChineseWord | undefined {
return Cedict.getCedict().find((word) => word.simplified === character)

static getByTraditional(character: string): ChineseWord | undefined {
return Cedict.getCedict().find((word) => word.traditional === character);
}
static getByPinyin (character: string): ChineseWord | undefined {
return Cedict.getCedict().find((word) => word.pinyin === character)

static getBySimplified(character: string): ChineseWord | undefined {
return Cedict.getCedict().find((word) => word.simplified === character);
}
static getByEnglish (sentence: string): ChineseWord[] | undefined {
return Cedict.getCedict().filter((word) => word.english.includes(sentence))

static getByPinyin(character: string): ChineseWord | undefined {
return Cedict.getCedict().find((word) => word.pinyin === character);
}
static allOccurenceOfTraditional (traditional: string): ChineseWord[] | undefined {
return Cedict.getCedict().filter((word) => word.traditional.includes(traditional))

static getByEnglish(sentence: string): ChineseWord[] | undefined {
return Cedict.getCedict().filter((word) => word.english.includes(sentence));
}


static allOccurenceOfTraditional(traditional: string): ChineseWord[] | undefined {
return Cedict.getCedict().filter((word) => word.traditional.includes(traditional));
}
}

export default Cedict
export default Cedict;
84 changes: 42 additions & 42 deletions src/format-cedict.ts
Original file line number Diff line number Diff line change
@@ -1,74 +1,74 @@
import extract from 'extract-zip'
import * as fs from 'fs'
import * as superagent from 'superagent'
import ChineseWord from './types/ChineseWord'
import extract from 'extract-zip';
import * as fs from 'fs';
import * as superagent from 'superagent';
import ChineseWord from './types/ChineseWord';

// Download source and on-disk locations used by this script:
// - urlOfZip: remote zip archive that is downloaded.
// - pathOfData: directory (relative path) holding all generated files.
// - pathOfCedict: text file read by `readFile` (NOTE(review): presumably the
//   file produced by extracting the zip — confirm the archive's entry name).
// - pathOfZip: where the downloaded archive is written.
// - pathOfJSON: where the serialized result is written.
// NOTE(review): each constant is listed twice because this span is a rendered
// diff (pre-commit line without semicolon, post-commit line with one).
const urlOfZip = 'https://www.mdbg.net/chinese/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip'
const pathOfData = 'data'
const pathOfCedict = `${pathOfData}/cedict_ts.u8`
const pathOfZip = `${pathOfData}/cedict_1_0_ts_utf-8_mdbg.zip`
const pathOfJSON = `${pathOfData}/cedict.json`
const urlOfZip = 'https://www.mdbg.net/chinese/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip';
const pathOfData = 'data';
const pathOfCedict = `${pathOfData}/cedict_ts.u8`;
const pathOfZip = `${pathOfData}/cedict_1_0_ts_utf-8_mdbg.zip`;
const pathOfJSON = `${pathOfData}/cedict.json`;

/**
* Parses one line of the CEDICT text file into a `ChineseWord`.
*
* @param line - A single raw line of the dictionary file.
* @returns The parsed entry, or `null` when the line is empty, starts with
* `#`, or makes the field extraction throw (the exception is swallowed and
* mapped to `null`).
*/
const parseLine = (line: string): ChineseWord | null => {
// Skip empty lines and `#` lines, then split once at the first `/`: the
// capture group keeps everything after that slash (the `s` flag lets `.`
// match newlines too), so index 0 is the head and index 1 is the remainder.
// The length guard is not expected to trigger for a non-empty line, since
// `split` then yields at least one element.
if (!line || line === '' || line.startsWith('#')) return null
const splitedLine = line.split(/\/(.*)/s)
if (splitedLine.length <= 0) return null
if (!line || line === '' || line.startsWith('#')) return null;
const splitedLine = line.split(/\/(.*)/s);
if (splitedLine.length <= 0) return null;

try {
// Head layout as read here: `<traditional> <simplified> [<pinyin>...`.
// `english` is the raw remainder after the first `/`, kept verbatim.
// NOTE(review): `pinyin` is cut at the first space and then at the first
// `]`, so only the first space-separated token after `[` is kept — confirm
// this is intended for entries with several syllables.
const english = splitedLine[1]
const charAndPinyin = splitedLine[0].split('[')
const characters = charAndPinyin[0].split(' ')
const traditional = characters[0]
const simplified = characters[1]
let pinyin = charAndPinyin[1]
pinyin = pinyin.split(' ')[0] as unknown as string
pinyin = pinyin.split(']')[0] as unknown as string
const english = splitedLine[1];
const charAndPinyin = splitedLine[0].split('[');
const characters = charAndPinyin[0].split(' ');
const traditional = characters[0];
const simplified = characters[1];
let pinyin = charAndPinyin[1];
pinyin = pinyin.split(' ')[0] as unknown as string;
pinyin = pinyin.split(']')[0] as unknown as string;

return { traditional, simplified, pinyin, english }
return { traditional, simplified, pinyin, english };
} catch (e: unknown) {
// Reached e.g. when the head has no `[`: `pinyin` is then undefined and
// calling `.split` on it throws.
return null
return null;
}
}
};

/**
* Synchronously reads the file at `pathOfCedict` as UTF-8 text.
*
* @returns The file content split on `\n`. Only `\n` is used as separator, so
* any `\r` characters stay attached to their line.
*/
const readFile = (): string[] => {
const file = fs.readFileSync(pathOfCedict, 'utf8')
return file.split('\n')
}
const file = fs.readFileSync(pathOfCedict, 'utf8');
return file.split('\n');
};

/**
* Runs `parseLine` over every line returned by `readFile()`.
*
* @returns The successfully parsed entries, in file order; lines for which
* `parseLine` returns `null` are left out.
*/
const parsedArray = (): ChineseWord[] => {
const listOfChineseWord: ChineseWord[] = []
const listOfChineseWord: ChineseWord[] = [];
for (const line of readFile()) {
const word = parseLine(line)
const word = parseLine(line);
if (word !== null) {
listOfChineseWord.push(word)
listOfChineseWord.push(word);
}
}
return listOfChineseWord
return listOfChineseWord;
};

/**
* Serializes `object` with `JSON.stringify` and synchronously writes the
* resulting text to `pathOfJSON`.
*
* @param object - Any JSON-serializable value (here: the parsed entry list).
*/
const serializeAndSave = (object: any) => {
const strignify = JSON.stringify(object)
fs.writeFileSync(pathOfJSON, strignify)
}
const strignify = JSON.stringify(object);
fs.writeFileSync(pathOfJSON, strignify);
};

/**
* Downloads the zip at `urlOfZip`, extracts it and regenerates the JSON file.
*
* Steps, as wired below:
* 1. The GET response is piped into a write stream at `pathOfZip`.
* 2. On the `finish` event the archive is extracted into
*    `<cwd>/<pathOfData>/`, using paths built from `process.cwd()`.
* 3. `serializeAndSave(parsedArray())` then writes the parsed entries as JSON.
*
* The `error` handler on the request only returns `null`, so request errors
* are neither logged nor rethrown.
*
* NOTE(review): the value returned is the end of the `.pipe(...).on(...)`
* chain (presumably the write stream), so the promise of this async function
* does not appear to wait for extraction/serialization — confirm before
* relying on `await downloadUnzipAndFormat()`.
*/
const downloadUnzipAndFormat = async () => {
return superagent
.get(urlOfZip)
.on('error', () => {
return null
return null;
})
.pipe(fs.createWriteStream(pathOfZip))
.on('finish', async () => {
await extract(`${process.cwd()}/${pathOfZip}`, { dir: `${process.cwd()}/${pathOfData}/` })
serializeAndSave(parsedArray())
})
}
await extract(`${process.cwd()}/${pathOfZip}`, { dir: `${process.cwd()}/${pathOfData}/` });
serializeAndSave(parsedArray());
});
};

/**
* Script entry point: starts `downloadUnzipAndFormat()` without awaiting it.
* `main()` itself is invoked right below, when the module is loaded, and its
* returned promise is not awaited or given a rejection handler.
*/
const main = async () => {
downloadUnzipAndFormat()
}
downloadUnzipAndFormat();
};

main()
main();
8 changes: 4 additions & 4 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import ChineseWord from './types/ChineseWord'
import Cedict from './classes/Cedict'
import ChineseWord from './types/ChineseWord';
import Cedict from './classes/Cedict';

export { ChineseWord }
export { ChineseWord };

export default Cedict
export default Cedict;
4 changes: 2 additions & 2 deletions src/module.d.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Ambient wildcard module declaration: any import whose path ends in
// `cedict.json` type-checks, with its default export typed as `any`.
declare module '*cedict.json' {
const value: any
export default value
const value: any;
export default value;
}
58 changes: 29 additions & 29 deletions src/test/dictionary-function.test.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
import ChineseWord from '../types/ChineseWord'
import Cedict from '../classes/Cedict'
import ChineseWord from '../types/ChineseWord';
import Cedict from '../classes/Cedict';

// Test suite (describe/it/expect style) checking that the Cedict lookups
// return objects exposing the four ChineseWord fields: `traditional`,
// `simplified`, `pinyin` and `english`.
// Lookups exercised: getByTraditional('一'), getBySimplified('一'), the first
// element of getByEnglish('one'), and getByPinyin('yi1').
// NOTE(review): the local named `expected` holds the value under test, not an
// expectation; the third test title misspells "getByEnglish" and the second
// title ("get") presumably refers to getBySimplified.
describe('Check property return of dictionary function', () => {
it('getByTraditional should have ChineseWord property', () => {
const expected = Cedict.getByTraditional('一')
const expected = Cedict.getByTraditional('一');

expect(expected).toHaveProperty('traditional')
expect(expected).toHaveProperty('simplified')
expect(expected).toHaveProperty('pinyin')
expect(expected).toHaveProperty('english')
})
expect(expected).toHaveProperty('traditional');
expect(expected).toHaveProperty('simplified');
expect(expected).toHaveProperty('pinyin');
expect(expected).toHaveProperty('english');
});

it('get should have ChineseWord property', () => {
const expected = Cedict.getBySimplified('一')
const expected = Cedict.getBySimplified('一');

expect(expected).toHaveProperty('traditional')
expect(expected).toHaveProperty('simplified')
expect(expected).toHaveProperty('pinyin')
expect(expected).toHaveProperty('english')
})
expect(expected).toHaveProperty('traditional');
expect(expected).toHaveProperty('simplified');
expect(expected).toHaveProperty('pinyin');
expect(expected).toHaveProperty('english');
});

it('getByEnglsih should have ChineseWord property', () => {
const englishArray = Cedict.getByEnglish('one') as ChineseWord[]
const expected = englishArray[0]
const englishArray = Cedict.getByEnglish('one') as ChineseWord[];
const expected = englishArray[0];

expect(expected).toHaveProperty('traditional')
expect(expected).toHaveProperty('simplified')
expect(expected).toHaveProperty('pinyin')
expect(expected).toHaveProperty('english')
})
expect(expected).toHaveProperty('traditional');
expect(expected).toHaveProperty('simplified');
expect(expected).toHaveProperty('pinyin');
expect(expected).toHaveProperty('english');
});

it('getByPinyin should have ChineseWord property', () => {
const expected = Cedict.getByPinyin('yi1') as ChineseWord

expect(expected).toHaveProperty('traditional')
expect(expected).toHaveProperty('simplified')
expect(expected).toHaveProperty('pinyin')
expect(expected).toHaveProperty('english')
})
})
const expected = Cedict.getByPinyin('yi1') as ChineseWord;

expect(expected).toHaveProperty('traditional');
expect(expected).toHaveProperty('simplified');
expect(expected).toHaveProperty('pinyin');
expect(expected).toHaveProperty('english');
});
});
12 changes: 6 additions & 6 deletions src/types/ChineseWord.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@ type ChineseWord = {
/**
* Chinese traditional character
*/
traditional: string
traditional: string;

/**
* Chinese simplified character
*/
simplified: string
simplified: string;

/**
* Pinyin reading of the word (pinyin is the official transcription of Mandarin Chinese)
*/
pinyin: string
pinyin: string;

/**
* English translation of the Chinese word
*/
english: string
}
english: string;
};

export default ChineseWord
export default ChineseWord;

0 comments on commit 235052b

Please sign in to comment.