Skip to content

Commit

Permalink
Fix Qcel import not working for words that contain letters; add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
yfdyh000 committed Mar 26, 2022
1 parent 09d9450 commit 2d91719
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 23 deletions.
41 changes: 19 additions & 22 deletions src/ImeWlConverterCore/IME/QQPinyinQcel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public WordLibraryList Import(string path)
#endregion

private Dictionary<int, string> pyDic = new Dictionary<int, string>();
private static char[] a2zchar => "abcdefghijklmnopqrstuvwxyz".ToCharArray();

#region IWordLibraryImport Members

Expand All @@ -55,7 +56,10 @@ public WordLibraryList ImportLine(string line)
{
throw new Exception("Qcel格式是二进制文件,不支持流转换");
}

/// <summary>
/// Reads the information dictionary of a .qcel file.
/// The work is delegated unchanged to <c>SougouPinyinScel.ReadScelInfo</c>.
/// </summary>
/// <param name="path">Path of the .qcel file to inspect.</param>
/// <returns>Whatever <c>SougouPinyinScel.ReadScelInfo</c> returns for <paramref name="path"/>.</returns>
public static Dictionary<string, string> ReadQcelInfo(string path) =>
    SougouPinyinScel.ReadScelInfo(path);
private WordLibraryList ReadQcel(string path)
{
pyDic = new Dictionary<int, string>();
Expand Down Expand Up @@ -88,22 +92,6 @@ private WordLibraryList ReadQcel(string path)
CountWord = BinFileHelper.ReadInt32(fs);
CurrentStatus = 0;

//fs.Position = 0x130;
//fs.Read(str, 0, 64);
//string txt = Encoding.Unicode.GetString(str);
////Console.WriteLine("字库名称:" + txt);
//fs.Position = 0x338;
//fs.Read(str, 0, 64);
////Console.WriteLine("字库类别:" + Encoding.Unicode.GetString(str));

//fs.Position = 0x540;
//fs.Read(str, 0, 64);
////Console.WriteLine("字库信息:" + Encoding.Unicode.GetString(str));

//fs.Position = 0xd40;
//fs.Read(str, 0, 64);
////Console.WriteLine("字库示例:" + Encoding.Unicode.GetString(str));

fs.Position = 0x1540;
str = new byte[4];
fs.Read(str, 0, 4); //\x9D\x01\x00\x00
Expand All @@ -119,6 +107,7 @@ private WordLibraryList ReadQcel(string path)
pyDic.Add(mark, py);
if (py == "zuo") //最后一个拼音
{
Debug.WriteLine(fs.Position);
break;
}
}
Expand All @@ -130,20 +119,23 @@ private WordLibraryList ReadQcel(string path)
Debug.WriteLine(s.ToString());


fs.Position = 0x2628;
//fs.Position = 0x2628;
//fs.Position = hzPosition;

while (true)
{
try
{
pyAndWord.AddRange(ReadAPinyinWord(fs));
var data = ReadAPinyinWord(fs);
if (data is null) break;

pyAndWord.AddRange(data);
}
catch (Exception ex)
{
Debug.WriteLine(ex.Message);
throw ex;
}
if (fs.Length == fs.Position) //判断文件结束
if (CurrentStatus == CountWord || fs.Length == fs.Position) //判断文件结束
{
fs.Close();
break;
Expand Down Expand Up @@ -174,7 +166,12 @@ private IList<WordLibrary> ReadAPinyinWord(FileStream fs)
for (int i = 0; i < pinyinLen / 2; i++)
{
int key = str[i * 2] + str[i * 2 + 1] * 256;
wordPY.Add(pyDic[key]);
//Debug.Assert(key < pyDic.Count);
if(key < pyDic.Count)
wordPY.Add(pyDic[key]);
else
wordPY.Add(a2zchar[key - pyDic.Count].ToString());
//return null; // 用于调试,忽略编码异常的记录,不中止运行
}
//wordPY = wordPY.Remove(wordPY.Length - 1); //移除最后一个单引号
//接下来读词语
Expand Down
2 changes: 1 addition & 1 deletion src/ImeWlConverterCore/MainBody.cs
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ public string Convert(IList<string> filePathes)
}
catch(Exception ex)
{
ProcessNotice("词库(" + Path.GetFileName(file) + ")处理出现异常:" + ex.Message);
ProcessNotice("词库(" + Path.GetFileName(file) + ")处理出现异常:\n\t" + ex.Message);
isImportProgress = false;
this.timer.Stop();
return "";
Expand Down
3 changes: 3 additions & 0 deletions src/ImeWlConverterCoreTest/ImeWlConverterCoreTest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@
<None Update="Test\成语.qpyd">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Test\星际战甲.qcel">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Test\纯汉字.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
82 changes: 82 additions & 0 deletions src/ImeWlConverterCoreTest/QQPinyinQcelTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
* Copyright © 2022 yfdyh000
* This program "IME WL Converter(深蓝词库转换)" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

using System;
using System.ComponentModel;
using System.IO;
using System.Net;
using System.Text;
using NUnit.Framework;
using Studyzy.IMEWLConverter.Entities;
using Studyzy.IMEWLConverter.IME;

namespace Studyzy.IMEWLConverter.Test
{
/// <summary>
/// Tests for the <see cref="QQPinyinQcel"/> importer, using the bundled
/// "星际战甲.qcel" sample dictionary, which contains words mixing Chinese
/// characters and Latin letters.
/// </summary>
[TestFixture]
internal class QQPinyinQcelTest : BaseTest
{
    /// <summary>Creates the importer under test once for the whole fixture.</summary>
    [OneTimeSetUp]
    public override void InitData()
    {
        importer = new QQPinyinQcel();
    }

    /// <summary>
    /// Not implemented for this fixture: reading it always throws
    /// <see cref="NotImplementedException"/>.
    /// </summary>
    protected override string StringData
    {
        get { throw new NotImplementedException(); }
    }

    /// <summary>
    /// ImportLine must throw, and the exception must carry the
    /// "binary format" explanation.
    /// </summary>
    [Test]
    public void TestImportLine()
    {
        // The string argument of Assert.Catch is only the failure message shown
        // when nothing is thrown; it does not verify the exception text. Capture
        // the exception and check its message explicitly instead.
        Exception ex = Assert.Catch(() =>
        {
            importer.ImportLine("test");
        });

        Assert.That(ex.Message, Does.StartWith("Qcel格式是二进制文件"));
    }

    /// <summary>
    /// Imports a .qcel file whose entries include Latin letters and checks the
    /// entry count, pinyin codes and words of selected entries.
    /// </summary>
    /// <param name="filePath">Test file name, resolved through GetFullPath.</param>
    [TestCase("星际战甲.qcel")]
    public void TestImportQcelWithAlphabet(string filePath)
    {
        var lib = importer.Import(GetFullPath(filePath));

        Assert.Greater(lib.Count, 0);

        // NUnit's AreEqual takes (expected, actual); keeping that order makes
        // failure messages report the right values.
        Assert.AreEqual(4675, lib.Count);
        Assert.AreEqual(Studyzy.IMEWLConverter.Entities.CodeType.Pinyin, lib[0].CodeType);
        Assert.AreEqual("a'ka'ta", lib[2].PinYinString);
        Assert.AreEqual("a'ka'ta'r'i'v'wai'guan", lib[3].PinYinString);
        Assert.AreEqual(0, lib[0].Rank);
        Assert.AreEqual("zuo", lib[4670].SingleCode);
        Assert.AreEqual("阿卡塔", lib[2].Word);
        Assert.AreEqual("阿卡塔riv外观", lib[3].Word);
    }

    /// <summary>
    /// Reads the information dictionary of a .qcel file and checks the
    /// expected entries.
    /// </summary>
    /// <param name="filePath">Test file name, resolved through GetFullPath.</param>
    [TestCase("星际战甲.qcel")]
    public void TestListQcelInfo(string filePath)
    {
        var info = QQPinyinQcel.ReadQcelInfo(GetFullPath(filePath));

        Assert.That(info, Is.Not.Null.And.Not.Empty);
        foreach (var item in info)
        {
            TestContext.WriteLine(item.Key + ": " + item.Value);
        }

        Assert.AreEqual("4675", info["CountWord"]);
        Assert.AreEqual("星际战甲warframe国际服", info["Name"]);
        Assert.AreEqual("射击游戏", info["Type"]);
        // Does.Contain prints the actual string on failure, unlike Assert.True(bool).
        Assert.That(info["Info"], Does.Contain("词条来源是灰机wiki-warframe中文维基的中英文对照表"));
        Assert.That(info["Sample"], Does.Contain("肿瘤 三叶坚韧 狂风猛踢 寒冰之力"));
    }
}
}
Binary file not shown.

0 comments on commit 2d91719

Please sign in to comment.