From 84f36790cef48399c5cc5fbfee5a9ee96afacb3d Mon Sep 17 00:00:00 2001 From: jiro Date: Thu, 23 Jun 2022 09:20:26 +0900 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=EF=BC=81=E3=82=84=EF=BC=9F=E3=81=8C?= =?UTF-8?q?=E5=87=BA=E7=8F=BE=E3=81=97=E3=81=9F=E6=99=82=E3=81=AB=E3=80=81?= =?UTF-8?q?=E5=8D=8A=E8=A7=92=E3=82=84=E7=B5=B5=E6=96=87=E5=AD=97=E3=81=AB?= =?UTF-8?q?=E3=83=A9=E3=83=B3=E3=83=80=E3=83=A0=E3=81=A7=E5=88=87=E3=82=8A?= =?UTF-8?q?=E6=9B=BF=E3=81=88=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=EF=BC=8B?= =?UTF-8?q?=EF=BC=81=EF=BC=9F=EF=BC=81=EF=BC=9F=E3=81=AA=E3=81=A9=E9=80=A3?= =?UTF-8?q?=E7=B6=9A=E3=81=99=E3=82=8B=E5=A0=B4=E5=90=88=E3=82=82=E5=87=A6?= =?UTF-8?q?=E7=90=86=E3=81=A7=E3=81=8D=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E3=81=84=E3=81=9F=E3=81=97=E3=81=BE=E3=81=97=E3=81=9F=E3=82=8F?= =?UTF-8?q?=20#40=20(#41)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * wip * wip * wip * だいたいいけた * fix test * fix * fix make * add test case * del unused func * add test case * go fmt * comment * fix: nilアクセスが発生していたので修正 * fix: CI * fix: path --- .github/workflows/test.yml | 5 +- Makefile | 4 +- internal/chars/chars.go | 122 +++++++++++++++++++++++++++++++ internal/chars/chars_test.go | 136 +++++++++++++++++++++++++++++++++++ ojosama.go | 68 +++++++++++++++--- ojosama_test.go | 64 +++++++++++++++++ 6 files changed, 384 insertions(+), 15 deletions(-) create mode 100644 internal/chars/chars.go create mode 100644 internal/chars/chars_test.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8465607..c7eb7c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,9 +40,10 @@ jobs: fi - name: Static analysis run: go vet . - - run: go build - - run: go install + - run: go build ./cmd/ojosama + - run: go install ./cmd/ojosama - run: go test -cover ./... + - run: ./ojosama README.adoc coverage: runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 36a28a7..d5605c8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -bin/ojosama: go.* *.go cmd/* +bin/ojosama: go.* *.go cmd/* internal/* make test go vet . go fmt . @@ -10,5 +10,5 @@ test: go test -cover ./... .PHONY: install -install: go.* *.go cmd/* +install: go.* *.go cmd/* internal/* go install ./cmd/ojosama diff --git a/internal/chars/chars.go b/internal/chars/chars.go new file mode 100644 index 0000000..a2ae77c --- /dev/null +++ b/internal/chars/chars.go @@ -0,0 +1,122 @@ +package chars + +import ( + "math/rand" +) + +type ExclQuesMark struct { + Value string + Style StyleType + Meaning MeaningType +} + +type StyleType int +type MeaningType int +type TestMode struct { + Pos int +} + +const ( + styleTypeUnknown StyleType = iota + styleTypeFullWidth + styleTypeHalfWidth + styleTypeEmoji + styleTypeDoubleEmoji // !! + + meaningTypeUnknown = iota + meaningTypeExcl // ! + meaningTypeQues // ? + meaningTypeEQ // !? +) + +var ( + eqMarks = []ExclQuesMark{ + newExcl("!", styleTypeFullWidth), + newExcl("!", styleTypeHalfWidth), + newExcl("❗", styleTypeEmoji), + newExcl("‼", styleTypeDoubleEmoji), + newQues("?", styleTypeFullWidth), + newQues("?", styleTypeHalfWidth), + newQues("❓", styleTypeEmoji), + newEQ("!?", styleTypeHalfWidth), + newEQ("⁉", styleTypeEmoji), + } +) + +func newExcl(v string, t StyleType) ExclQuesMark { + return ExclQuesMark{ + Value: v, + Style: t, + Meaning: meaningTypeExcl, + } +} + +func newQues(v string, t StyleType) ExclQuesMark { + return ExclQuesMark{ + Value: v, + Style: t, + Meaning: meaningTypeQues, + } +} + +func newEQ(v string, t StyleType) ExclQuesMark { + return ExclQuesMark{ + Value: v, + Style: t, + Meaning: meaningTypeEQ, + } +} + +func IsExclQuesMark(s string) (bool, *ExclQuesMark) { + for _, v := range eqMarks { + if v.Value == s { + return true, &v + } + } + return false, nil +} + +func SampleExclQuesByValue(v string, t *TestMode) *ExclQuesMark { + ok, got := IsExclQuesMark(v) + if !ok { + return nil + } + + var s []ExclQuesMark + for _, mark := range eqMarks { + if mark.Meaning == got.Meaning { + s = append(s, mark) + } + } + // 到達しないはずだけれど一応いれてる + if len(s) < 1 { + return nil + } + + if t != nil { + // テスト用のパラメータがあるときは決め打ちで返す + return &s[t.Pos] + } + rand.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] }) + return &s[0] +} + +func FindExclQuesByStyleAndMeaning(s StyleType, m MeaningType) *ExclQuesMark { + var eq []ExclQuesMark + for _, mark := range eqMarks { + if mark.Style == s { + eq = append(eq, mark) + } + } + if len(eq) < 1 { + return nil + } + + for _, mark := range eq { + if mark.Meaning == m { + return &mark + } + } + + return nil +} diff --git a/internal/chars/chars_test.go b/internal/chars/chars_test.go new file mode 100644 index 0000000..260f1ef --- /dev/null +++ b/internal/chars/chars_test.go @@ -0,0 +1,136 @@ +package chars + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsExclQuesMark(t *testing.T) { + tests := []struct { + desc string + s string + wantOK bool + wantEQ ExclQuesMark + }{ + { + desc: "正常系: !とはマッチいたしますわ", + s: "!", + wantOK: true, + wantEQ: newExcl("!", styleTypeFullWidth), + }, + { + desc: "正常系: ❓とはマッチいたしますわ", + s: "❓", + wantOK: true, + wantEQ: newQues("❓", styleTypeEmoji), + }, + { + desc: "正常系: 漆とはマッチいたしませんわ", + s: "漆", + wantOK: false, + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + assert := assert.New(t) + + got, got2 := IsExclQuesMark(tt.s) + assert.Equal(tt.wantOK, got) + if tt.wantOK { + assert.Equal(&tt.wantEQ, got2) + } + }) + } +} + +func TestSampleExclQuesByValue(t *testing.T) { + tests := []struct { + desc string + v string + t *TestMode + want ExclQuesMark + wantNil bool + }{ + { + desc: "正常系: !とはマッチいたしますわ", + v: "!", + t: &TestMode{Pos: 0}, + want: newExcl("!", styleTypeFullWidth), + }, + { + desc: "正常系: ❓とはマッチいたしますわ", + v: "❓", + t: &TestMode{Pos: 2}, + want: newQues("❓", styleTypeEmoji), + }, + { + desc: "正常系: 菫とはマッチいたしませんわ", + v: "菫", + t: &TestMode{Pos: 2}, + wantNil: true, + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + assert := assert.New(t) + + got := SampleExclQuesByValue(tt.v, tt.t) + if tt.wantNil { + assert.Nil(got) + return + } + assert.Equal(&tt.want, got) + }) + } +} + +func TestFindExclQuesByStyleAndMeaning(t *testing.T) { + tests := []struct { + desc string + s StyleType + m MeaningType + want ExclQuesMark + wantNil bool + }{ + { + desc: "正常系: ❗を指定いたしますわ", + s: styleTypeEmoji, + m: meaningTypeExcl, + want: newExcl("❗", styleTypeEmoji), + }, + { + desc: "正常系: ?を指定いたしますわ", + s: styleTypeFullWidth, + m: meaningTypeQues, + want: newQues("?", styleTypeFullWidth), + }, + { + desc: "正常系: 不明な要素の場合は何もお返しいたしませんわ", + s: styleTypeUnknown, + m: meaningTypeExcl, + wantNil: true, + }, + { + desc: "正常系: 不明な要素の場合は何もお返しいたしませんわ", + s: styleTypeFullWidth, + m: meaningTypeUnknown, + wantNil: true, + }, + } + + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + assert := assert.New(t) + + got := FindExclQuesByStyleAndMeaning(tt.s, tt.m) + if tt.wantNil { + assert.Nil(got) + return + } + assert.Equal(&tt.want, got) + }) + } +} diff --git a/ojosama.go b/ojosama.go index cd0a86a..3d83cf2 100644 --- a/ojosama.go +++ b/ojosama.go @@ -8,11 +8,13 @@ import ( "github.com/ikawaha/kagome-dict/ipa" "github.com/ikawaha/kagome/v2/tokenizer" + "github.com/jiro4989/ojosama/internal/chars" ) // ConvertOption はお嬢様変換時のオプショナルな設定。 type ConvertOption struct { forceAppendLongNote forceAppendLongNote // 単体テスト用のパラメータ + forceCharsTestMode *chars.TestMode // 単体テスト用のパラメータ } // forceAppendLongNote は強制的に波線や感嘆符や疑問符を任意の数追加するための設定。 @@ -43,6 +45,18 @@ func init() { // opt は挙動を微調整するためのオプショナルなパラメータ。不要であれば nil を渡せ // ば良い。 func Convert(src string, opt *ConvertOption) (string, error) { + // 正規化 + // !?だと「!」と「?」に分割されるが + // !?!?❗❓とかだと、1つのサ変接続として解釈されてしまう。 + // この辺の文字のばらつきで処理がばらつくのが面倒なので + // 形態素解析するまえに表記ゆれを統一してしまう。 + src = strings.ReplaceAll(src, "!", "!") + src = strings.ReplaceAll(src, "?", "?") + src = strings.ReplaceAll(src, "❗", "!") + src = strings.ReplaceAll(src, "❓", "?") + src = strings.ReplaceAll(src, "‼", "!!") + src = strings.ReplaceAll(src, "⁉", "!?") + t, err := tokenizer.New(ipa.Dict(), tokenizer.OmitBosEos()) if err != nil { return "", err @@ -84,7 +98,7 @@ func Convert(src string, opt *ConvertOption) (string, error) { } // お嬢様言葉に変換 - buf, nounKeep = convert(data, tokens, i, buf, nounKeep, opt) + buf, nounKeep, i = convert(data, tokens, i, buf, nounKeep, opt) // 形容詞、自立で文が終わった時は丁寧語ですわを追加する buf = appendPoliteWord(data, tokens, i, buf) @@ -194,7 +208,7 @@ func convertContinuousConditions(tokens []tokenizer.Token, tokenPos int, opt *Co n := tokenPos + len(mc.Conditions) - 1 result := mc.Value if mc.AppendLongNote { - result = appendLongNote(result, tokens, n, opt) + result, n = appendLongNote(result, tokens, n, opt) } return result, n, true } @@ -232,7 +246,7 @@ excludeLoop: } // convert は基本的な変換を行う。 -func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface string, nounKeep bool, opt *ConvertOption) (string, bool) { +func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface string, nounKeep bool, opt *ConvertOption) (string, bool, int) { var beforeToken tokenizer.TokenData var beforeTokenOK bool if 0 < i { @@ -270,10 +284,11 @@ func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface } result := c.Value + pos := i // 波線伸ばしをランダムに追加する if c.AppendLongNote { - result = appendLongNote(result, tokens, i, opt) + result, pos = appendLongNote(result, tokens, i, opt) } // 手前に「お」を付ける @@ -281,13 +296,13 @@ func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface result, nounKeep = appendPrefix(data, tokens, i, result, nounKeep) } - return result, nounKeep + return result, nounKeep, pos } // 手前に「お」を付ける result := surface result, nounKeep = appendPrefix(data, tokens, i, result, nounKeep) - return result, nounKeep + return result, nounKeep, i } // appendPrefix は surface の前に「お」を付ける。 @@ -361,9 +376,14 @@ func isSentenceSeparation(data tokenizer.TokenData) bool { // // 乱数が絡むと単体テストがやりづらくなるので、 opt を使うことで任意の数付与でき // るようにしている。 -func appendLongNote(src string, tokens []tokenizer.Token, i int, opt *ConvertOption) string { +func appendLongNote(src string, tokens []tokenizer.Token, i int, opt *ConvertOption) (string, int) { if len(tokens) <= i+1 { - return src + return src, i + } + + var tm *chars.TestMode + if opt != nil { + tm = opt.forceCharsTestMode } data := tokenizer.NewTokenData(tokens[i+1]) @@ -389,16 +409,42 @@ func appendLongNote(src string, tokens []tokenizer.Token, i int, opt *ConvertOpt suffix.WriteString("~") } + // !or?をどれかからランダムに選択する + feq := chars.SampleExclQuesByValue(s, tm) + // 次の token は必ず感嘆符か疑問符のどちらかであることが確定しているため // -1 して数を調整している。 for i := 0; i < e-1; i++ { - suffix.WriteString(s) + suffix.WriteString(feq.Value) + } + + // 後ろに!や?が連続する場合、それらをすべて feq と同じ種類(半角、全角、 + // 絵文字)の!や?に置き換えて返却する。 + pos := i + loop2: + for j := i + 1; j < len(tokens); j++ { + token := tokens[j] + data := tokenizer.NewTokenData(token) + for _, r := range data.Surface { + surface := string(r) + if ok, eq := chars.IsExclQuesMark(surface); !ok { + break loop2 + } else { + // e は!か?のどちらかなので、同じスタイルの文字を取得して追加 + if got := chars.FindExclQuesByStyleAndMeaning(feq.Style, eq.Meaning); got != nil { + + suffix.WriteString(got.Value) + } + } + } + // トークンの位置を制御する変数なので、forループ内では変更しない + pos = j } src += suffix.String() - break + return src, pos } - return src + return src, i } // isPoliteWord は丁寧語かどうかを判定する。 diff --git a/ojosama_test.go b/ojosama_test.go index f5ebef6..f45b902 100644 --- a/ojosama_test.go +++ b/ojosama_test.go @@ -3,6 +3,7 @@ package ojosama import ( "testing" + "github.com/jiro4989/ojosama/internal/chars" "github.com/stretchr/testify/assert" ) @@ -199,6 +200,9 @@ func TestConvert(t *testing.T) { wavyLineCount: 2, exclamationMarkCount: 3, }, + forceCharsTestMode: &chars.TestMode{ + Pos: 0, + }, }, wantErr: false, }, @@ -212,6 +216,9 @@ func TestConvert(t *testing.T) { wavyLineCount: 2, exclamationMarkCount: 3, }, + forceCharsTestMode: &chars.TestMode{ + Pos: 1, + }, }, wantErr: false, }, @@ -225,6 +232,9 @@ func TestConvert(t *testing.T) { wavyLineCount: 2, exclamationMarkCount: 3, }, + forceCharsTestMode: &chars.TestMode{ + Pos: 0, + }, }, wantErr: false, }, @@ -238,6 +248,9 @@ func TestConvert(t *testing.T) { wavyLineCount: 2, exclamationMarkCount: 3, }, + forceCharsTestMode: &chars.TestMode{ + Pos: 0, + }, }, wantErr: false, }, @@ -260,6 +273,9 @@ func TestConvert(t *testing.T) { wavyLineCount: 2, exclamationMarkCount: 3, }, + forceCharsTestMode: &chars.TestMode{ + Pos: 0, + }, }, wantErr: false, }, @@ -451,6 +467,54 @@ func TestConvert(t *testing.T) { opt: nil, wantErr: false, }, + { + desc: "正常系: すべて全角文字に変換しますわ", + src: "です!?!?❗❓", + want: "ですわ~!?!?!?", + opt: &ConvertOption{ + forceAppendLongNote: forceAppendLongNote{ + enable: true, + wavyLineCount: 1, + exclamationMarkCount: 1, + }, + forceCharsTestMode: &chars.TestMode{ + Pos: 0, + }, + }, + wantErr: false, + }, + { + desc: "正常系: すべて絵文字に変換しますわ", + src: "です!?!?❗❓", + want: "ですわ~❗❓❗❓❗❓", + opt: &ConvertOption{ + forceAppendLongNote: forceAppendLongNote{ + enable: true, + wavyLineCount: 1, + exclamationMarkCount: 1, + }, + forceCharsTestMode: &chars.TestMode{ + Pos: 2, + }, + }, + wantErr: false, + }, + { + desc: "正常系: 絵文字を連続して付与もできますわ", + src: "です!寿司", + want: "ですわ~~❗❗❗お寿司", + opt: &ConvertOption{ + forceAppendLongNote: forceAppendLongNote{ + enable: true, + wavyLineCount: 2, + exclamationMarkCount: 3, + }, + forceCharsTestMode: &chars.TestMode{ + Pos: 2, + }, + }, + wantErr: false, + }, } for _, tt := range tests {