Skip to content

Commit

Permalink
✨「~ですわ」等の言い回しで終了する時に「。」をランダムに「!」「❗」等に変換する処理を追加いたしましたわ! #15 (#63)
Browse files Browse the repository at this point in the history
* add DisableKutenToExclamation

* 形容詞+自立で終わる場合に。を!に変換

* Add comment

* Add convert rule
  • Loading branch information
jiro4989 committed Jul 6, 2022
1 parent 63fa12c commit afe947b
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 76 deletions.
52 changes: 38 additions & 14 deletions convert_rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ type convertRule struct {
EnableWhenSentenceSeparation bool // 文の区切り(単語の後に句点か読点がくる、あるいは何もない)場合だけ有効にする
AppendLongNote bool // 波線を追加する
DisablePrefix bool // 「お」を手前に付与しない
Value string // この文字列に置換する
EnableKutenToExclamation bool
Value string // この文字列に置換する
}

/*
Expand Down Expand Up @@ -78,9 +79,10 @@ func (c convertRule) disablePrefix(v bool) convertRule {

// continuousConditionsConvertRule is a rule that converts when all of its
// consecutive conditions match.
type continuousConditionsConvertRule struct {
Conditions convertConditions
AppendLongNote bool
Value string
Conditions convertConditions
AppendLongNote bool
EnableKutenToExclamation bool
Value string
}

// sentenceEndingParticleConvertRule は「名詞」+「動詞」+「終助詞」の組み合わせによる変換ルール。
Expand Down Expand Up @@ -186,6 +188,7 @@ var (
newCond([]string{"動詞", "自立"}, "し"),
newCond([]string{"助動詞"}, "ます"),
},
EnableKutenToExclamation: true,
},

{
Expand All @@ -194,6 +197,7 @@ var (
newCond([]string{"助動詞"}, "だ"),
newCond([]string{"助詞", "接続助詞"}, "から"),
},
EnableKutenToExclamation: true,
},

{
Expand All @@ -203,6 +207,7 @@ var (
newCond([]string{"名詞", "非自立", "一般"}, "ん"),
newCond([]string{"助動詞"}, "だ"),
},
EnableKutenToExclamation: true,
},

{
Expand All @@ -211,6 +216,7 @@ var (
newCond([]string{"助動詞"}, "だ"),
newCond([]string{"助詞", "終助詞"}, "よ"),
},
EnableKutenToExclamation: true,
},

{
Expand All @@ -219,20 +225,23 @@ var (
newCond(posPronounGeneral, "なん"),
newCond(posSubPostpositionalParticle, "じゃ"),
},
EnableKutenToExclamation: true,
},
{
Value: "なんですの",
Conditions: convertConditions{
newCond(posPronounGeneral, "なん"),
newCond(posAuxiliaryVerb, "だ"),
},
EnableKutenToExclamation: true,
},
{
Value: "なんですの",
Conditions: convertConditions{
newCond(posPronounGeneral, "なん"),
newCond(posAssistantParallelParticle, "や"),
},
EnableKutenToExclamation: true,
},

{
Expand All @@ -241,20 +250,23 @@ var (
condNounsGeneral,
newCond(posAuxiliaryVerb, "じゃ"),
},
EnableKutenToExclamation: true,
},
{
Value: "@1ですの",
Conditions: convertConditions{
condNounsGeneral,
newCond(posAuxiliaryVerb, "だ"),
},
EnableKutenToExclamation: true,
},
{
Value: "@1ですの",
Conditions: convertConditions{
condNounsGeneral,
newCond(posAuxiliaryVerb, "や"),
},
EnableKutenToExclamation: true,
},

{
Expand All @@ -263,20 +275,23 @@ var (
condPronounsGeneral,
newCond(posAuxiliaryVerb, "じゃ"),
},
EnableKutenToExclamation: true,
},
{
Value: "@1ですの",
Conditions: convertConditions{
condPronounsGeneral,
newCond(posAuxiliaryVerb, "だ"),
},
EnableKutenToExclamation: true,
},
{
Value: "@1ですの",
Conditions: convertConditions{
condPronounsGeneral,
newCond(posAuxiliaryVerb, "や"),
},
EnableKutenToExclamation: true,
},
}

Expand Down Expand Up @@ -375,8 +390,9 @@ var (
AfterIgnoreConditions: convertConditions{
{Features: posSubParEndParticle},
},
AppendLongNote: true,
Value: "ですわ",
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "ですわ",
},
{
Conditions: convertConditions{
Expand All @@ -385,15 +401,17 @@ var (
AfterIgnoreConditions: convertConditions{
{Features: posSubParEndParticle},
},
AppendLongNote: true,
Value: "ですわ",
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "ですわ",
},
{
Conditions: convertConditions{
newCond(posVerbIndependence, "する"),
},
EnableWhenSentenceSeparation: true,
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "いたしますわ",
},
{
Expand All @@ -402,6 +420,7 @@ var (
},
EnableWhenSentenceSeparation: true,
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "なりますわ",
},
{
Expand All @@ -426,8 +445,9 @@ var (
Conditions: convertConditions{
newCond(posSentenceEndingParticle, "わ"),
},
AppendLongNote: true,
Value: "ですわ",
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "ですわ",
},
{
Conditions: convertConditions{
Expand Down Expand Up @@ -472,15 +492,17 @@ var (
Conditions: convertConditions{
newCond(posAuxiliaryVerb, "ます"),
},
AppendLongNote: true,
Value: "ますわ",
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "ますわ",
},
{
Conditions: convertConditions{
newCond(posAuxiliaryVerb, "た"),
},
EnableWhenSentenceSeparation: true,
AppendLongNote: true,
EnableKutenToExclamation: true,
Value: "たわ",
},
{
Expand All @@ -502,13 +524,15 @@ var (
Conditions: convertConditions{
newCond(posVerbNotIndependence, "ください"),
},
Value: "くださいまし",
EnableKutenToExclamation: true,
Value: "くださいまし",
},
{
Conditions: convertConditions{
newCond(posVerbNotIndependence, "くれ"),
},
Value: "くださいまし",
EnableKutenToExclamation: true,
Value: "くださいまし",
},
newRuleInterjection("ありがとう", "ありがとうございますわ"),
newRuleInterjection("じゃぁ", "それでは"),
Expand Down
75 changes: 68 additions & 7 deletions ojosama.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@ import (

// ConvertOption holds the optional settings for the ojosama conversion.
type ConvertOption struct {
forceAppendLongNote forceAppendLongNote // parameter for unit tests
forceCharsTestMode *chars.TestMode // parameter for unit tests
// Turns OFF the conversion of a kuten ("。") into an exclamation mark.
// Converting the kuten may destroy the nuance of the source text, so the
// feature can be disabled through this option.
DisableKutenToExclamation bool

forceAppendLongNote forceAppendLongNote // parameter for unit tests
forceCharsTestMode *chars.TestMode // parameter for unit tests
forceKutenToExclamation bool // parameter for unit tests: makes randomKutenToExclamation choose from a fixed list so the result is always "❗"
}

// forceAppendLongNote は強制的に波線や感嘆符や疑問符を任意の数追加するための設定。
Expand All @@ -35,6 +41,11 @@ const (

var (
// alnumRegexp matches strings made only of ASCII letters and digits.
alnumRegexp = regexp.MustCompile(`^[a-zA-Z0-9]+$`)

// Token features of the Japanese punctuation marks.
featKuten = []string{"記号", "句点"} // kuten: 。
featToten = []string{"記号", "読点"} // toten: 、

// shuffleElementsKutenToExclamation lists the candidates that
// randomKutenToExclamation picks from at random when it replaces a kuten.
// "。" is listed twice, so half of the entries leave the kuten unchanged.
shuffleElementsKutenToExclamation = []string{"。", "。", "!", "❗"}
)

func init() {
Expand Down Expand Up @@ -90,11 +101,20 @@ func Convert(src string, opt *ConvertOption) (string, error) {
}

// お嬢様言葉に変換
buf, nounKeep, i = convert(data, tokens, i, buf, nounKeep, opt)
var kutenToEx bool
buf, nounKeep, i, kutenToEx = convert(data, tokens, i, buf, nounKeep, opt)

// 形容詞、自立で文が終わった時は丁寧語ですわを追加する
if isAppendablePoliteWord(data, tokens, i) {
buf += politeWord
kutenToEx = true
}

if kutenToEx {
if ok, s, pos := randomKutenToExclamation(tokens, i, opt); ok {
buf += s
i = pos
}
}

result.WriteString(buf)
Expand Down Expand Up @@ -207,12 +227,21 @@ func convertContinuousConditions(tokens []tokenizer.Token, tokenPos int, opt *Co
surface = "お" + surface
}
result = strings.ReplaceAll(result, "@1", surface)

// 句点と~が同時に発生することは無いので早期リターンで良い
if ok, s, pos := randomKutenToExclamation(tokens, n, opt); ok {
result += s
n = pos
return result, n, true
}

if mc.AppendLongNote {
if note, pos := newLongNote(tokens, n, opt); note != "" {
result += note
n = pos
}
}

return result, n, true
}
return "", -1, false
Expand Down Expand Up @@ -249,13 +278,13 @@ excludeLoop:
}

// convert は基本的な変換を行う。
func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface string, nounKeep bool, opt *ConvertOption) (string, bool, int) {
func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface string, nounKeep bool, opt *ConvertOption) (string, bool, int, bool) {
var ok bool
var c convertRule
if ok, c = matchConvertRule(data, tokens, i); !ok {
result := surface
result, nounKeep = appendPrefix(data, tokens, i, result, nounKeep)
return result, nounKeep, i
return result, nounKeep, i, false
}

result := c.Value
Expand All @@ -274,7 +303,7 @@ func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface
result, nounKeep = appendPrefix(data, tokens, i, result, nounKeep)
}

return result, nounKeep, pos
return result, nounKeep, pos, c.EnableKutenToExclamation
}

func matchConvertRule(data tokenizer.TokenData, tokens []tokenizer.Token, i int) (bool, convertRule) {
Expand Down Expand Up @@ -390,7 +419,7 @@ func isAppendablePoliteWord(data tokenizer.TokenData, tokens []tokenizer.Token,

// isSentenceSeparation reports whether data is a token used as a sentence
// separator: a kuten or toten symbol, or an exclamation/question mark.
func isSentenceSeparation(data tokenizer.TokenData) bool {
return containsFeatures([][]string{{"記号", "句点"}, {"記号", "読点"}}, data.Features) ||
return containsFeatures([][]string{featKuten, featToten}, data.Features) ||
containsString([]string{"!", "!", "?", "?"}, data.Surface)
}

Expand Down Expand Up @@ -488,3 +517,35 @@ func getContinuousExclamationMark(tokens []tokenizer.Token, i int, feq *chars.Ex
// isPoliteWord reports whether the reading of data starts with "オ".
func isPoliteWord(data tokenizer.TokenData) bool {
	const readingPrefix = "オ"
	reading := data.Reading
	return strings.HasPrefix(reading, readingPrefix)
}

// randomKutenToExclamation randomly replaces the kuten ("。") that directly
// follows tokens[tokenPos] with an exclamation mark.
//
// It returns false, an empty string and tokenPos unchanged when the feature
// is disabled through opt.DisableKutenToExclamation, when no token follows
// tokenPos, or when the following token is not a kuten. Otherwise it returns
// true, the chosen replacement (an entry of shuffleElementsKutenToExclamation,
// so it may still be "。") and the index of the consumed kuten token.
func randomKutenToExclamation(tokens []tokenizer.Token, tokenPos int, opt *ConvertOption) (bool, string, int) {
	if opt != nil && opt.DisableKutenToExclamation {
		return false, "", tokenPos
	}

	pos := tokenPos + 1
	if len(tokens) <= pos {
		return false, "", tokenPos
	}

	if !isKuten(tokenizer.NewTokenData(tokens[pos])) {
		return false, "", tokenPos
	}

	// The candidate list can be swapped out so tests get a fixed result.
	var s []string
	if opt != nil && opt.forceKutenToExclamation {
		s = []string{"❗", "❗"}
	} else {
		// Shuffle a private copy. Shuffling the package-level slice in place
		// mutates shared state and is a data race when the conversion runs
		// from several goroutines at once.
		s = append([]string(nil), shuffleElementsKutenToExclamation...)
	}

	rand.Shuffle(len(s), func(i, j int) { s[i], s[j] = s[j], s[i] })
	return true, s[0], pos
}

// isKuten reports whether data is a kuten token: its features equal
// featKuten and its surface is "。".
func isKuten(data tokenizer.TokenData) bool {
	if !equalsFeatures(data.Features, featKuten) {
		return false
	}
	return data.Surface == "。"
}
Loading

0 comments on commit afe947b

Please sign in to comment.