✨「～ですわ」等の言い回しで終了する時に「。」をランダムに「！」「❗」等に変換する処理を追加いたしましたわ！ #15 (#63)

* add DisableKutenToExclamation
* 形容詞＋自立で終わる場合に。を！に変換
* Add comment
* Add convert rule
jiro4989 · Jul 6, 2022 · afe947b · afe947b
1 parent 63fa12c
commit afe947b
Show file tree

Hide file tree

Showing 3 changed files with 183 additions and 76 deletions.
diff --git a/convert_rule.go b/convert_rule.go
@@ -14,7 +14,8 @@ type convertRule struct {
 	EnableWhenSentenceSeparation bool              // 文の区切り（単語の後に句点か読点がくる、あるいは何もない）場合だけ有効にする
 	AppendLongNote               bool              // 波線を追加する
 	DisablePrefix                bool              // 「お」を手前に付与しない
-	Value                        string            // この文字列に置換する
+	EnableKutenToExclamation     bool
+	Value                        string // この文字列に置換する
 }
 
 /*
@@ -78,9 +79,10 @@ func (c convertRule) disablePrefix(v bool) convertRule {
 
 // continuousConditionsConvertRule は連続する条件がすべてマッチしたときに変換するルール。
+//
+// In English: a rule that converts when all of its consecutive conditions
+// match.
+//
+// NOTE(review): the part of convertContinuousConditions visible in this diff
+// calls randomKutenToExclamation without consulting EnableKutenToExclamation;
+// confirm whether the flag is meant to gate that call.
 type continuousConditionsConvertRule struct {
-	Conditions     convertConditions
-	AppendLongNote bool
-	Value          string
+	Conditions               convertConditions // conditions that must all match for the rule to apply
+	AppendLongNote           bool              // allow a long note (see newLongNote) to be appended after the replacement
+	EnableKutenToExclamation bool              // presumably allows a following "。" to become an exclamation mark; see NOTE above
+	Value                    string            // replacement text; rules in this file use "@1" as a placeholder in it
 }
 
 // sentenceEndingParticleConvertRule は「名詞」＋「動詞」＋「終助詞」の組み合わせによる変換ルール。
@@ -186,6 +188,7 @@ var (
 				newCond([]string{"動詞", "自立"}, "し"),
 				newCond([]string{"助動詞"}, "ます"),
 			},
+			EnableKutenToExclamation: true,
 		},
 
 		{
@@ -194,6 +197,7 @@ var (
 				newCond([]string{"助動詞"}, "だ"),
 				newCond([]string{"助詞", "接続助詞"}, "から"),
 			},
+			EnableKutenToExclamation: true,
 		},
 
 		{
@@ -203,6 +207,7 @@ var (
 				newCond([]string{"名詞", "非自立", "一般"}, "ん"),
 				newCond([]string{"助動詞"}, "だ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 
 		{
@@ -211,6 +216,7 @@ var (
 				newCond([]string{"助動詞"}, "だ"),
 				newCond([]string{"助詞", "終助詞"}, "よ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 
 		{
@@ -219,20 +225,23 @@ var (
 				newCond(posPronounGeneral, "なん"),
 				newCond(posSubPostpositionalParticle, "じゃ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 		{
 			Value: "なんですの",
 			Conditions: convertConditions{
 				newCond(posPronounGeneral, "なん"),
 				newCond(posAuxiliaryVerb, "だ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 		{
 			Value: "なんですの",
 			Conditions: convertConditions{
 				newCond(posPronounGeneral, "なん"),
 				newCond(posAssistantParallelParticle, "や"),
 			},
+			EnableKutenToExclamation: true,
 		},
 
 		{
@@ -241,20 +250,23 @@ var (
 				condNounsGeneral,
 				newCond(posAuxiliaryVerb, "じゃ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 		{
 			Value: "@1ですの",
 			Conditions: convertConditions{
 				condNounsGeneral,
 				newCond(posAuxiliaryVerb, "だ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 		{
 			Value: "@1ですの",
 			Conditions: convertConditions{
 				condNounsGeneral,
 				newCond(posAuxiliaryVerb, "や"),
 			},
+			EnableKutenToExclamation: true,
 		},
 
 		{
@@ -263,20 +275,23 @@ var (
 				condPronounsGeneral,
 				newCond(posAuxiliaryVerb, "じゃ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 		{
 			Value: "@1ですの",
 			Conditions: convertConditions{
 				condPronounsGeneral,
 				newCond(posAuxiliaryVerb, "だ"),
 			},
+			EnableKutenToExclamation: true,
 		},
 		{
 			Value: "@1ですの",
 			Conditions: convertConditions{
 				condPronounsGeneral,
 				newCond(posAuxiliaryVerb, "や"),
 			},
+			EnableKutenToExclamation: true,
 		},
 	}
 
@@ -375,8 +390,9 @@ var (
 			AfterIgnoreConditions: convertConditions{
 				{Features: posSubParEndParticle},
 			},
-			AppendLongNote: true,
-			Value:          "ですわ",
+			AppendLongNote:           true,
+			EnableKutenToExclamation: true,
+			Value:                    "ですわ",
 		},
 		{
 			Conditions: convertConditions{
@@ -385,15 +401,17 @@ var (
 			AfterIgnoreConditions: convertConditions{
 				{Features: posSubParEndParticle},
 			},
-			AppendLongNote: true,
-			Value:          "ですわ",
+			AppendLongNote:           true,
+			EnableKutenToExclamation: true,
+			Value:                    "ですわ",
 		},
 		{
 			Conditions: convertConditions{
 				newCond(posVerbIndependence, "する"),
 			},
 			EnableWhenSentenceSeparation: true,
 			AppendLongNote:               true,
+			EnableKutenToExclamation:     true,
 			Value:                        "いたしますわ",
 		},
 		{
@@ -402,6 +420,7 @@ var (
 			},
 			EnableWhenSentenceSeparation: true,
 			AppendLongNote:               true,
+			EnableKutenToExclamation:     true,
 			Value:                        "なりますわ",
 		},
 		{
@@ -426,8 +445,9 @@ var (
 			Conditions: convertConditions{
 				newCond(posSentenceEndingParticle, "わ"),
 			},
-			AppendLongNote: true,
-			Value:          "ですわ",
+			AppendLongNote:           true,
+			EnableKutenToExclamation: true,
+			Value:                    "ですわ",
 		},
 		{
 			Conditions: convertConditions{
@@ -472,15 +492,17 @@ var (
 			Conditions: convertConditions{
 				newCond(posAuxiliaryVerb, "ます"),
 			},
-			AppendLongNote: true,
-			Value:          "ますわ",
+			AppendLongNote:           true,
+			EnableKutenToExclamation: true,
+			Value:                    "ますわ",
 		},
 		{
 			Conditions: convertConditions{
 				newCond(posAuxiliaryVerb, "た"),
 			},
 			EnableWhenSentenceSeparation: true,
 			AppendLongNote:               true,
+			EnableKutenToExclamation:     true,
 			Value:                        "たわ",
 		},
 		{
@@ -502,13 +524,15 @@ var (
 			Conditions: convertConditions{
 				newCond(posVerbNotIndependence, "ください"),
 			},
-			Value: "くださいまし",
+			EnableKutenToExclamation: true,
+			Value:                    "くださいまし",
 		},
 		{
 			Conditions: convertConditions{
 				newCond(posVerbNotIndependence, "くれ"),
 			},
-			Value: "くださいまし",
+			EnableKutenToExclamation: true,
+			Value:                    "くださいまし",
 		},
 		newRuleInterjection("ありがとう", "ありがとうございますわ"),
 		newRuleInterjection("じゃぁ", "それでは"),

diff --git a/ojosama.go b/ojosama.go
@@ -13,8 +13,14 @@ import (
 
 // ConvertOption はお嬢様変換時のオプショナルな設定。
 type ConvertOption struct {
-	forceAppendLongNote forceAppendLongNote // 単体テスト用のパラメータ
-	forceCharsTestMode  *chars.TestMode     // 単体テスト用のパラメータ
+	// DisableKutenToExclamation turns off the conversion of the kuten ("。")
+	// into an exclamation mark. Replacing the kuten can change the nuance of
+	// the source text, so callers can opt out with this option.
+	DisableKutenToExclamation bool
+
+	forceAppendLongNote     forceAppendLongNote // parameter for unit tests
+	forceCharsTestMode      *chars.TestMode     // parameter for unit tests
+	forceKutenToExclamation bool                // unit tests only: makes randomKutenToExclamation always choose "❗"
 }
 
 // forceAppendLongNote は強制的に波線や感嘆符や疑問符を任意の数追加するための設定。
@@ -35,6 +41,11 @@ const (
 
 var (
 	alnumRegexp = regexp.MustCompile(`^[a-zA-Z0-9]+$`)
+
+	featKuten = []string{"記号", "句点"} // features of the kuten token "。"
+	featToten = []string{"記号", "読点"} // features of the toten token "、"
+
+	// shuffleElementsKutenToExclamation holds the candidates that
+	// randomKutenToExclamation picks from; "。" is listed twice, so it is as
+	// likely as the two exclamation marks combined.
+	shuffleElementsKutenToExclamation = []string{"。", "。", "！", "❗"}
 )
 
 func init() {
@@ -90,11 +101,20 @@ func Convert(src string, opt *ConvertOption) (string, error) {
 		}
 
 		// お嬢様言葉に変換
-		buf, nounKeep, i = convert(data, tokens, i, buf, nounKeep, opt)
+		var kutenToEx bool
+		buf, nounKeep, i, kutenToEx = convert(data, tokens, i, buf, nounKeep, opt)
 
 		// 形容詞、自立で文が終わった時は丁寧語ですわを追加する
 		if isAppendablePoliteWord(data, tokens, i) {
 			buf += politeWord
+			kutenToEx = true
+		}
+
+		if kutenToEx {
+			if ok, s, pos := randomKutenToExclamation(tokens, i, opt); ok {
+				buf += s
+				i = pos
+			}
 		}
 
 		result.WriteString(buf)
@@ -207,12 +227,21 @@ func convertContinuousConditions(tokens []tokenizer.Token, tokenPos int, opt *Co
 			surface = "お" + surface
 		}
 		result = strings.ReplaceAll(result, "@1", surface)
+
+		// 句点と～が同時に発生することは無いので早期リターンで良い
+		if ok, s, pos := randomKutenToExclamation(tokens, n, opt); ok {
+			result += s
+			n = pos
+			return result, n, true
+		}
+
 		if mc.AppendLongNote {
 			if note, pos := newLongNote(tokens, n, opt); note != "" {
 				result += note
 				n = pos
 			}
 		}
+
 		return result, n, true
 	}
 	return "", -1, false
@@ -249,13 +278,13 @@ excludeLoop:
 }
 
 // convert は基本的な変換を行う。
-func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface string, nounKeep bool, opt *ConvertOption) (string, bool, int) {
+func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface string, nounKeep bool, opt *ConvertOption) (string, bool, int, bool) {
 	var ok bool
 	var c convertRule
 	if ok, c = matchConvertRule(data, tokens, i); !ok {
 		result := surface
 		result, nounKeep = appendPrefix(data, tokens, i, result, nounKeep)
-		return result, nounKeep, i
+		return result, nounKeep, i, false
 	}
 
 	result := c.Value
@@ -274,7 +303,7 @@ func convert(data tokenizer.TokenData, tokens []tokenizer.Token, i int, surface
 		result, nounKeep = appendPrefix(data, tokens, i, result, nounKeep)
 	}
 
-	return result, nounKeep, pos
+	return result, nounKeep, pos, c.EnableKutenToExclamation
 }
 
 func matchConvertRule(data tokenizer.TokenData, tokens []tokenizer.Token, i int) (bool, convertRule) {
@@ -390,7 +419,7 @@ func isAppendablePoliteWord(data tokenizer.TokenData, tokens []tokenizer.Token,
 
 // isSentenceSeparation は data が文の区切りに使われる token かどうかを判定する。
+//
+// It reports true when containsFeatures accepts data.Features against
+// featKuten or featToten, or when data.Surface is one of "！", "!", "？", "?".
 func isSentenceSeparation(data tokenizer.TokenData) bool {
-	return containsFeatures([][]string{{"記号", "句点"}, {"記号", "読点"}}, data.Features) ||
+	return containsFeatures([][]string{featKuten, featToten}, data.Features) ||
 		containsString([]string{"！", "!", "？", "?"}, data.Surface)
 }
 
@@ -488,3 +517,35 @@ func getContinuousExclamationMark(tokens []tokenizer.Token, i int, feq *chars.Ex
 func isPoliteWord(data tokenizer.TokenData) bool {
 	return strings.HasPrefix(data.Reading, "オ")
 }
+
+// randomKutenToExclamation randomly replaces the kuten ("。") that follows
+// tokens[tokenPos] with an exclamation mark.
+//
+// It returns (false, "", tokenPos) when the feature is disabled through
+// opt.DisableKutenToExclamation, when tokenPos is the last token, or when the
+// next token is not a kuten. Otherwise it returns true, the string to emit in
+// place of the kuten (which may still be "。"), and the position of the kuten
+// token so the caller can skip it.
+func randomKutenToExclamation(tokens []tokenizer.Token, tokenPos int, opt *ConvertOption) (bool, string, int) {
+	if opt != nil && opt.DisableKutenToExclamation {
+		return false, "", tokenPos
+	}
+
+	pos := tokenPos + 1
+	if len(tokens) <= pos {
+		return false, "", tokenPos
+	}
+
+	if !isKuten(tokenizer.NewTokenData(tokens[pos])) {
+		return false, "", tokenPos
+	}
+
+	// Tests pin the result so that their assertions stay deterministic.
+	if opt != nil && opt.forceKutenToExclamation {
+		return true, "❗", pos
+	}
+
+	// Pick by index instead of shuffling: rand.Shuffle reordered the shared
+	// package-level slice in place on every call, which is a data race once
+	// Convert runs in more than one goroutine. A uniform index pick yields the
+	// same distribution without mutating shared state.
+	candidates := shuffleElementsKutenToExclamation
+	return true, candidates[rand.Intn(len(candidates))], pos
+}
+
+// isKuten reports whether data is the kuten token: its features must equal
+// featKuten and its surface must be exactly "。".
+func isKuten(data tokenizer.TokenData) bool {
+	if !equalsFeatures(data.Features, featKuten) {
+		return false
+	}
+	return data.Surface == "。"
+}