Merge remote-tracking branch 'origin/master' into pettingzoo

LPSim · Jul 7, 2024 · cf35d28 · cf35d28
2 parents 5482e76 + dfaad9c
commit cf35d28
Show file tree

Hide file tree

Showing 98 changed files with 700 additions and 217 deletions.
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -6,15 +6,15 @@ on:
       - "src/**"
       - "tests/**"
       - "**/*.py"
-    paths-ignore: 
-      - "src/env/**"
+      - "!src/lpsim/env/**"
+      - "!src/lpsim/network/**"
   pull_request:
     paths:
       - "src/**"
       - "tests/**"
       - "**/*.py"
-    paths-ignore: 
-      - "src/env/**"
+      - "!src/lpsim/env/**"
+      - "!src/lpsim/network/**"
   workflow_dispatch:
   workflow_call:
 

diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -6,20 +6,21 @@
     "configurations": [
         {
             "name": "CurrentFile",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
             "cwd": "${workspaceFolder}",
             "env": {
                 // NEED TO INSTALL Command Variable plugin!
                 "PYTHONPATH": "${workspaceFolder}${command:extension.commandvariable.envListSep}${workspaceFolder}/src",
+                "LPSIM_DEBUG_LEVEL": "DEBUG",
             },
             "justMyCode": true
         },
         {
             "name": "Uvicorn",
-            "type": "python",
+            "type": "debugpy",
             "request": "launch",
             "program": "http_test_serve.py",
             "args": [
@@ -32,30 +33,5 @@
             },
             "justMyCode": true
         },
-        // {
-        //     "name": "Uvicorn",
-        //     "type": "python",
-        //     "request": "launch",
-        //     "module": "uvicorn",
-        //     "args": [
-        //         "network:app",
-        //         "--reload"
-        //     ],
-        //     "console": "integratedTerminal",
-        //     "cwd": "${workspaceFolder}",
-        //     "env": {
-        //         "PYTHONPATH": "${workspaceFolder}"
-        //     },
-        //     "justMyCode": true
-        // },
-        // {
-        //     "name": "Main",
-        //     "type": "python",
-        //     "request": "launch",
-        //     "program": "main.py",
-        //     "console": "integratedTerminal",
-        //     "cwd": "${workspaceFolder}",
-        //     "justMyCode": true
-        // },
     ]
 }
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,5 +1,10 @@
 {
     "[python]": {
         "editor.defaultFormatter": "charliermarsh.ruff"
-    }
+    },
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
 }
diff --git a/README.en.md b/README.en.md
@@ -27,7 +27,13 @@ related to miHoYo products.
 ## Progress
 
 :sparkles: 4.5 characters and cards, as well as balance changes are 
-implemented. :sparkles:
+implemented.
+
+:hammer: 4.6 characters and cards, as well as balance changes are
+being implemented, including strategies related to deck order.
+
+:construction: AI training support is being implemented, refer to AI training
+support section.
 
 ## Feature
 
@@ -38,6 +44,20 @@ implemented. :sparkles:
 - Consistent when load from a state and continue running.
 - 100% Coverage of codes.
 
+
+## :construction: WIP: AI Training Support
+
+Using LPSim as a simulator for AI training has the following advantages:
+1. Since all card versions are implemented internally, updates to the simulator and official balance changes will have little impact on previously trained AI strategies. Old card versions and strategies can be used at any time to play against new card versions and strategies (if the old strategy does not fail due to the opponent using new cards).
+2. The code is implemented in Python, making it easy to integrate into common deep learning frameworks such as PyTorch and TensorFlow.
+3. All simulation processes are implemented by class functions of the Match class, without multi-threading or network communication, making it easy to implement environment parallelization.
+4. Based on Pydantic, Match can import and export from any time without changing the subsequent simulation results, making it easy to reproduce and debug.
+5. Some basic agents have been implemented, which can be used to quickly build a single-agent environment; at the same time, interactive agents have been designed to facilitate training and playing against them after training.
+
+Currently, basic environments based on `gymnasium` and `pettingzoo` have been developed, including `gymnasium.Env` and `pettingzoo.AECEnv`. Since accurately representing the state of Genius Invokation as an array is itself a difficult problem, the basic environments do not yet implement an encoding of the state; you can inherit from an environment to modify it, or use a `Wrapper` to modify the observation and action spaces.
+
+In the `src/lpsim/env` folder of the [pettingzoo](https://github.com/LPSim/backend/tree/pettingzoo) branch, the above environment implementation is included, as well as simple test code under the [Tianshou](https://github.com/thu-ml/tianshou) multi-agent framework. The environment has not been thoroughly tested at present, and currently exists as an independent branch with the possibility of interface changes. AI-related code is currently only tested in Python 3.10, and its availability in other python versions is not guaranteed.
+
 ## Usage
 
 This project works with Python 3.10 or newer.

diff --git a/README.md b/README.md
@@ -18,7 +18,11 @@
 
 ## 项目进度
 
-:sparkles: 4.5版本角色和卡牌，以及平衡性调整已实现。 :sparkles:
+:sparkles: 4.5版本角色和卡牌，以及平衡性调整已实现。
+
+:hammer: 4.6版本角色和卡牌，以及平衡性调整实现中，包含牌组顺序相关策略更新。
+
+:construction: AI训练支持实现中，参考AI训练支持小节。
 
 ## 特性
 
@@ -29,6 +33,19 @@
 - 可以从任意状态加载并继续运行，保持结果不变。
 - 100%代码覆盖率。
 
+## :construction: WIP: AI训练支持
+
+使用水皇模拟器LPSim用作AI训练具有如下优势：
+1. 由于内部实现了所有卡牌版本，模拟器更新和官方平衡性改动几乎不会影响到之前训练好的AI策略，随时可以使用旧版本卡牌和策略与新版本卡牌策略进行对局和训练（如果旧版策略不会因为对手用新卡牌而无法工作的话）。
+2. 代码使用Python实现，便于集成至常见深度学习框架如PyTorch、TensorFlow中。
+3. 所有模拟过程由Match类的类函数实现，不包含多线程或网络通信，便于实现环境并行化。
+4. 基于Pydantic，Match可以从任意时刻导入导出且不改变接下来的模拟结果，便于复现和调试纠错。
+5. 已实现部分基本Agent，可以作为对手快速搭建单智能体环境；同时设计拥有交互式Agent，便于训练完Agent以后与其对战。
+
+目前已基于`gymnasium`和`pettingzoo`的环境定义，开发了`gymnasium.Env`, `pettingzoo.AECEnv`基本环境。由于如何准确将七圣状态表示为数组本身就是一个难题，基本环境暂未对状态的编码表示进行实现，可以继承该环境进行修改或是使用`Wrapper`来对观测和动作空间进行修改。
+
+在[pettingzoo](https://github.com/LPSim/backend/tree/pettingzoo)分支的`src/lpsim/env`文件夹中包含了上述环境实现，以及在[Tianshou](https://github.com/thu-ml/tianshou)多智能体框架下的简单测试代码。目前环境还未经过详细测试，暂时以独立分支的方式存在且具有接口改动可能。AI相关代码目前仅在Python3.10测试，不保证其它版本可用性。
+
 ## 使用方法
 
 该项目需要Python 3.10或更新版本。

diff --git a/frontend b/frontend
diff --git a/pytest.ini b/pytest.ini
@@ -9,3 +9,6 @@ addopts = --cov --durations=30 -n auto --dist worksteal --import-mode=importlib
 
 # Our pytest units are located in the ./test/ directory.
 testpaths = tests
+
+markers =
+    slowtest: marks tests as slow (deselect with '-m "not slowtest"')
diff --git a/src/lpsim/server/action.py b/src/lpsim/server/action.py
@@ -16,6 +16,7 @@ class ActionTypes(str, Enum):
     EMPTY = "EMPTY"
     DRAW_CARD = "DRAW_CARD"
     RESTORE_CARD = "RESTORE_CARD"
+    SWITCH_CARD = "SWITCH_CARD"
     REMOVE_CARD = "REMOVE_CARD"
     CHOOSE_CHARACTER = "CHOOSE_CHARACTER"
     CREATE_DICE = "CREATE_DICE"
@@ -506,6 +507,21 @@ class GenerateSwitchCardRequestAction(ActionBase):
     player_idx: int
 
 
+class SwitchCardAction(ActionBase):
+    """
+    Action for switching cards. Specified cards will be put back into the deck, and the
+    same number of cards will be drawn from the deck.
+    Currently, restored cards are put back into the deck at random positions, and the
+    same number of cards are drawn from top to bottom. Cards with the same name as
+    those put back are skipped; if too few cards remain, they are drawn top to bottom.
+    """
+
+    type: Literal[ActionTypes.SWITCH_CARD] = ActionTypes.SWITCH_CARD
+    record_level: int = 10
+    player_idx: int
+    restore_card_idxs: List[int]
+
+
 Actions = (
     ActionBase
     | DrawCardAction
@@ -539,4 +555,5 @@ class GenerateSwitchCardRequestAction(ActionBase):
     # 25
     | CharacterReviveAction
     | GenerateSwitchCardRequestAction
+    | SwitchCardAction
 )
diff --git a/src/lpsim/server/card/equipment/artifact/element_artifacts.py b/src/lpsim/server/card/equipment/artifact/element_artifacts.py
@@ -125,17 +125,17 @@ def value_modifier_INITIAL_DICE_COLOR(
 
 
 class SmallElementalArtifact_3_3(SmallElementalArtifact_4_0):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     cost: Cost = Cost(same_dice_number=2)
 
 
 class BigElementalArtifact_3_6(BigElementalArtifact_4_0):
-    version: Literal["3.6"]
+    version: Literal["3.6"] = "3.6"
     cost: Cost = Cost(any_dice_number=3)
 
 
 class BigElementalArtifact_3_3(BigElementalArtifact_4_0):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     cost: Cost = Cost(same_dice_number=3)
 
 

diff --git a/src/lpsim/server/card/equipment/artifact/emblem_of_severed_fate.py b/src/lpsim/server/card/equipment/artifact/emblem_of_severed_fate.py
@@ -42,7 +42,7 @@ def event_handler_SKILL_END(
 
 
 class OrnateKabuto_3_5(OrnateKabuto_4_0):
-    version: Literal["3.5"]
+    version: Literal["3.5"] = "3.5"
     cost: Cost = Cost(any_dice_number=2)
 
 
@@ -74,12 +74,12 @@ def value_modifier_DAMAGE_INCREASE(
 
 
 class EmblemOfSeveredFate_4_0(EmblemOfSeveredFate_4_1):
-    version: Literal["4.0"]
+    version: Literal["4.0"] = "4.0"
     max_usage_per_round: int = 999
 
 
 class EmblemOfSeveredFate_3_7(EmblemOfSeveredFate_4_0):
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
     cost: Cost = Cost(any_dice_number=3)
 
 

diff --git a/src/lpsim/server/card/equipment/artifact/gamblers.py b/src/lpsim/server/card/equipment/artifact/gamblers.py
@@ -58,7 +58,7 @@ def event_handler_CHARACTER_DEFEATED(
 
 
 class GamblersEarrings_3_3(GamblersEarrings_3_8):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     usage: int = 999
 
     def equip(self, match: Any) -> List[Actions]:

diff --git a/src/lpsim/server/card/equipment/artifact/vermillion_shimenawa.py b/src/lpsim/server/card/equipment/artifact/vermillion_shimenawa.py
@@ -135,22 +135,22 @@ def value_modifier_DAMAGE_INCREASE(
 
 
 class ThunderingPoise_3_7(ThunderingPoise_4_0):
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
     cost: Cost = Cost(same_dice_number=2)
 
 
 class VermillionHereafter_3_7(VermillionHereafter_4_0):
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
     cost: Cost = Cost(same_dice_number=3)
 
 
 class CapriciousVisage_3_7(CapriciousVisage_4_0):
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
     cost: Cost = Cost(same_dice_number=2)
 
 
 class ShimenawasReminiscence_3_7(ShimenawasReminiscence_4_0):
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
     cost: Cost = Cost(same_dice_number=3)
 
 

diff --git a/src/lpsim/server/card/equipment/weapon/other_polearm.py b/src/lpsim/server/card/equipment/weapon/other_polearm.py
@@ -135,7 +135,7 @@ def equip(self, match: Any) -> List[Actions]:
 
 
 class LithicSpear_3_3(LithicSpear_3_7):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
 
     def equip(self, match: Any) -> List[Actions]:
         """

diff --git a/src/lpsim/server/card/event/foods.py b/src/lpsim/server/card/event/foods.py
@@ -337,7 +337,7 @@ def get_actions(
 
 
 class MintyMeatRolls_3_3(MintyMeatRolls_3_4):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
 
 
 class TeyvatFriedEgg_4_1(FoodCardBase):

diff --git a/src/lpsim/server/card/event/others.py b/src/lpsim/server/card/event/others.py
@@ -203,7 +203,7 @@ def get_actions(
 
 class IHaventLostYet_3_3(IHaventLostYet_4_0):
     name: Literal["I Haven't Lost Yet!"]
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     cost: Cost = Cost()
 
     def get_actions(
@@ -465,14 +465,14 @@ class BlessingOfTheDivineRelicsInstallation_4_1(MasterOfWeaponry_4_1):
 
 
 class MasterOfWeaponry_3_3(MasterOfWeaponry_4_1):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     reset_usage: bool = False
 
 
 class BlessingOfTheDivineRelicsInstallation_3_3(
     BlessingOfTheDivineRelicsInstallation_4_1
 ):
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     reset_usage: bool = False
 
 
@@ -528,7 +528,7 @@ def get_actions(
 
 class SendOff_3_3(SendOff_3_7):
     name: Literal["Send Off"]
-    version: Literal["3.3"]
+    version: Literal["3.3"] = "3.3"
     cost: Cost = Cost(any_dice_number=2)
 
     def get_actions(

diff --git a/src/lpsim/server/card/event/resonance.py b/src/lpsim/server/card/event/resonance.py
@@ -412,7 +412,7 @@ def get_actions(
 
 class WindAndFreedom_3_7(WindAndFreedom_4_1):
     name: Literal["Wind and Freedom"]
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
 
 
 class StoneAndContracts_3_7(NationResonanceCardBase):
@@ -483,7 +483,7 @@ def get_actions(
 
 class ThunderAndEternity_3_7(ThunderAndEternity_4_0):
     name: Literal["Thunder and Eternity"]
-    version: Literal["3.7"]
+    version: Literal["3.7"] = "3.7"
 
     def get_dice_color(self, match: Any) -> str:
         """

diff --git a/src/lpsim/server/card/support/base.py b/src/lpsim/server/card/support/base.py
@@ -36,7 +36,7 @@ class SupportBase(CardBase):
     # when status icon type is not none, it will show in team status area
     status_icon_type: Literal[IconType.NONE] = IconType.NONE
 
-    def check_should_remove(self) -> List[RemoveObjectAction]:
+    def check_should_remove(self, match: Any = None) -> List[RemoveObjectAction]:
         """
         Check if the support should be removed.
         when usage has changed, call this function to check if the support
@@ -161,7 +161,7 @@ class UsageWithRoundRestrictionSupportBase(SupportBase):
     name: str
     version: str
     cost: Cost
-    usage: int = 2
+    usage: int
     usage_this_round: int = 0
     max_usage_one_round: int