From 9bae279a480a71b457c10ff5c54b272be9c05359 Mon Sep 17 00:00:00 2001
From: kekxv <caesar@kekxv.com>
Date: Sun, 1 Jun 2025 08:10:13 +0000
Subject: [PATCH 1/2] add data type

---
 .gitignore         |  1 +
 README.md          | 58 ++++++++++++++++++++++++++++++++++++++--------
 example/BUILD      |  1 +
 rules/defs.bzl     | 36 +++++++++++++++++-----------
 tools/bin_to_cc.py | 49 +++++++++++++++++++++++++++++++++------
 5 files changed, 114 insertions(+), 31 deletions(-)

diff --git a/.gitignore b/.gitignore
index b8261ab..40a72be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,4 @@ cmake-build-debug/
 
 **/.DS_Store
 
+bazel-*
\ No newline at end of file
diff --git a/README.md b/README.md
index 1a81816..7740fce 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ cc_resources(
     name = "my_resources",
     srcs = ["path/to/resource1.bin", "path/to/resource2.bin"],
     out_prefix = "my_res",  # Optional prefix for generated file names
-    _tool = "//tools:bin_to_cc"  # Path to the conversion tool
+    data_type = "uchar",    # Optional data type for the array (default: "uchar")
 )
 ```
 
@@ -32,7 +32,10 @@ cc_resources(
 
 - `out_prefix`: An optional string that specifies a prefix for the output file names and the corresponding C variable names. For example, if `out_prefix` is set to `ui` and the input is `icon.png`, the outputs will be named `ui_icon.h` and `ui_icon.cpp`, and the resource name will be `ui_icon`.
 
-- `_tool`: A label for the conversion tool that should be executed to process the binary files. By default, this is set to `//tools:bin_to_cc`, but it can be overridden to use a custom tool.
+- `data_type`: An optional string that specifies the data type for the generated array. Available options are:
+  - `"char"`: Plain `char` array (signedness is implementation-defined in C/C++), suitable for text data
+  - `"uchar"`: Unsigned char array (default), suitable for binary data
+  - `"uint"`: Unsigned int array, reduces generated source file size by combining 4 bytes into one integer (packed little-endian; a trailing partial group is zero-padded)
 
 ### Output
 
@@ -41,10 +44,9 @@ When you invoke the `cc_resources` rule, it generates:
 - `.h` files containing the C-compatible struct definitions, including resource metadata.
 - `.cpp` files implementing the logic to handle these resources.
 
-### Example
-
-Given the following `BUILD.bazel` setup:
+### Examples
 
+Basic usage with default settings:
 ```python
 load("@rules_cc_resources//rules:defs.bzl", "cc_resources")
 
@@ -55,13 +57,49 @@ cc_resources(
 )
 ```
 
+Using unsigned int type to reduce generated file size:
+```python
+cc_resources(
+    name = "large_resources",
+    srcs = ["data/large_file.bin"],
+    out_prefix = "data",
+    data_type = "uint"  # Combines 4 bytes into one unsigned int
+)
+```
+
+Text data handling:
+```python
+cc_resources(
+    name = "text_resources",
+    srcs = ["text/strings.txt"],
+    out_prefix = "text",
+    data_type = "char"  # Uses plain char for text data
+)
+```
+
 This will produce the following files:
+- `assets_logo.h` and `assets_logo.cpp`
+- `assets_background.h` and `assets_background.cpp`
+- `data_large_file.h` and `data_large_file.cpp`
+- `text_strings.h` and `text_strings.cpp`
+
+### Data Type Selection Guide
+
+- Use `"uchar"` (default) when:
+  - Working with general binary data
+  - Maximum compatibility is needed
+  - Individual byte access is important
+
+- Use `"uint"` when:
+  - You have large binary files
+  - You want to reduce the generated source file size
+  - Memory alignment is not a concern and the target is little-endian (each 4-byte group is packed in little-endian order)
 
-- `assets_logo.h`
-- `assets_logo.cpp`
-- `assets_background.h`
-- `assets_background.cpp`
+- Use `"char"` when:
+  - Working primarily with text data
+  - Sign information is important
+  - You need to handle ASCII/text data
 
 ### Conclusion
 
-The `cc_resources` rule facilitates the integration of binary resources into C/C++ projects by automating the generation of corresponding source files, thereby streamlining resource management in Bazel builds.
+The `cc_resources` rule facilitates the integration of binary resources into C/C++ projects by automating the generation of corresponding source files, thereby streamlining resource management in Bazel builds. With the flexible data type options, you can optimize the generated code size and choose the most appropriate representation for your data.
diff --git a/example/BUILD b/example/BUILD
index 2652d89..85363ba 100644
--- a/example/BUILD
+++ b/example/BUILD
@@ -8,6 +8,7 @@ cc_resources(
         "assets/my_resource_next.txt",
         "my_resource.txt",
     ],
+    data_type = "uint",
     out_prefix = "example",
 )
 
diff --git a/rules/defs.bzl b/rules/defs.bzl
index 1f9e17d..33d367e 100644
--- a/rules/defs.bzl
+++ b/rules/defs.bzl
@@ -30,14 +30,6 @@ def _cc_resources_impl(ctx):
             base_name = src_stem
             file_path_prefix = src_stem
 
-        # Declare output files for this specific resource
-        # Ensure declared paths are unique if out_prefix is not used and multiple inputs might have same stem from different dirs
-        # However, ctx.actions.declare_file handles uniqueness within the rule's output directory.
-        # If src_file.short_path is "path/to/data.bin", we might want "path_to_data"
-        # For simplicity, let's assume basenames are unique enough or out_prefix is used.
-        # A more robust way for uniqueness without out_prefix would be to incorporate parts of the path.
-        # For now, file_path_prefix from above is used.
-
         h_file = ctx.actions.declare_file(file_path_prefix + ".h")
         cpp_file = ctx.actions.declare_file(file_path_prefix + ".cpp")
 
@@ -48,10 +40,10 @@ def _cc_resources_impl(ctx):
         args.add("--input", src_file.path)
         args.add("--output_h", h_file.path)
         args.add("--output_cpp", cpp_file.path)
-
-        # The resource_name passed to the tool should be what the C variable is named
-        # This will be 'base_name' which incorporates the out_prefix if present.
         args.add("--resource_name", base_name)
+        
+        # Forward the selected array element type to the conversion tool
+        args.add("--data_type", ctx.attr.data_type)
 
         ctx.actions.run(
             executable = ctx.executable._tool,
@@ -80,7 +72,6 @@ cc_resources = rule(
     implementation = _cc_resources_impl,
     attrs = {
         "srcs": attr.label_list(
-            # Changed from src to srcs
             mandatory = True,
             allow_files = True,  # Allow actual file paths
             doc = "List of input binary files.",
@@ -92,12 +83,29 @@ cc_resources = rule(
             default = True,
             doc = "Whether to include the extension '.bin' in the output file names.",
         ),
+        "data_type": attr.string(
+            values = ["char", "uchar", "uint"],
+            default = "uchar",
+            doc = "Data type for the array (char, uchar=unsigned char, uint=unsigned int).",
+        ),
         "_tool": attr.label(
-            default = Label("//tools:bin_to_cc"),  # Make sure this path is correct
+            default = Label("//tools:bin_to_cc"),
             cfg = "exec",
             executable = True,
             doc = "The binary to C/C++ conversion tool.",
         ),
     },
-    doc = "Converts binary files into .cpp and .h files. Each input file results in a separate .cpp and .h pair, defining a C-compatible struct {name, size, data}.",
+    doc = """
+    Converts binary files into C/C++ source files.
+    
+    This rule takes binary files and generates corresponding C/C++ header and source files
+    that contain the binary data as arrays. The generated files can be used to embed
+    resources directly into C/C++ programs.
+    
+    Attributes:
+        srcs: List of input binary files.
+        out_prefix: Optional prefix for output file names and C variable names.
+        ext_hide: Whether to exclude the file extension from generated names.
+        data_type: Data type for the generated array (char, uchar, or uint).
+    """,
 )
diff --git a/tools/bin_to_cc.py b/tools/bin_to_cc.py
index d4e1a2c..24fdd73 100755
--- a/tools/bin_to_cc.py
+++ b/tools/bin_to_cc.py
@@ -22,12 +22,22 @@ def main():
     parser.add_argument("--output_h", required=True, help="Output header file path (.h).")
     parser.add_argument("--output_cpp", required=True, help="Output source file path (.cpp).")
     parser.add_argument("--resource_name", required=True, help="Base name for C variable and include guard.")
+    parser.add_argument("--data_type", choices=['char', 'uchar', 'uint'], default='uchar',
+                      help="Data type for the array (char, uchar=unsigned char, uint=unsigned int). Default: uchar")
     args = parser.parse_args()
 
     c_var_name = sanitize_for_c_identifier(args.resource_name)
     header_basename = os.path.basename(args.output_h) # e.g., my_resource.h
     include_guard = f"__{sanitize_for_c_identifier(args.resource_name).upper()}_H__"
 
+    # Map the --data_type choice to the C element type used in the generated code
+    c_type_map = {
+        'char': 'char',
+        'uchar': 'unsigned char',
+        'uint': 'unsigned int'
+    }
+    data_type = c_type_map[args.data_type]
+
     try:
         with open(args.input, "rb") as f_in:
             data = f_in.read()
@@ -50,7 +60,7 @@ def main():
 typedef struct {{
     const char* name;
     size_t size;
-    const unsigned char* data;
+    const {data_type}* data;
 }} __ResourceData_{c_var_name};
 
 extern const __ResourceData_{c_var_name} {c_var_name};
@@ -77,19 +87,44 @@ def main():
 
     cpp_content_parts.append(f"\n#ifdef __cplusplus\nextern \"C\" {{\n#endif\n")
 
-    cpp_content_parts.append(f"static const unsigned char {c_var_name}_data[] = {{")
-    if data:
+    cpp_content_parts.append(f"static const {data_type} {c_var_name}_data[] = {{")
+    
+    if args.data_type == 'uint':
+        # For unsigned int, combine every 4 bytes into one integer
+        for i in range(0, len(data), 4):
+            if i % 4 == 0:
+                cpp_content_parts.append("\n    ")
+            chunk = data[i:i+4]
+            # Zero-pad the final chunk if it is shorter than 4 bytes
+            while len(chunk) < 4:
+                chunk = chunk + b'\x00'
+            value = int.from_bytes(chunk, byteorder='little')
+            cpp_content_parts.append(f"0x{value:08X}u, ")
+    else:
+        # For char and unsigned char, keep the original one-element-per-byte layout
         for i, byte in enumerate(data):
             if i % 12 == 0:
                 cpp_content_parts.append("\n    ")
-            cpp_content_parts.append(f"0x{byte:02X}, ")
-        # Remove trailing comma and space if data was not empty
+            if args.data_type == 'char':
+                cpp_content_parts.append(f"{byte:d}, ")
+            else:  # uchar
+                cpp_content_parts.append(f"0x{byte:02X}, ")
+
+    # Remove trailing comma and space if data was not empty
+    if data:
         cpp_content_parts[-1] = cpp_content_parts[-1].rstrip(', ')
     cpp_content_parts.append("\n};\n\n")
 
     cpp_content_parts.append(f"const __ResourceData_{c_var_name} {c_var_name} = {{\n")
     cpp_content_parts.append(f"    \"{args.resource_name}\",\n")
-    cpp_content_parts.append(f"    sizeof({c_var_name}_data),\n")
+    
+    # For uint, the size field must be expressed as a byte count
+    if args.data_type == 'uint':
+        array_size = ((len(data) + 3) // 4) * 4  # round up to a multiple of 4
+        cpp_content_parts.append(f"    {len(data)},  // 原始字节大小\n")
+    else:
+        cpp_content_parts.append(f"    sizeof({c_var_name}_data),\n")
+    
     cpp_content_parts.append(f"    {c_var_name}_data\n")
     cpp_content_parts.append(f"}};\n")
 
@@ -98,7 +133,7 @@ def main():
     try:
         os.makedirs(os.path.dirname(args.output_cpp), exist_ok=True)
         with open(args.output_cpp, "w") as f_cpp:
-            f_cpp.write("".join(cpp_content_parts))
+            f_cpp.write(''.join(cpp_content_parts))
     except IOError as e:
         print(f"Error writing source file {args.output_cpp}: {e}")
         return 1

From d5d9793f4e54f779ce51fa9aad3ebb655bfbbac3 Mon Sep 17 00:00:00 2001
From: kekxv <caesar@kekxv.com>
Date: Sun, 1 Jun 2025 08:21:17 +0000
Subject: [PATCH 2/2] fix windows build

---
 tools/bin_to_cc.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/bin_to_cc.py b/tools/bin_to_cc.py
index 24fdd73..ebcb7de 100755
--- a/tools/bin_to_cc.py
+++ b/tools/bin_to_cc.py
@@ -73,7 +73,8 @@ def main():
 """
     try:
         os.makedirs(os.path.dirname(args.output_h), exist_ok=True)
-        with open(args.output_h, "w") as f_h:
+        # Write the header file with explicit UTF-8 encoding
+        with open(args.output_h, "w", encoding='utf-8') as f_h:
             f_h.write(h_content)
     except IOError as e:
         print(f"Error writing header file {args.output_h}: {e}")
@@ -132,7 +133,8 @@ def main():
 
     try:
         os.makedirs(os.path.dirname(args.output_cpp), exist_ok=True)
-        with open(args.output_cpp, "w") as f_cpp:
+        # Write the source file with explicit UTF-8 encoding
+        with open(args.output_cpp, "w", encoding='utf-8') as f_cpp:
             f_cpp.write(''.join(cpp_content_parts))
     except IOError as e:
         print(f"Error writing source file {args.output_cpp}: {e}")