Commit 99cf2bb: Fix the failing Windows build (#1)
Parent: 4e62348

11 files changed, +28 -68 lines


.github/workflows/release.yml

Lines changed: 18 additions & 6 deletions
@@ -75,27 +75,27 @@ jobs:
       include:
         - os: ubuntu-latest
           target: x86_64-linux-gnu
-          artifact_name: zigformer-cli-linux-x86_64.zip
+          artifact_name: zigformer-linux-x86_64.zip
           asset_content_type: application/zip
         - os: ubuntu-latest
           target: aarch64-linux-gnu
-          artifact_name: zigformer-cli-linux-aarch64.zip
+          artifact_name: zigformer-linux-aarch64.zip
           asset_content_type: application/zip
         - os: macos-latest
           target: aarch64-macos # Apple Silicon (ARM64)
-          artifact_name: zigformer-cli-macos-aarch64.zip
+          artifact_name: zigformer-macos-aarch64.zip
           asset_content_type: application/zip
         - os: macos-latest
           target: x86_64-macos # Intel
-          artifact_name: zigformer-cli-macos-x86_64.zip
+          artifact_name: zigformer-macos-x86_64.zip
           asset_content_type: application/zip
         - os: windows-latest
           target: x86_64-windows-msvc
-          artifact_name: zigformer-cli-windows-x86_64.zip
+          artifact_name: zigformer-windows-x86_64.zip
           asset_content_type: application/zip
         - os: windows-latest
           target: aarch64-windows-msvc
-          artifact_name: zigformer-cli-windows-aarch64.zip
+          artifact_name: zigformer-windows-aarch64.zip
           asset_content_type: application/zip
 
     steps:
@@ -120,6 +120,11 @@ jobs:
           ls -l zig-out/bin || true
           exit 1
         fi
+        if [ ! -f zig-out/bin/zigformer-gui ]; then
+          echo "ERROR: built binary zig-out/bin/zigformer-gui not found"
+          ls -l zig-out/bin || true
+          exit 1
+        fi
 
     - name: Verify built binary exists (Windows)
       if: runner.os == 'Windows'
@@ -129,13 +134,19 @@ jobs:
           dir zig-out\bin
           exit 1
         }
+        if (!(Test-Path "zig-out\bin\zigformer-gui.exe")) {
+          Write-Host 'ERROR: built binary zig-out\bin\zigformer-gui.exe not found'
+          dir zig-out\bin
+          exit 1
+        }
 
     - name: Prepare artifact for Linux/macOS
       if: runner.os != 'Windows'
       run: |
         RELEASE_DIR="release-${{ matrix.target }}-${{ github.run_id }}"
         mkdir -p "$RELEASE_DIR"
         mv zig-out/bin/zigformer-cli "$RELEASE_DIR"/
+        mv zig-out/bin/zigformer-gui "$RELEASE_DIR"/
         # Create a zip of the release contents (avoid nesting an extra top-level directory)
         (cd "$RELEASE_DIR" && zip -r "../${{ matrix.artifact_name }}" .)
@@ -145,6 +156,7 @@ jobs:
         $env:RELEASE_DIR = "release-${{ matrix.target }}-${{ github.run_id }}"
         New-Item -ItemType Directory -Path $env:RELEASE_DIR -Force | Out-Null
         Move-Item zig-out\bin\zigformer-cli.exe $env:RELEASE_DIR\
+        Move-Item zig-out\bin\zigformer-gui.exe $env:RELEASE_DIR\
         Compress-Archive -Path "$env:RELEASE_DIR\*" -DestinationPath ${{ matrix.artifact_name }}
 
     - name: Upload Release Asset

README.md

Lines changed: 2 additions & 2 deletions
@@ -28,7 +28,7 @@ and [nanochat](https://github.com/karpathy/nanochat) projects, and follows the a
 ["Language Models are Unsupervised Multitask Learners"](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)
 papers.
 It can be used as a [Zig library](https://CogitatorTech.github.io/zigformer/) for building LLMs or as a
-[standalone application](https://github.com/CogitatorTech/zigformer/releases)
+[standalone application](https://github.com/CogitatorTech/zigformer/releases/latest)
 for training, inference, and chatting with the model.
 
 The diagrams below show the high-level architecture and its core components.
@@ -171,7 +171,7 @@ zig build run -- predict --prompt "How do mountains form?" --top-k 5 --load-mode
 zig build run-gui -- --load-model model.bin
 ```
 
-![ZigFormer Web UI](assets/screenshots/zigformer_webui_v0.1.0.jpeg)
+![ZigFormer Web UI](assets/screenshots/zigformer_webui_v0.1.1.jpeg)
 
 ---
Binary screenshot files: the 261 KB image was removed and a 234 KB image was added (binary contents not shown).

build.zig.zon

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 .{
     .name = .zigformer,
-    .version = "0.1.0",
+    .version = "0.1.1",
     .fingerprint = 0xe046275379ddc4d8, // Changing this has security and trust implications.
     .minimum_zig_version = "0.15.2",
     .dependencies = .{

src/cli.zig

Lines changed: 2 additions & 29 deletions
@@ -216,7 +216,7 @@ fn trainAndMaybeRepl(allocator: std.mem.Allocator, pretrain_path: []const u8, ch
     std.debug.print("Type a prompt and press Enter to generate text.\n", .{});
     std.debug.print("Type 'exit' to quit.\n", .{});
 
-    const stdin_file = std.fs.File{ .handle = std.posix.STDIN_FILENO };
+    const stdin_file = std.fs.File.stdin();
     const stdin = stdin_file.deprecatedReader();
     var buffer: [1024]u8 = undefined;
     while (true) {
@@ -245,33 +245,6 @@ fn execRoot(ctx: chilli.CommandContext) !void {
     var config = if (config_path.len > 0) try loadConfig(allocator, config_path) else Config{};
     defer config.deinit(allocator);
 
-    // Override with CLI flags if provided (checking if they are different from defaults is tricky with chilli,
-    // so we'll assume CLI flags take precedence if they are set to non-default values or if we just use them directly.
-    // Actually, a better approach is: use config as base, then overwrite with CLI flags.
-    // But chilli returns defaults if flag is missing.
-    // So we need to know if flag was actually passed. Chilli doesn't easily expose this.
-    // For now, let's just use CLI flags if config is NOT present, OR if we want to support overrides,
-    // we have to accept that CLI defaults might overwrite config values.
-    // To solve this properly:
-    // 1. Load config
-    // 2. For each field, check if CLI flag was passed (not easy with current chilli usage).
-    // Alternative: We only use config if --config is passed, and ignore other flags? No, overrides are good.
-    // Let's assume: Config file sets defaults. CLI flags override.
-    // But chilli returns default values if flag is missing.
-    // So if config has batch_size=64, and CLI default is 32, and user runs without --batch-size, chilli returns 32.
-    // If we overwrite config with 32, we lose the config value.
-    // We need to check if the flag was present.
-    // Since we can't easily do that, let's prioritize CLI flags ONLY if they are explicitly different from our hardcoded defaults?
-    // Or simpler: If --config is passed, we use it. We can manually parse args to see if flags are present, but that's messy.
-
-    // Let's stick to the plan: Config file sets values. CLI flags override.
-    // If we want CLI to override, we need to know if user typed it.
-    // Given the constraints, let's do this:
-    // If --config is present, use it.
-    // AND we will NOT read other flags if --config is present, to avoid confusion.
-    // OR we can say: CLI flags are ignored if --config is present, EXCEPT for interactive/save-model maybe?
-    // Let's go with: If --config is present, it is the source of truth.
-
     if (config_path.len > 0) {
         std.debug.print("Loaded configuration from {s}\n", .{config_path});
     } else {
@@ -361,7 +334,7 @@ pub fn main() anyerror!void {
     var root_cmd = try chilli.Command.init(allocator, .{
         .name = "zigformer-cli",
         .description = "An educational transformer-based LLM in Zig",
-        .version = "v0.1.0",
+        .version = "v0.1.1",
         .exec = execRoot,
     });
     defer root_cmd.deinit();
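
The one-line stdin change above is the substantive part of this Windows fix: `std.posix.STDIN_FILENO` is a POSIX-only constant, so constructing the `std.fs.File` by hand does not compile for Windows targets, while `std.fs.File.stdin()` resolves the platform's standard-input handle internally. A minimal sketch of the portable pattern (assuming Zig 0.15.x; the single-line echo is illustrative, not the project's actual REPL loop):

```zig
const std = @import("std");

pub fn main() !void {
    // Portable: resolves the right stdin handle on POSIX and Windows alike,
    // with no reference to POSIX-only file-descriptor constants.
    const stdin_file = std.fs.File.stdin();
    const stdin = stdin_file.deprecatedReader();

    var buffer: [1024]u8 = undefined;
    // Read one line; readUntilDelimiterOrEof returns null at end of input.
    if (try stdin.readUntilDelimiterOrEof(&buffer, '\n')) |line| {
        std.debug.print("got: {s}\n", .{line});
    }
}
```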

src/gui.zig

Lines changed: 2 additions & 2 deletions
@@ -27,7 +27,7 @@ const Config = struct {
     }
 };
 
-// Global state for the server
+// Global state holders for the server
 const ServerState = struct {
     allocator: std.mem.Allocator,
     model: *llm.LLM,
@@ -421,7 +421,7 @@ pub fn main() !void {
     var cmd = try chilli.Command.init(allocator, .{
         .name = "zigformer-gui",
         .description = "Web GUI for ZigFormer",
-        .version = "0.1.0",
+        .version = "0.1.1",
         .exec = execGui,
     });
     defer cmd.deinit();

src/gui/index.html

Lines changed: 1 addition & 1 deletion
@@ -381,7 +381,7 @@
 
 <div class="control">
     <label>
-        Top-k
+        Top-p
         <span class="value-display" id="top-p-val">0.0</span>
     </label>
     <input id="top-p" max="1" min="0" step="0.05" type="range" value="0">

src/lib.zig

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 //! ## ZigFormer
 //!
-//! ZigFormer is an implementation of a transformer-based language model (like GPT-2)
+//! ZigFormer is an implementation of a transformer-based language model (LLM)
 //! written in pure Zig. It provides a framework for understanding and experimenting with
 //! transformer architectures.
 //!
@@ -40,7 +40,7 @@
 //! - `self_attention`: Multi-head self-attention mechanism
 //! - `feed_forward`: Position-wise feed-forward networks
 //! - `layer_norm`: Layer normalization
-//! - `transformer`: Transformer block (attention and FFN)
+//! - `transformer`: Transformer block (feed-forward and attention layers)
 //! - `output_projection`: Output layer for vocabulary prediction
 //! - `llm`: Language model implementation with training and inference API
 //! - `optimizer`: Adam optimizer

src/zigformer/llm.zig

Lines changed: 0 additions & 18 deletions
@@ -674,24 +674,6 @@ pub const LLM = struct {
     }
 
     pub fn setBatchSize(self: *LLM, batch_size: usize) void {
-        // Iterate over layers and set batch_size for SelfAttention layers
-        // Note: We need to know which layers are SelfAttention.
-        // In our simple structure, we know layers 1, 2, 3 are TransformerBlocks.
-        // TransformerBlock contains SelfAttention.
-        // But Layer is type-erased.
-        // Ideally, we should add setBatchSize to Layer vtable, but that's a big change.
-        // For now, we'll rely on the known structure and pointer casting, which is risky but fits the current style.
-        // Actually, TransformerBlock has a setBatchSize method we should add.
-        // Let's assume we add setBatchSize to TransformerBlock and call it here.
-
-        // Wait, we can't easily cast opaque pointers back to types without RTTI or knowing the type.
-        // Given the fixed structure:
-        // Layer 0: Embeddings (no batch_size needed)
-        // Layer 1: TransformerBlock
-        // Layer 2: TransformerBlock
-        // Layer 3: TransformerBlock
-        // Layer 4: OutputProjection (no batch_size needed)
-
         const embeddings: *Embeddings = @ptrCast(@alignCast(self.network.items[0].self));
         embeddings.setBatchSize(batch_size);
 
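
What survives in `setBatchSize` is exactly the cast the deleted notes were debating: recovering a concrete layer type from a type-erased pointer at a position the caller knows by construction. A minimal sketch of that downcast pattern (the `Layer` and `Embeddings` shapes here are simplified stand-ins, not the project's definitions):

```zig
const std = @import("std");

// Simplified stand-in for a type-erased layer entry.
const Layer = struct {
    self: *anyopaque,
};

const Embeddings = struct {
    batch_size: usize = 1,

    fn setBatchSize(e: *Embeddings, batch_size: usize) void {
        e.batch_size = batch_size;
    }
};

pub fn main() void {
    var emb = Embeddings{};
    const layer = Layer{ .self = &emb };

    // The downcast is safe only because the caller knows, by construction,
    // which concrete type sits at this position in the network.
    const typed: *Embeddings = @ptrCast(@alignCast(layer.self));
    typed.setBatchSize(32);

    std.debug.print("batch_size = {d}\n", .{emb.batch_size});
}
```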
