Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 86 additions & 16 deletions src/root.zig
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@

const std = @import("std");
const builtin = @import("builtin");
/// Operating system tag of the compilation target, as reported by `builtin`.
const native_os = builtin.os.tag;

/// True when the target CPU architecture is WebAssembly (32- or 64-bit).
const is_wasm = builtin.cpu.arch == .wasm32 or builtin.cpu.arch == .wasm64;
/// True when the target operating system is Windows.
const is_windows = native_os == .windows;

// Internal modules
pub const parser = @import("parser.zig");
Expand Down Expand Up @@ -95,8 +97,10 @@ pub const ParseError = struct {
pub const Document = struct {
/// Memory-mapped file data (zero-copy base)
data: []const u8,
/// Whether we own the data (mmap'd)
/// Whether we own the data (mmap'd or allocated)
owns_data: bool,
/// Whether data was allocated (Windows) vs mmap'd (POSIX)
data_is_allocated: bool = false,
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new data_is_allocated field has a default value of false, but this field is not initialized in the openFromMemory method (around line 234-246). While the default will apply, it would be clearer and more consistent with the other initialization blocks to explicitly set this field to false, especially since owns_data is already being set to false.

Copilot uses AI. Check for mistakes.

/// Cross-reference table
xref_table: XRefTable,
Expand Down Expand Up @@ -147,20 +151,30 @@ pub const Document = struct {
const stat = try file.stat();
const size = stat.size;

// Memory map the file
const data = try std.posix.mmap(
null,
size,
std.posix.PROT.READ,
.{ .TYPE = .PRIVATE },
file.handle,
0,
);

return openFromMemoryOwned(allocator, data, config);
if (comptime is_windows) {
// Windows: read file into allocated memory (no mmap support)
const data = try allocator.alignedAlloc(u8, .fromByteUnits(std.heap.page_size_min), size);
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The alignment parameter syntax appears incorrect. In Zig's alignedAlloc function, the second parameter should be a compile-time known alignment value (like std.mem.page_size), not an alignment enum. The correct syntax should be std.heap.page_size_min directly as the second parameter, not wrapped in .fromByteUnits(). This will likely cause a compilation error.

Suggested change
const data = try allocator.alignedAlloc(u8, .fromByteUnits(std.heap.page_size_min), size);
const data = try allocator.alignedAlloc(u8, std.heap.page_size_min, size);

Copilot uses AI. Check for mistakes.
errdefer allocator.free(data);
const bytes_read = try file.readAll(data);
if (bytes_read != size) {
return error.UnexpectedEof;
}
return openFromMemoryOwnedAlloc(allocator, data, config);
Comment on lines +154 to +162
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new Windows-specific file loading path (lines 154-162) lacks test coverage. The repository has comprehensive test suites (integration_test.zig, etc.), but there are no tests validating that Windows file loading and cleanup work correctly. Consider adding platform-specific tests or conditional tests that exercise this path when compiling for Windows.

Copilot uses AI. Check for mistakes.
} else {
// POSIX: memory map the file
const data = try std.posix.mmap(
null,
size,
std.posix.PROT.READ,
.{ .TYPE = .PRIVATE },
file.handle,
0,
);
return openFromMemoryOwned(allocator, data, config);
}
}

/// Open from owned memory (will be freed on close)
/// Open from owned memory (will be freed on close via munmap)
fn openFromMemoryOwned(allocator: std.mem.Allocator, data: []align(std.heap.page_size_min) u8, config: ErrorConfig) !*Document {
if (comptime is_wasm) {
@compileError("openFromMemoryOwned is not available on WASM. Use openFromMemory instead.");
Expand All @@ -172,6 +186,31 @@ pub const Document = struct {
doc.* = .{
.data = data,
.owns_data = true,
.data_is_allocated = false,
.xref_table = XRefTable.init(allocator),
.pages = .empty,
.object_cache = std.AutoHashMap(u32, Object).init(allocator),
.allocator = allocator,
.parsing_arena = std.heap.ArenaAllocator.init(allocator),
.error_config = config,
.errors = .empty,
.font_cache = std.StringHashMap(encoding.FontEncoding).init(allocator),
.font_obj_cache = std.AutoHashMap(u32, encoding.FontEncoding).init(allocator),
};

try doc.parseDocument();
return doc;
}

/// Open from owned allocated memory (Windows - will be freed on close via allocator.free)
fn openFromMemoryOwnedAlloc(allocator: std.mem.Allocator, data: []align(std.heap.page_size_min) u8, config: ErrorConfig) !*Document {
const doc = try allocator.create(Document);
errdefer allocator.destroy(doc);

doc.* = .{
.data = data,
.owns_data = true,
.data_is_allocated = true,
.xref_table = XRefTable.init(allocator),
.pages = .empty,
.object_cache = std.AutoHashMap(u32, Object).init(allocator),
Expand All @@ -195,6 +234,7 @@ pub const Document = struct {
doc.* = .{
.data = data,
.owns_data = false,
.data_is_allocated = false,
.xref_table = XRefTable.init(allocator),
.pages = .empty,
.object_cache = std.AutoHashMap(u32, Object).init(allocator),
Expand Down Expand Up @@ -335,7 +375,7 @@ pub const Document = struct {
// Use the comprehensive parseFontEncoding
const enc = encoding.parseFontEncoding(arena, fd, struct {
fn wrapper(ctx: *const anyopaque, obj: parser.Object) parser.Object {
const r: *const Resolver = @alignCast(@ptrCast(ctx));
const r: *const Resolver = @ptrCast(@alignCast(ctx));
return r.resolve(obj);
}
}.wrapper, &resolver) catch continue;
Expand All @@ -354,8 +394,14 @@ pub const Document = struct {
/// Close the document and free resources
pub fn close(self: *Document) void {
if (self.owns_data and !is_wasm) {
const aligned_ptr: [*]align(std.heap.page_size_min) u8 = @alignCast(@ptrCast(@constCast(self.data.ptr)));
std.posix.munmap(aligned_ptr[0..self.data.len]);
const aligned_ptr: [*]align(std.heap.page_size_min) u8 = @ptrCast(@alignCast(@constCast(self.data.ptr)));
if (self.data_is_allocated) {
// Windows (always) or future POSIX allocated path
self.allocator.free(aligned_ptr[0..self.data.len]);
} else if (comptime !is_windows) {
// POSIX: memory-mapped file
std.posix.munmap(aligned_ptr[0..self.data.len]);
}
}

// Free cached reading order
Expand Down Expand Up @@ -1577,3 +1623,27 @@ test "ErrorConfig presets" {
const permissive = ErrorConfig.permissive();
try std.testing.expect(permissive.continue_on_parse_error);
}

test "allocated memory path cleanup" {
    // Exercises the allocator-backed ownership path (data_is_allocated = true),
    // which openWithConfig uses on Windows in place of mmap, and checks that
    // close() releases the buffer through allocator.free().
    // std.testing.allocator reports any leaked allocation as a test failure.
    const allocator = std.testing.allocator;
    const testpdf = @import("testpdf.zig");

    // Generate test PDF data
    const pdf_data = try testpdf.generateMinimalPdf(allocator, "AllocTest");
    defer allocator.free(pdf_data);

    // Create page-aligned copy (simulates Windows file read path)
    const aligned_data = try allocator.alignedAlloc(u8, .fromByteUnits(std.heap.page_size_min), pdf_data.len);
    @memcpy(aligned_data, pdf_data);

    // The Document takes ownership of aligned_data only when opening succeeds.
    // On failure the buffer still belongs to this test (openWithConfig keeps an
    // errdefer around the same call for that reason), so release it here.
    // A plain `errdefer` is deliberately avoided: it would also fire if a later
    // expectation failed, double-freeing the buffer after doc.close().
    const doc = Document.openFromMemoryOwnedAlloc(allocator, aligned_data, ErrorConfig.default()) catch |err| {
        allocator.free(aligned_data);
        return err;
    };
    defer doc.close(); // This must free aligned_data via allocator.free()

    // Verify document parsed correctly
    try std.testing.expectEqual(@as(usize, 1), doc.pageCount());
}