Skip to content

Commit 7776852

Browse files
authored
feat: revamp file IO, this time Windows compatible (leanprover#4950)
This implements a naive version of `getline` because Windows does not have `getline`. Given the fact that `FILE` has buffered IO, calling `fgetc` in a loop is not as big of a performance hazard as it might seem at first glance. The proper solution to this would of course be to have our own buffered IO so we are fully in charge of the buffer. In this situation we could check the entire buffer for a newline at once instead of char by char. However that is not going to happen for the near future so I propose we stay with this implementation. If reading individual lines of a file does truly end up being the performance bottle neck we have already won^^.
1 parent 63c4de5 commit 7776852

File tree

5 files changed

+80
-54
lines changed

5 files changed

+80
-54
lines changed

src/Init/System/IO.lean

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -470,31 +470,23 @@ def withFile (fn : FilePath) (mode : Mode) (f : Handle → IO α) : IO α :=
470470
def Handle.putStrLn (h : Handle) (s : String) : IO Unit :=
471471
h.putStr (s.push '\n')
472472

473-
partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do
473+
partial def Handle.readBinToEndInto (h : Handle) (buf : ByteArray) : IO ByteArray := do
474474
let rec loop (acc : ByteArray) : IO ByteArray := do
475475
let buf ← h.read 1024
476476
if buf.isEmpty then
477477
return acc
478478
else
479479
loop (acc ++ buf)
480-
loop ByteArray.empty
480+
loop buf
481481

482-
partial def Handle.readToEnd (h : Handle) : IO String := do
483-
let rec loop (s : String) := do
484-
let line ← h.getLine
485-
if line.isEmpty then
486-
return s
487-
else
488-
loop (s ++ line)
489-
loop ""
490-
491-
def readBinFile (fname : FilePath) : IO ByteArray := do
492-
let h ← Handle.mk fname Mode.read
493-
h.readBinToEnd
482+
partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do
483+
h.readBinToEndInto .empty
494484

495-
def readFile (fname : FilePath) : IO String := do
496-
let h ← Handle.mk fname Mode.read
497-
h.readToEnd
485+
def Handle.readToEnd (h : Handle) : IO String := do
486+
let data ← h.readBinToEnd
487+
match String.fromUTF8? data with
488+
| some s => return s
489+
| none => throw <| .userError s!"Tried to read from handle containing non UTF-8 data."
498490

499491
partial def lines (fname : FilePath) : IO (Array String) := do
500492
let h ← Handle.mk fname Mode.read
@@ -600,6 +592,28 @@ end System.FilePath
600592

601593
namespace IO
602594

595+
namespace FS
596+
597+
def readBinFile (fname : FilePath) : IO ByteArray := do
598+
-- Requires metadata so defined after metadata
599+
let mdata ← fname.metadata
600+
let size := mdata.byteSize.toUSize
601+
let handle ← IO.FS.Handle.mk fname .read
602+
let buf ←
603+
if size > 0 then
604+
handle.read mdata.byteSize.toUSize
605+
else
606+
pure <| ByteArray.mkEmpty 0
607+
handle.readBinToEndInto buf
608+
609+
def readFile (fname : FilePath) : IO String := do
610+
let data ← readBinFile fname
611+
match String.fromUTF8? data with
612+
| some s => return s
613+
| none => throw <| .userError s!"Tried to read file '{fname}' containing non UTF-8 data."
614+
615+
end FS
616+
603617
def withStdin [Monad m] [MonadFinally m] [MonadLiftT BaseIO m] (h : FS.Stream) (x : m α) : m α := do
604618
let prev ← setStdin h
605619
try x finally discard <| setStdin prev

src/runtime/io.cpp

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -485,43 +485,36 @@ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_write(b_obj_arg h, b_obj_arg
485485
}
486486
}
487487

488-
/*
489-
Handle.getLine : (@& Handle) → IO Unit
490-
The line returned by `lean_io_prim_handle_get_line`
491-
is truncated at the first '\0' character and the
492-
rest of the line is discarded. */
488+
/* Handle.getLine : (@& Handle) → IO Unit */
493489
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_get_line(b_obj_arg h, obj_arg /* w */) {
494490
FILE * fp = io_get_handle(h);
495-
const int buf_sz = 64;
496-
char buf_str[buf_sz]; // NOLINT
491+
497492
std::string result;
498-
bool first = true;
499-
while (true) {
500-
char * out = std::fgets(buf_str, buf_sz, fp);
501-
if (out != nullptr) {
502-
if (strlen(buf_str) < buf_sz-1 || buf_str[buf_sz-2] == '\n') {
503-
if (first) {
504-
return io_result_mk_ok(mk_string(out));
505-
} else {
506-
result.append(out);
507-
return io_result_mk_ok(mk_string(result));
508-
}
509-
}
510-
result.append(out);
511-
} else if (std::feof(fp)) {
512-
clearerr(fp);
513-
return io_result_mk_ok(mk_string(result));
514-
} else {
515-
return io_result_mk_error(decode_io_error(errno, nullptr));
493+
int c; // Note: int, not char, required to handle EOF
494+
while ((c = std::fgetc(fp)) != EOF) {
495+
result.push_back(c);
496+
if (c == '\n') {
497+
break;
516498
}
517-
first = false;
499+
}
500+
501+
if (std::ferror(fp)) {
502+
return io_result_mk_error(decode_io_error(errno, nullptr));
503+
} else if (std::feof(fp)) {
504+
clearerr(fp);
505+
return io_result_mk_ok(mk_string(result));
506+
} else {
507+
obj_res ret = io_result_mk_ok(mk_string(result));
508+
return ret;
518509
}
519510
}
520511

521512
/* Handle.putStr : (@& Handle) → (@& String) → IO Unit */
522513
extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_put_str(b_obj_arg h, b_obj_arg s, obj_arg /* w */) {
523514
FILE * fp = io_get_handle(h);
524-
if (std::fputs(lean_string_cstr(s), fp) != EOF) {
515+
usize n = lean_string_size(s) - 1; // - 1 to ignore the terminal NULL byte.
516+
usize m = std::fwrite(lean_string_cstr(s), 1, n, fp);
517+
if (m == n) {
525518
return io_result_mk_ok(box(0));
526519
} else {
527520
return io_result_mk_error(decode_io_error(errno, nullptr));

stage0/src/runtime/io.cpp

Lines changed: 15 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

stage0/src/runtime/object.h

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/lean/run/3546.lean

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
def test : IO Unit := do
2+
let tmpFile := "3546.tmp"
3+
let firstLine := "foo\u0000bar\n"
4+
let content := firstLine ++ "hello world\nbye"
5+
IO.FS.writeFile tmpFile content
6+
let handle ← IO.FS.Handle.mk tmpFile .read
7+
let firstReadLine ← handle.getLine
8+
let cond := firstLine == firstReadLine && firstReadLine.length == 8 -- paranoid
9+
IO.println cond
10+
IO.FS.removeFile tmpFile
11+
12+
/-- info: true -/
13+
#guard_msgs in
14+
#eval test

0 commit comments

Comments
 (0)