From d32843b606bdb59957662d267f258d3dc7bf27b5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 30 Nov 2024 16:34:42 +0100 Subject: [PATCH 01/25] Automatically enable JITPROFILING with ITTAPI (#55598) This helps when profiling remotely since VTunes doesn't support setting environment variables on remote systems. Will still respect `ENABLE_JITPROFILING=0`. --- src/codegen.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 0b483696567ad..b8bed0793730b 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -81,6 +81,10 @@ #include #include +#ifdef USE_ITTAPI +#include "ittapi/ittnotify.h" +#endif + using namespace llvm; static bool jl_fpo_disabled(const Triple &TT) { @@ -10427,8 +10431,16 @@ extern "C" void jl_init_llvm(void) const char *jit_profiling = getenv("ENABLE_JITPROFILING"); #if defined(JL_USE_INTEL_JITEVENTS) - if (jit_profiling && atoi(jit_profiling)) { - jl_using_intel_jitevents = 1; + if (jit_profiling) { + if (atoi(jit_profiling)) { + jl_using_intel_jitevents = 1; + } + } else { +#ifdef USE_ITTAPI + __itt_collection_state state = __itt_get_collection_state(); + jl_using_intel_jitevents = state == __itt_collection_init_successful || + state == __itt_collection_collector_exists; +#endif } #endif From ef328064a96fb143c28e2765f5a84b3ab8c43c87 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Sun, 1 Dec 2024 04:35:39 +0000 Subject: [PATCH 02/25] Fix string handling in jlchecksum (#56720) A `TAGGED_RELEASE_BANNER` with spaces such as `Official https://julialang.org release` produces the error `/cache/build/builder-amdci4-5/julialang/julia-master/deps/tools/jlchecksum: 66: [: Official: unexpected operator`. --- deps/tools/jlchecksum | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/tools/jlchecksum b/deps/tools/jlchecksum index 329d3a2a845d4..9945ec89e6bda 100755 --- a/deps/tools/jlchecksum +++ b/deps/tools/jlchecksum @@ -63,7 +63,7 @@ find_checksum() fi done if [ ! -f "$DEPSDIR/checksums/$BASENAME/$CHECKSUM_TYPE" ]; then - if [ ${TAGGED_RELEASE_BANNER:-} ]; then + if [ "${TAGGED_RELEASE_BANNER:-}" ]; then echo "WARNING: $CHECKSUM_TYPE checksum for $BASENAME not found in deps/checksums/, failing release build." >&2 exit 3 fi From ea421126d831b18fa00408794247317b544b18d8 Mon Sep 17 00:00:00 2001 From: Priynsh <119518987+Priynsh@users.noreply.github.com> Date: Mon, 2 Dec 2024 00:29:52 +0530 Subject: [PATCH 03/25] Clarifying ispunct behavior difference between Julia and C in documentation (#56727) Fixes #56680. This PR updates the documentation for the ispunct function in Julia to explicitly note its differing behavior from the similarly named function in C. --------- Co-authored-by: Lilith Orion Hafner --- base/strings/unicode.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index ad047514c85a6..fcb4a371e9898 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -534,11 +534,17 @@ iscntrl(c::AbstractChar) = c <= '\x1f' || '\x7f' <= c <= '\u9f' Tests whether a character belongs to the Unicode general category Punctuation, i.e. a character whose category code begins with 'P'. +!!! note + This behavior is different from the `ispunct` function in C. + # Examples ```jldoctest julia> ispunct('α') false +julia> ispunct('=') +false + julia> ispunct('/') true From 8ce7d0fce419746e36556d561fe7d1c89704e291 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 2 Dec 2024 11:13:51 -0400 Subject: [PATCH 04/25] [NEWS.md] Add PR numbers and remove some 1.11 changes that accidentally came back. (#56722) --- NEWS.md | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index 61bad831e261c..c1d5f38f337b0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -55,11 +55,11 @@ Command-line option changes --------------------------- * The `-m/--module` flag can be passed to run the `main` function inside a package with a set of arguments. - This `main` function should be declared using `@main` to indicate that it is an entry point. + This `main` function should be declared using `@main` to indicate that it is an entry point. ([#52103]) * Enabling or disabling color text in Julia can now be controlled with the [`NO_COLOR`](https://no-color.org/) or [`FORCE_COLOR`](https://force-color.org/) environment variables. These variables are also honored by Julia's build system ([#53742], [#56346]). -* `--project=@temp` starts Julia with a temporary environment. +* `--project=@temp` starts Julia with a temporary environment. ([#51149]) * New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took to compile, in ms. ([#54662]) * `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not supported ([#55763]) @@ -72,7 +72,7 @@ Multi-threading changes a `OncePerProcess{T}` type, which allows defining a function that should be run exactly once the first time it is called, and then always return the same result value of type `T` every subsequent time afterwards. There are also `OncePerThread{T}` and `OncePerTask{T}` types for - similar usage with threads or tasks. ([#TBD]) + similar usage with threads or tasks. ([#55793]) Build system changes -------------------- @@ -86,32 +86,15 @@ New library functions * The new `isfull(c::Channel)` function can be used to check if `put!(c, some_value)` will block. ([#53159]) * `waitany(tasks; throw=false)` and `waitall(tasks; failfast=false, throw=false)` which wait multiple tasks at once ([#53341]). * `uuid7()` creates an RFC 9652 compliant UUID with version 7 ([#54834]). -* `insertdims(array; dims)` allows to insert singleton dimensions into an array which is the inverse operation to `dropdims` +* `insertdims(array; dims)` allows to insert singleton dimensions into an array which is the inverse operation to `dropdims`. ([#45793]) * The new `Fix` type is a generalization of `Fix1/Fix2` for fixing a single argument ([#54653]). New library features -------------------- -* `invmod(n, T)` where `T` is a native integer type now computes the modular inverse of `n` in the modular integer ring that `T` defines ([#52180]). -* `invmod(n)` is an abbreviation for `invmod(n, typeof(n))` for native integer types ([#52180]). -* `replace(string, pattern...)` now supports an optional `IO` argument to - write the output to a stream rather than returning a string ([#48625]). -* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]). -* New function `Docs.hasdoc(module, symbol)` tells whether a name has a docstring ([#52139]). -* New function `Docs.undocumented_names(module)` returns a module's undocumented public names ([#52413]). -* Passing an `IOBuffer` as a stdout argument for `Process` spawn now works as - expected, synchronized with `wait` or `success`, so a `Base.BufferStream` is - no longer required there for correctness to avoid data races ([#52461]). -* After a process exits, `closewrite` will no longer be automatically called on - the stream passed to it. Call `wait` on the process instead to ensure the - content is fully written, then call `closewrite` manually to avoid - data-races. Or use the callback form of `open` to have all that handled - automatically. -* `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889]) * `escape_string` takes additional keyword arguments `ascii=true` (to escape all non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers - for u/U escapes, e.g. for C compatibility) [#55099]). -* `filter` can now act on a `NamedTuple` ([#50795]). + for u/U escapes, e.g. for C compatibility) ([#55099]). * `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in the uniquing checking ([#53474]) * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988]) @@ -133,7 +116,7 @@ Standard library changes * A new standard library for applying syntax highlighting to Julia code, this uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function - that creates an `AnnotatedString` with syntax highlighting applied. + that creates an `AnnotatedString` with syntax highlighting applied. ([#51810]) #### Package Manager From 6a0de347469abe38c970b9efbf0244f2d60806ca Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sat, 30 Nov 2024 18:06:26 -0500 Subject: [PATCH 05/25] ircode: cleanup code crud - support gc running - don't duplicate field 4 - remove some unused code only previously needed for handling cycles (which are not valid in IR) --- src/clangsa/GCChecker.cpp | 3 +- src/ircode.c | 399 +++++++++++++++++++++++++------------- src/serialize.h | 69 ------- 3 files changed, 263 insertions(+), 208 deletions(-) diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp index cac89a6761d01..40093ca15859b 100644 --- a/src/clangsa/GCChecker.cpp +++ b/src/clangsa/GCChecker.cpp @@ -847,7 +847,8 @@ bool GCChecker::isGCTrackedType(QualType QT) { Name.ends_with_insensitive("jl_vararg_t") || Name.ends_with_insensitive("jl_opaque_closure_t") || Name.ends_with_insensitive("jl_globalref_t") || - // Probably not technically true for these, but let's allow it + // Probably not technically true for these, but let's allow it as a root + Name.ends_with_insensitive("jl_ircode_state") || Name.ends_with_insensitive("typemap_intersection_env") || Name.ends_with_insensitive("interpreter_state") || Name.ends_with_insensitive("jl_typeenv_t") || diff --git a/src/ircode.c b/src/ircode.c index bec8d46513eef..d3137ce26edef 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -10,17 +10,76 @@ #include "julia_internal.h" #include "serialize.h" -#ifndef _OS_WINDOWS_ -#include -#endif - -#include "valgrind.h" #include "julia_assert.h" #ifdef __cplusplus extern "C" { #endif +#define TAG_SYMBOL 2 +#define TAG_SSAVALUE 3 +#define TAG_DATATYPE 4 +#define TAG_SLOTNUMBER 5 +#define TAG_SVEC 6 +// #define TAG_UNUSED 7 +#define TAG_NULL 8 +#define TAG_EXPR 9 +#define TAG_PHINODE 10 +#define TAG_PHICNODE 11 +#define TAG_LONG_SYMBOL 12 +#define TAG_LONG_SVEC 13 +#define TAG_LONG_EXPR 14 +#define TAG_LONG_PHINODE 15 +#define TAG_LONG_PHICNODE 16 +#define TAG_METHODROOT 17 +#define TAG_EDGE 18 +#define TAG_STRING 19 +#define TAG_SHORT_INT64 20 +//#define TAG_UNUSED 21 +#define TAG_CNULL 22 +#define TAG_ARRAY1D 23 +#define TAG_SINGLETON 24 +#define TAG_MODULE 25 +#define TAG_TVAR 26 +#define TAG_METHOD_INSTANCE 27 +#define TAG_METHOD 28 +#define TAG_CODE_INSTANCE 29 +#define TAG_COMMONSYM 30 +#define TAG_NEARBYGLOBAL 31 +#define TAG_GLOBALREF 32 +#define TAG_CORE 33 +#define TAG_BASE 34 +#define TAG_BITYPENAME 35 +#define TAG_NEARBYMODULE 36 +#define TAG_INT32 37 +#define TAG_INT64 38 +#define TAG_UINT8 39 +#define TAG_VECTORTY 40 +#define TAG_PTRTY 41 +#define TAG_LONG_SSAVALUE 42 +#define TAG_LONG_METHODROOT 43 +#define TAG_LONG_EDGE 44 +#define TAG_SHORTER_INT64 45 +#define TAG_SHORT_INT32 46 +#define TAG_CALL1 47 +#define TAG_CALL2 48 +#define TAG_SHORT_BACKREF 49 +#define TAG_BACKREF 50 +#define TAG_UNIONALL 51 +#define TAG_GOTONODE 52 +#define TAG_QUOTENODE 53 +#define TAG_GENERAL 54 +#define TAG_GOTOIFNOT 55 +#define TAG_RETURNNODE 56 +#define TAG_ARGUMENT 57 +#define TAG_RELOC_METHODROOT 58 +#define TAG_BINDING 59 +#define TAG_MEMORYT 60 +#define TAG_ENTERNODE 61 + +#define LAST_TAG 61 + + typedef struct { ios_t *s; // method we're compressing for @@ -38,29 +97,29 @@ static jl_value_t *deser_tag[256]; static htable_t common_symbol_tag; static jl_value_t *deser_symbols[256]; -void *jl_lookup_ser_tag(jl_value_t *v) +static void *jl_lookup_ser_tag(jl_value_t *v) { return ptrhash_get(&ser_tag, v); } -void *jl_lookup_common_symbol(jl_value_t *v) +static void *jl_lookup_common_symbol(jl_value_t *v) { return ptrhash_get(&common_symbol_tag, v); } -jl_value_t *jl_deser_tag(uint8_t tag) +static jl_value_t *jl_deser_tag(uint8_t tag) { return deser_tag[tag]; } -jl_value_t *jl_deser_symbol(uint8_t tag) +static jl_value_t *jl_deser_symbol(uint8_t tag) { return deser_symbols[tag]; } // --- encoding --- -static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED; +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal); #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0) static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) @@ -69,7 +128,7 @@ static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i) s->relocatability = 0; } -static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED +static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) { jl_array_t *rs = s->method->roots; int i, l = jl_array_nrows(rs); @@ -142,7 +201,7 @@ static void jl_encode_as_indexed_root(jl_ircode_state *s, jl_value_t *v) } } -static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len) JL_GC_DISABLED +static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, size_t offset, size_t len) { jl_datatype_t *t = (jl_datatype_t*)jl_typetagof(mem); size_t i; @@ -180,7 +239,7 @@ static void jl_encode_memory_slice(jl_ircode_state *s, jl_genericmemory_t *mem, } } -static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED +static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) { size_t i; @@ -306,8 +365,11 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } for (i = 0; i < l; i++) { int32_t e = jl_array_data(edges, int32_t)[i]; - if (e <= 20) - jl_encode_value(s, jl_box_int32(e)); + if (e <= 0 && e <= 20) { // 1-byte encodings + jl_value_t *ebox = jl_box_int32(e); + JL_GC_PROMISE_ROOTED(ebox); + jl_encode_value(s, ebox); + } else jl_encode_int32(s, e); } @@ -333,25 +395,39 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) } else if (jl_is_gotonode(v)) { write_uint8(s->s, TAG_GOTONODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_gotoifnot(v)) { write_uint8(s->s, TAG_GOTOIFNOT); - jl_encode_value(s, jl_get_nth_field(v, 0)); - jl_encode_value(s, jl_get_nth_field(v, 1)); + jl_value_t *f = jl_get_nth_field_noalloc(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + f = jl_get_nth_field(v, 1); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_enternode(v)) { write_uint8(s->s, TAG_ENTERNODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); - jl_encode_value(s, jl_get_nth_field(v, 1)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + f = jl_get_nth_field_noalloc(v, 1); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_argument(v)) { write_uint8(s->s, TAG_ARGUMENT); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_value_t *f = jl_get_nth_field(v, 0); + JL_GC_PUSH1(&f); + jl_encode_value(s, f); + JL_GC_POP(); } else if (jl_is_returnnode(v)) { write_uint8(s->s, TAG_RETURNNODE); - jl_encode_value(s, jl_get_nth_field(v, 0)); + jl_encode_value(s, jl_returnnode_value(v)); } else if (jl_is_quotenode(v)) { write_uint8(s->s, TAG_QUOTENODE); @@ -394,19 +470,15 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) write_int32(s->s, jl_string_len(v)); ios_write(s->s, jl_string_data(v), jl_string_len(v)); } - else if (as_literal && jl_is_array(v)) { + else if (as_literal && jl_is_array(v) && jl_array_ndims(v)) { jl_array_t *ar = (jl_array_t*)v; - if (jl_array_ndims(ar) == 1) { - write_uint8(s->s, TAG_ARRAY1D); - } - else { - write_uint8(s->s, TAG_ARRAY); - write_uint16(s->s, jl_array_ndims(ar)); - } - for (i = 0; i < jl_array_ndims(ar); i++) - jl_encode_value(s, jl_box_long(jl_array_dim(ar, i))); + write_uint8(s->s, TAG_ARRAY1D); + size_t l = jl_array_dim0(ar); + jl_value_t *lbox = jl_box_long(l); + JL_GC_PUSH1(&lbox); + jl_encode_value(s, lbox); + JL_GC_POP(); jl_encode_value(s, jl_typeof(ar)); - size_t l = jl_array_len(ar); const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(ar->ref.mem))->layout; size_t offset; if (layout->flags.arrayelem_isunion || layout->size == 0) @@ -419,7 +491,10 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) jl_genericmemory_t* m = (jl_genericmemory_t*)v; write_uint8(s->s, TAG_MEMORYT); jl_encode_value(s, (jl_datatype_t*)jl_typetagof(v)); - jl_encode_value(s, jl_box_long(m->length)); + jl_value_t *lbox = jl_box_long(m->length); + JL_GC_PUSH1(&lbox); + jl_encode_value(s, lbox); + JL_GC_POP(); jl_encode_memory_slice(s, m, 0, m->length); } else if (as_literal && jl_is_layout_opaque(((jl_datatype_t*)jl_typeof(v))->layout)) { @@ -428,16 +503,8 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) else if (as_literal || jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_linenode(v) || jl_is_upsilonnode(v) || jl_is_pinode(v) || jl_is_slotnumber(v) || jl_is_ssavalue(v) || (jl_isbits(jl_typeof(v)) && jl_datatype_size(jl_typeof(v)) <= 64)) { + write_uint8(s->s, TAG_GENERAL); jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v); - size_t tsz = jl_datatype_size(t); - if (tsz <= 255) { - write_uint8(s->s, TAG_SHORT_GENERAL); - write_uint8(s->s, tsz); - } - else { - write_uint8(s->s, TAG_GENERAL); - write_int32(s->s, tsz); - } jl_encode_value(s, t); char *data = (char*)jl_data_ptr(v); @@ -492,34 +559,35 @@ static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t // --- decoding --- -static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED; +static jl_value_t *jl_decode_value(jl_ircode_state *s); -static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_svec(jl_ircode_state *s, uint8_t tag) { size_t i, len; if (tag == TAG_SVEC) len = read_uint8(s->s); else len = read_int32(s->s); - jl_svec_t *sv = jl_alloc_svec_uninit(len); - jl_value_t **data = jl_svec_data(sv); - for (i = 0; i < len; i++) { - data[i] = jl_decode_value(s); - } + jl_svec_t *sv = jl_alloc_svec(len); + JL_GC_PUSH1(&sv); + for (i = 0; i < len; i++) + jl_svecset(sv, i, jl_decode_value(s)); + JL_GC_POP(); return (jl_value_t*)sv; } -static jl_value_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel) JL_GC_DISABLED +static jl_genericmemory_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, size_t nel) { jl_genericmemory_t *m = jl_alloc_genericmemory(mty, nel); + JL_GC_PUSH1(&m); const jl_datatype_layout_t *layout = ((jl_datatype_t*)mty)->layout; if (layout->flags.arrayelem_isboxed) { jl_value_t **data = (jl_value_t**)m->ptr; size_t i, numel = m->length; for (i = 0; i < numel; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(m, data[i]); } - assert(jl_astaggedvalue(m)->bits.gc == GC_CLEAN); // gc is disabled } else if (layout->first_ptr >= 0) { size_t i, numel = m->length; @@ -534,49 +602,48 @@ static jl_value_t *jl_decode_value_memory(jl_ircode_state *s, jl_value_t *mty, s if ((char*)fld != start) ios_readall(s->s, start, (const char*)fld - start); *fld = jl_decode_value(s); + jl_gc_wb(m, fld); start = (char*)&fld[1]; } data += elsz; if (data != start) ios_readall(s->s, start, data - start); } - assert(jl_astaggedvalue(m)->bits.gc == GC_CLEAN); // gc is disabled } else { size_t extra = jl_genericmemory_isbitsunion(m) ? m->length : 0; size_t tot = m->length * layout->size + extra; ios_readall(s->s, (char*)m->ptr, tot); } - return (jl_value_t*)m; + JL_GC_POP(); + return m; } JL_DLLEXPORT jl_array_t *jl_alloc_array_nd(jl_value_t *atype, size_t *dims, size_t ndims); -static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_array1d(jl_ircode_state *s, uint8_t tag) { - int16_t i, ndims; - if (tag == TAG_ARRAY1D) - ndims = 1; - else - ndims = read_uint16(s->s); - size_t *dims = (size_t*)alloca(ndims * sizeof(size_t)); - size_t len = 1; - for (i = 0; i < ndims; i++) { - dims[i] = jl_unbox_long(jl_decode_value(s)); - len *= dims[i]; - } + int16_t ndims = 1; + size_t dim0 = jl_unbox_long(jl_decode_value(s)); + size_t len = dim0; jl_value_t *aty = jl_decode_value(s); - jl_array_t *a = jl_alloc_array_nd(aty, dims, ndims); - a->ref.mem = (jl_genericmemory_t*)jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len); + JL_GC_PROMISE_ROOTED(aty); // (JL_ALWAYS_LEAFTYPE) + jl_genericmemory_t *mem = jl_decode_value_memory(s, jl_field_type_concrete((jl_datatype_t*)jl_field_type_concrete((jl_datatype_t*)aty, 0), 1), len); + JL_GC_PUSH1(&mem); + int tsz = sizeof(jl_array_t) + ndims*sizeof(size_t); + jl_array_t *a = (jl_array_t*)jl_gc_alloc(s->ptls, tsz, aty); + a->ref.mem = mem; const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(a->ref.mem))->layout; if (layout->flags.arrayelem_isunion || layout->size == 0) a->ref.ptr_or_offset = (void*)0; else a->ref.ptr_or_offset = a->ref.mem->ptr; + a->dimsize[0] = dim0; + JL_GC_POP(); return (jl_value_t*)a; } -static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) { size_t i, len; jl_sym_t *head = NULL; @@ -597,14 +664,18 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D if (head == NULL) head = (jl_sym_t*)jl_decode_value(s); jl_expr_t *e = jl_exprn(head, len); + JL_GC_PUSH1(&e); jl_value_t **data = jl_array_ptr_data(e->args); + jl_value_t *owner = jl_array_owner(e->args); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(owner, data[i]); } + JL_GC_POP(); return (jl_value_t*)e; } -static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) { size_t i, len_e, len_v; if (tag == TAG_PHINODE) { @@ -614,9 +685,13 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI len_e = read_int32(s->s); len_v = read_int32(s->s); } - jl_array_t *e = jl_alloc_array_1d(jl_array_int32_type, len_e); - jl_array_t *v = jl_alloc_vec_any(len_v); - jl_value_t *phi = jl_new_struct(jl_phinode_type, e, v); + jl_array_t *e = NULL; + jl_array_t *v = NULL; + jl_value_t *phi = NULL; + JL_GC_PUSH3(&e, &v, &phi); + e = jl_alloc_array_1d(jl_array_int32_type, len_e); + v = jl_alloc_vec_any(len_v); + phi = jl_new_struct(jl_phinode_type, e, v); int32_t *data_e = jl_array_data(e, int32_t); for (i = 0; i < len_e; i++) { data_e[i] = jl_unbox_int32(jl_decode_value(s)); @@ -624,11 +699,13 @@ static jl_value_t *jl_decode_value_phi(jl_ircode_state *s, uint8_t tag) JL_GC_DI jl_value_t **data_v = jl_array_ptr_data(v); for (i = 0; i < len_v; i++) { data_v[i] = jl_decode_value(s); + jl_gc_wb(jl_array_owner(v), data_v[i]); } + JL_GC_POP(); return phi; } -static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) { size_t i, len; if (tag == TAG_PHICNODE) @@ -636,41 +713,53 @@ static jl_value_t *jl_decode_value_phic(jl_ircode_state *s, uint8_t tag) JL_GC_D else len = read_int32(s->s); jl_array_t *v = jl_alloc_vec_any(len); - jl_value_t *phic = jl_new_struct(jl_phicnode_type, v); + jl_value_t *phic = (jl_value_t*)v; + JL_GC_PUSH1(&phic); + phic = jl_new_struct(jl_phicnode_type, v); jl_value_t **data = jl_array_ptr_data(v); for (i = 0; i < len; i++) { data[i] = jl_decode_value(s); + jl_gc_wb(jl_array_owner(v), data[i]); } + JL_GC_POP(); return phic; } -static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) JL_GC_DISABLED +static jl_value_t *jl_decode_value_globalref(jl_ircode_state *s) { - jl_value_t *mod = jl_decode_value(s); - jl_value_t *var = jl_decode_value(s); - return jl_module_globalref((jl_module_t*)mod, (jl_sym_t*)var); + jl_module_t *mod = (jl_module_t*)jl_decode_value(s); + JL_GC_PROMISE_ROOTED(mod); + jl_sym_t *var = (jl_sym_t*)jl_decode_value(s); + JL_GC_PROMISE_ROOTED(var); + return jl_module_globalref(mod, var); } -static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DISABLED +static jl_value_t *jl_decode_value_any(jl_ircode_state *s) { - int32_t sz = (tag == TAG_SHORT_GENERAL ? read_uint8(s->s) : read_int32(s->s)); - jl_value_t *v = jl_gc_alloc(s->ptls, sz, NULL); - jl_set_typeof(v, (void*)(intptr_t)0xf50); jl_datatype_t *dt = (jl_datatype_t*)jl_decode_value(s); - if (dt->smalltag) + JL_GC_PROMISE_ROOTED(dt); // (JL_ALWAYS_LEAFTYPE) + // jl_new_struct_uninit + size_t sz = jl_datatype_size(dt); + jl_value_t *v = jl_gc_alloc(s->ptls, sz, dt); + if (dt->smalltag) // TODO: do we need this? jl_set_typetagof(v, dt->smalltag, 0); - else - jl_set_typeof(v, dt); char *data = (char*)jl_data_ptr(v); size_t i, np = dt->layout->npointers; char *start = data; - for (i = 0; i < np; i++) { - uint32_t ptr = jl_ptr_offset(dt, i); - jl_value_t **fld = &((jl_value_t**)data)[ptr]; - if ((char*)fld != start) - ios_readall(s->s, start, (const char*)fld - start); - *fld = jl_decode_value(s); - start = (char*)&fld[1]; + if (np) { + if (sz > 0) + memset(v, 0, sz); + JL_GC_PUSH1(&v); + for (i = 0; i < np; i++) { + uint32_t ptr = jl_ptr_offset(dt, i); + jl_value_t **fld = &((jl_value_t**)data)[ptr]; + if ((char*)fld != start) + ios_readall(s->s, start, (const char*)fld - start); + *fld = jl_decode_value(s); + jl_gc_wb(v, *fld); + start = (char*)&fld[1]; + } + JL_GC_POP(); } data += jl_datatype_size(dt); if (data != start) @@ -678,7 +767,7 @@ static jl_value_t *jl_decode_value_any(jl_ircode_state *s, uint8_t tag) JL_GC_DI return v; } -static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED +static jl_value_t *jl_decode_value(jl_ircode_state *s) { assert(!ios_eof(s->s)); jl_value_t *v; @@ -724,10 +813,12 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_SLOTNUMBER: v = jl_box_slotnumber(read_uint16(s->s)); return v; - case TAG_ARRAY: JL_FALLTHROUGH; case TAG_ARRAY1D: - return jl_decode_value_array(s, tag); + case TAG_ARRAY1D: + return jl_decode_value_array1d(s, tag); case TAG_MEMORYT: - return jl_decode_value_memory(s, jl_decode_value(s), jl_unbox_long(jl_decode_value(s))); + v = jl_decode_value(s); + JL_GC_PROMISE_ROOTED(v); // (JL_ALWAYS_LEAFTYPE) + return (jl_value_t*)jl_decode_value_memory(s, v, jl_unbox_long(jl_decode_value(s))); case TAG_EXPR: JL_FALLTHROUGH; case TAG_LONG_EXPR: JL_FALLTHROUGH; case TAG_CALL1: JL_FALLTHROUGH; @@ -738,27 +829,47 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_PHICNODE: JL_FALLTHROUGH; case TAG_LONG_PHICNODE: return jl_decode_value_phic(s, tag); case TAG_GOTONODE: JL_FALLTHROUGH; case TAG_QUOTENODE: + { v = jl_new_struct_uninit(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type); + JL_GC_PUSH1(&v); set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_GOTOIFNOT: + { v = jl_new_struct_uninit(jl_gotoifnot_type); + JL_GC_PUSH1(&v); set_nth_field(jl_gotoifnot_type, v, 0, jl_decode_value(s), 0); set_nth_field(jl_gotoifnot_type, v, 1, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_ENTERNODE: + { v = jl_new_struct_uninit(jl_enternode_type); + JL_GC_PUSH1(&v); set_nth_field(jl_enternode_type, v, 0, jl_decode_value(s), 0); set_nth_field(jl_enternode_type, v, 1, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_ARGUMENT: + { v = jl_new_struct_uninit(jl_argument_type); + JL_GC_PUSH1(&v); set_nth_field(jl_argument_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_RETURNNODE: + { v = jl_new_struct_uninit(jl_returnnode_type); + JL_GC_PUSH1(&v); set_nth_field(jl_returnnode_type, v, 0, jl_decode_value(s), 0); + JL_GC_POP(); return v; + } case TAG_SHORTER_INT64: v = jl_box_int64((int16_t)read_uint16(s->s)); return v; @@ -777,9 +888,14 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_UINT8: return jl_box_uint8(read_uint8(s->s)); case TAG_NEARBYGLOBAL: - assert(s->method != NULL); + { + jl_method_t *m = s->method; + assert(m != NULL); + JL_GC_PROMISE_ROOTED(m); v = jl_decode_value(s); - return jl_module_globalref(s->method->module, (jl_sym_t*)v); + JL_GC_PROMISE_ROOTED(v); // symbol + return jl_module_globalref(m->module, (jl_sym_t*)v); + } case TAG_NEARBYMODULE: assert(s->method != NULL); return (jl_value_t*)s->method->module; @@ -792,19 +908,29 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED case TAG_BASE: return (jl_value_t*)jl_base_module; case TAG_VECTORTY: + { v = jl_decode_value(s); - return jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1)); + JL_GC_PUSH1(&v); + v = jl_apply_type2((jl_value_t*)jl_array_type, v, jl_box_long(1)); + JL_GC_POP(); + return v; + } case TAG_PTRTY: + { v = jl_decode_value(s); - return jl_apply_type1((jl_value_t*)jl_pointer_type, v); + JL_GC_PUSH1(&v); + v = jl_apply_type1((jl_value_t*)jl_pointer_type, v); + JL_GC_POP(); + return v; + } case TAG_STRING: n = read_int32(s->s); v = jl_alloc_string(n); ios_readall(s->s, jl_string_data(v), n); return v; default: - assert(tag == TAG_GENERAL || tag == TAG_SHORT_GENERAL); - return jl_decode_value_any(s, tag); + assert(tag == TAG_GENERAL); + return jl_decode_value_any(s); } } @@ -880,8 +1006,6 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0); ios_t dest; ios_mem(&dest, 0); - int en = jl_gc_enable(0); // Might GC - size_t i; if (m->roots == NULL) { m->roots = jl_alloc_vec_any(0); @@ -919,38 +1043,35 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) write_int32(s.s, (int32_t)nargs); } - for (i = 0; i < 5; i++) { - int copy = 1; - if (i == 1) { // skip debuginfo - assert(jl_field_offset(jl_code_info_type, i) == offsetof(jl_code_info_t, debuginfo)); - continue; - } - jl_encode_value_(&s, jl_get_nth_field((jl_value_t*)code, i), copy); - } + jl_encode_value_(&s, (jl_value_t*)code->code, 1); + jl_encode_value_(&s, (jl_value_t*)code->ssavaluetypes, 1); + jl_encode_value_(&s, (jl_value_t*)code->ssaflags, 1); // For opaque closure, also save the slottypes. We technically only need the first slot type, // but this is simpler for now. We may want to refactor where this gets stored in the future. if (m->is_for_opaque_closure) jl_encode_value_(&s, code->slottypes, 1); + jl_string_t *v = NULL; + JL_GC_PUSH1(&v); // Slotnames. For regular methods, we require that m->slot_syms matches the // CodeInfo's slotnames, so we do not need to save it here. - if (m->generator) + if (m->generator) { // can't optimize generated functions - jl_encode_value_(&s, (jl_value_t*)jl_compress_argnames(code->slotnames), 1); - else + v = jl_compress_argnames(code->slotnames); + jl_encode_value_(&s, (jl_value_t*)v, 1); + } + else { jl_encode_value(&s, jl_nothing); + } write_uint8(s.s, s.relocatability); ios_flush(s.s); - jl_string_t *v = jl_pchar_to_string(s.s->buf, s.s->size); + v = jl_pchar_to_string(s.s->buf, s.s->size); ios_close(s.s); - if (jl_array_nrows(m->roots) == 0) { + if (jl_array_nrows(m->roots) == 0) m->roots = NULL; - } - JL_GC_PUSH1(&v); - jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC JL_GC_POP(); @@ -965,12 +1086,10 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t JL_LOCK(&m->writelock); // protect the roots array (Might GC) assert(jl_is_method(m)); assert(jl_is_string(data)); - size_t i; ios_t src; ios_mem(&src, 0); ios_setbuf(&src, (char*)jl_string_data(data), jl_string_len(data), 0); src.size = jl_string_len(data); - int en = jl_gc_enable(0); // Might GC jl_ircode_state s = { &src, m, @@ -978,8 +1097,10 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t jl_current_task->ptls, 1 }; - jl_code_info_t *code = jl_new_code_info_uninit(); + jl_value_t *slotnames = NULL; + JL_GC_PUSH2(&code, &slotnames); + jl_code_info_flags_t flags; flags.packed = read_uint16(s.s); code->inlining = flags.bits.inlining; @@ -991,9 +1112,9 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->purity.bits = read_uint16(s.s); code->inlining_cost = read_uint16(s.s); - size_t nslots = read_int32(s.s); code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots); + jl_gc_wb(code, code->slotflags); ios_readall(s.s, jl_array_data(code->slotflags, char), nslots); if (flags.bits.nargsmatchesmethod) { @@ -1002,25 +1123,29 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->nargs = read_int32(s.s); } - for (i = 0; i < 5; i++) { - if (i == 1) // skip debuginfo - continue; - assert(jl_field_isptr(jl_code_info_type, i)); - jl_value_t **fld = (jl_value_t**)((char*)jl_data_ptr(code) + jl_field_offset(jl_code_info_type, i)); - *fld = jl_decode_value(&s); - } - if (m->is_for_opaque_closure) + code->code = (jl_array_t*)jl_decode_value(&s); + jl_gc_wb(code, code->code); + code->ssavaluetypes = jl_decode_value(&s); + jl_gc_wb(code, code->ssavaluetypes); + code->ssaflags = (jl_array_t*)jl_decode_value(&s); + jl_gc_wb(code, code->ssaflags); + + if (m->is_for_opaque_closure) { code->slottypes = jl_decode_value(&s); + jl_gc_wb(code, code->slottypes); + } - jl_value_t *slotnames = jl_decode_value(&s); + slotnames = jl_decode_value(&s); if (!jl_is_string(slotnames)) slotnames = m->slot_syms; code->slotnames = jl_uncompress_argnames(slotnames); + jl_gc_wb(code, code->slotnames); if (metadata) code->debuginfo = jl_atomic_load_relaxed(&metadata->debuginfo); else code->debuginfo = m->debuginfo; + jl_gc_wb(code, code->debuginfo); assert(code->debuginfo); assert(jl_array_nrows(code->code) == codelocs_nstmts(code->debuginfo->codelocs) || jl_string_len(code->debuginfo->codelocs) == 0); @@ -1029,10 +1154,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t assert(ios_getc(s.s) == -1); ios_close(s.s); - JL_GC_PUSH1(&code); - jl_gc_enable(en); JL_UNLOCK(&m->writelock); // Might GC - JL_GC_POP(); if (metadata) { code->parent = metadata->def; jl_gc_wb(code, code->parent); @@ -1043,6 +1165,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->edges = (jl_value_t*)s.edges; jl_gc_wb(code, s.edges); } + JL_GC_POP(); return code; } @@ -1470,7 +1593,7 @@ void jl_init_serializer(void) deser_tag[TAG_DATATYPE] = (jl_value_t*)jl_datatype_type; deser_tag[TAG_SLOTNUMBER] = (jl_value_t*)jl_slotnumber_type; deser_tag[TAG_SVEC] = (jl_value_t*)jl_simplevector_type; - deser_tag[TAG_ARRAY] = (jl_value_t*)jl_array_type; + deser_tag[TAG_ARRAY1D] = (jl_value_t*)jl_array_type; deser_tag[TAG_MEMORYT] = (jl_value_t*)jl_genericmemory_type; deser_tag[TAG_EXPR] = (jl_value_t*)jl_expr_type; deser_tag[TAG_PHINODE] = (jl_value_t*)jl_phinode_type; diff --git a/src/serialize.h b/src/serialize.h index 3aa82a1d09a9b..549c1588073ff 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -7,69 +7,6 @@ extern "C" { #endif -#define TAG_SYMBOL 2 -#define TAG_SSAVALUE 3 -#define TAG_DATATYPE 4 -#define TAG_SLOTNUMBER 5 -#define TAG_SVEC 6 -#define TAG_ARRAY 7 -#define TAG_NULL 8 -#define TAG_EXPR 9 -#define TAG_PHINODE 10 -#define TAG_PHICNODE 11 -#define TAG_LONG_SYMBOL 12 -#define TAG_LONG_SVEC 13 -#define TAG_LONG_EXPR 14 -#define TAG_LONG_PHINODE 15 -#define TAG_LONG_PHICNODE 16 -#define TAG_METHODROOT 17 -#define TAG_EDGE 18 -#define TAG_STRING 19 -#define TAG_SHORT_INT64 20 -#define TAG_SHORT_GENERAL 21 -#define TAG_CNULL 22 -#define TAG_ARRAY1D 23 -#define TAG_SINGLETON 24 -#define TAG_MODULE 25 -#define TAG_TVAR 26 -#define TAG_METHOD_INSTANCE 27 -#define TAG_METHOD 28 -#define TAG_CODE_INSTANCE 29 -#define TAG_COMMONSYM 30 -#define TAG_NEARBYGLOBAL 31 -#define TAG_GLOBALREF 32 -#define TAG_CORE 33 -#define TAG_BASE 34 -#define TAG_BITYPENAME 35 -#define TAG_NEARBYMODULE 36 -#define TAG_INT32 37 -#define TAG_INT64 38 -#define TAG_UINT8 39 -#define TAG_VECTORTY 40 -#define TAG_PTRTY 41 -#define TAG_LONG_SSAVALUE 42 -#define TAG_LONG_METHODROOT 43 -#define TAG_LONG_EDGE 44 -#define TAG_SHORTER_INT64 45 -#define TAG_SHORT_INT32 46 -#define TAG_CALL1 47 -#define TAG_CALL2 48 -#define TAG_SHORT_BACKREF 49 -#define TAG_BACKREF 50 -#define TAG_UNIONALL 51 -#define TAG_GOTONODE 52 -#define TAG_QUOTENODE 53 -#define TAG_GENERAL 54 -#define TAG_GOTOIFNOT 55 -#define TAG_RETURNNODE 56 -#define TAG_ARGUMENT 57 -#define TAG_RELOC_METHODROOT 58 -#define TAG_BINDING 59 -#define TAG_MEMORYT 60 -#define TAG_ENTERNODE 61 - -#define LAST_TAG 61 - #define write_uint8(s, n) ios_putc((n), (s)) #define read_uint8(s) ((uint8_t)ios_getc((s))) #define write_int8(s, n) write_uint8((s), (n)) @@ -137,12 +74,6 @@ static inline uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT #define read_uint(s) read_uint32(s) #endif - -void *jl_lookup_ser_tag(jl_value_t *v); -void *jl_lookup_common_symbol(jl_value_t *v); -jl_value_t *jl_deser_tag(uint8_t tag); -jl_value_t *jl_deser_symbol(uint8_t tag); - #ifdef __cplusplus } #endif From 42e14d6983ac855726b31b753adba05aaefcf884 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 2 Dec 2024 12:32:16 -0500 Subject: [PATCH 06/25] ircode: small optimization for nearby ssavalue Since most ssavalue are used just after their def, this gives a small memory savings on compressed IR (a fraction of a percent). --- src/ircode.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/ircode.c b/src/ircode.c index d3137ce26edef..de27f8dbeefca 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -21,7 +21,7 @@ extern "C" { #define TAG_DATATYPE 4 #define TAG_SLOTNUMBER 5 #define TAG_SVEC 6 -// #define TAG_UNUSED 7 +#define TAG_NEARBYSSAVALUE 7 #define TAG_NULL 8 #define TAG_EXPR 9 #define TAG_PHINODE 10 @@ -82,6 +82,7 @@ extern "C" { typedef struct { ios_t *s; + size_t ssaid; // method we're compressing for jl_method_t *method; jl_svec_t *edges; @@ -307,6 +308,10 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) jl_encode_value(s, jl_globalref_name(v)); } } + else if (jl_is_ssavalue(v) && s->ssaid - ((jl_ssavalue_t*)v)->id < 256) { + write_uint8(s->s, TAG_NEARBYSSAVALUE); + write_uint8(s->s, s->ssaid - ((jl_ssavalue_t*)v)->id); + } else if (jl_is_ssavalue(v) && ((jl_ssavalue_t*)v)->id < 256 && ((jl_ssavalue_t*)v)->id >= 0) { write_uint8(s->s, TAG_SSAVALUE); write_uint8(s->s, ((jl_ssavalue_t*)v)->id); @@ -807,6 +812,9 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) case TAG_SSAVALUE: v = jl_box_ssavalue(read_uint8(s->s)); return v; + case TAG_NEARBYSSAVALUE: + v = jl_box_ssavalue(s->ssaid - read_uint8(s->s)); + return v; case TAG_LONG_SSAVALUE: v = jl_box_ssavalue(read_uint16(s->s)); return v; @@ -1014,6 +1022,7 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) jl_value_t *edges = code->edges; jl_ircode_state s = { &dest, + 0, m, (!isdef && jl_is_svec(edges)) ? (jl_svec_t*)edges : jl_emptysvec, jl_current_task->ptls, @@ -1043,7 +1052,13 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) write_int32(s.s, (int32_t)nargs); } - jl_encode_value_(&s, (jl_value_t*)code->code, 1); + size_t i, l = jl_array_dim0(code->code); + write_uint64(s.s, l); + for (i = 0; i < l; i++) { + s.ssaid = i; + jl_encode_value(&s, jl_array_ptr_ref(code->code, i)); + } + s.ssaid = 0; jl_encode_value_(&s, (jl_value_t*)code->ssavaluetypes, 1); jl_encode_value_(&s, (jl_value_t*)code->ssaflags, 1); @@ -1092,6 +1107,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t src.size = jl_string_len(data); jl_ircode_state s = { &src, + 0, m, metadata == NULL ? NULL : jl_atomic_load_relaxed(&metadata->edges), jl_current_task->ptls, @@ -1123,8 +1139,14 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->nargs = read_int32(s.s); } - code->code = (jl_array_t*)jl_decode_value(&s); + size_t i, n = read_uint64(s.s); + code->code = jl_alloc_array_1d(jl_array_any_type, n); jl_gc_wb(code, code->code); + for (i = 0; i < n; i++) { + s.ssaid = i; + jl_array_ptr_set(code->code, i, jl_decode_value(&s)); + } + s.ssaid = 0; code->ssavaluetypes = jl_decode_value(&s); jl_gc_wb(code, code->ssavaluetypes); code->ssaflags = (jl_array_t*)jl_decode_value(&s); From f1b0b010dd20591a013c71d8f3f7a09503e55baf Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 2 Dec 2024 18:02:41 -0500 Subject: [PATCH 07/25] Fix scope of hoisted signature-local variables (#56712) When we declare inner methods, e.g. the `f` in ``` function fs() f(lhs::Integer) = 1 f(lhs::Integer, rhs::(local x=Integer; x)) = 2 return f end ``` we must hoist the definition of the (appropriately mangled) generic function `f` to top-level, including all variables that were used in the signature definition of `f`. This situation is a bit unique in the language because it uses inner function scope, but gets executed in toplevel scope. For example, you're not allowed to use a local of the inner function in the signature definition: ``` julia> function fs() local x=Integer f(lhs::Integer, rhs::x) = 2 return f end ERROR: syntax: local variable x cannot be used in closure declaration Stacktrace: [1] top-level scope @ REPL[3]:1 ``` In particular, the restriction is signature-local: ``` julia> function fs() f(rhs::(local x=Integer; x)) = 1 f(lhs::Integer, rhs::x) = 2 return f end ERROR: syntax: local variable x cannot be used in closure declaration Stacktrace: [1] top-level scope @ REPL[4]:1 ``` There's a special intermediate form `moved-local` that gets generated for this definition. In c6c3d72d1cbddb3d27e0df0e739bb27dd709a413, this form stopped getting generated for certain inner methods. I suspect this happened because of the incorrect assumption that the set of moved locals is being computed over all signatures, rather than being a per-signature property. The result of all of this was that this is one of the few places where lowering still generated a symbol as the lhs of an assignment for a global (instead of globalref), because the code that generates the assignment assumes it's a local, but the later pass doesn't know this. Because we still retain the code for this from before we started using globalref consistently, this wasn't generally causing a problems, except possibly leaking a global (or potentially assigning to a global when this wasn't intended). However, in follow on work, I want to make use of knowing whether the LHS is a global or local in lowering, so this was causing me trouble. Fix all of this by putting back the `moved-local` where it was dropped. Fixes #56711 --- src/julia-syntax.scm | 1 + test/syntax.jl | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 72e97da3c2daa..852d5eb4d6f86 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4279,6 +4279,7 @@ f(x) = yt(x) (if (or exists (and short (pair? alldefs))) `(toplevel-butfirst (null) + ,@(map (lambda (v) `(moved-local ,v)) moved-vars) ,@sp-inits ,@mk-method (latestworld)) diff --git a/test/syntax.jl b/test/syntax.jl index d9d311ac6615d..0fb752bae480f 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -4033,3 +4033,11 @@ end @test isa(create_inner_f_no_methods(), Function) @test length(methods(create_inner_f_no_methods())) == 0 @test Base.invoke_in_world(first(methods(create_inner_f_one_method)).primary_world, create_inner_f_one_method()) == 1 + +# Issue 56711 - Scope of signature hoisting +function fs56711() + f(lhs::Integer) = 1 + f(lhs::Integer, rhs::(local x_should_not_be_defined=Integer; x_should_not_be_defined)) = 2 + return f +end +@test !@isdefined(x_should_not_be_defined) From 1b37a2f9e0afc5f684e0b2a0af17cd9526aae529 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 2 Dec 2024 20:09:26 -0500 Subject: [PATCH 08/25] ircode: avoid serializing ssaflags in the common case when they are all zero When not all-zero, run-length encoding would also probably be great here for lowered code (before inference). --- Compiler/test/interpreter_exec.jl | 6 +++--- src/ircode.c | 30 +++++++++++++++++++++++------- src/julia_internal.h | 1 + 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Compiler/test/interpreter_exec.jl b/Compiler/test/interpreter_exec.jl index 4972df1a27202..b1d450f8f4286 100644 --- a/Compiler/test/interpreter_exec.jl +++ b/Compiler/test/interpreter_exec.jl @@ -23,7 +23,7 @@ let m = Meta.@lower 1 + 1 ] nstmts = length(src.code) src.ssavaluetypes = nstmts - src.ssaflags = fill(UInt8(0x00), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) src.debuginfo = Core.DebugInfo(:none) Compiler.verify_ir(Compiler.inflate_ir(src)) global test29262 = true @@ -63,7 +63,7 @@ let m = Meta.@lower 1 + 1 ] nstmts = length(src.code) src.ssavaluetypes = nstmts - src.ssaflags = fill(UInt8(0x00), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) src.debuginfo = Core.DebugInfo(:none) m.args[1] = copy(src) Compiler.verify_ir(Compiler.inflate_ir(src)) @@ -103,7 +103,7 @@ let m = Meta.@lower 1 + 1 ] nstmts = length(src.code) src.ssavaluetypes = nstmts - src.ssaflags = fill(UInt8(0x00), nstmts) + src.ssaflags = fill(zero(UInt32), nstmts) src.debuginfo = Core.DebugInfo(:none) Compiler.verify_ir(Compiler.inflate_ir(src)) global test29262 = true diff --git a/src/ircode.c b/src/ircode.c index de27f8dbeefca..9e64e3fe2b574 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -549,7 +549,8 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t has_fcall, uint8_t nospecializeinfer, uint8_t isva, - uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod) + uint8_t inlining, uint8_t constprop, uint8_t nargsmatchesmethod, + jl_array_t *ssaflags) { jl_code_info_flags_t flags; flags.bits.propagate_inbounds = propagate_inbounds; @@ -559,6 +560,11 @@ static jl_code_info_flags_t code_info_flags(uint8_t propagate_inbounds, uint8_t flags.bits.inlining = inlining; flags.bits.constprop = constprop; flags.bits.nargsmatchesmethod = nargsmatchesmethod; + flags.bits.has_ssaflags = 0; + const uint32_t *ssaflag_data = jl_array_data(ssaflags, uint32_t); + for (size_t i = 0, l = jl_array_dim0(ssaflags); i < l; i++) + if (ssaflag_data[i]) + flags.bits.has_ssaflags = 1; return flags; } @@ -1033,7 +1039,8 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) jl_code_info_flags_t flags = code_info_flags(code->propagate_inbounds, code->has_fcall, code->nospecializeinfer, code->isva, code->inlining, code->constprop, - nargsmatchesmethod); + nargsmatchesmethod, + code->ssaflags); write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS)); write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY)); write_uint16(s.s, checked_size(code->inlining_cost, IR_DATASIZE_INLINING_COST)); @@ -1060,7 +1067,11 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) } s.ssaid = 0; jl_encode_value_(&s, (jl_value_t*)code->ssavaluetypes, 1); - jl_encode_value_(&s, (jl_value_t*)code->ssaflags, 1); + assert(jl_typetagis(code->ssaflags, jl_array_uint32_type)); + assert(jl_array_dim0(code->ssaflags) == l); + const uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t); + if (flags.bits.has_ssaflags) + ios_write(s.s, (const char*)ssaflags_data, l * sizeof(*ssaflags_data)); // For opaque closure, also save the slottypes. We technically only need the first slot type, // but this is simpler for now. We may want to refactor where this gets stored in the future. @@ -1139,18 +1150,23 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->nargs = read_int32(s.s); } - size_t i, n = read_uint64(s.s); - code->code = jl_alloc_array_1d(jl_array_any_type, n); + size_t i, l = read_uint64(s.s); + code->code = jl_alloc_array_1d(jl_array_any_type, l); jl_gc_wb(code, code->code); - for (i = 0; i < n; i++) { + for (i = 0; i < l; i++) { s.ssaid = i; jl_array_ptr_set(code->code, i, jl_decode_value(&s)); } s.ssaid = 0; code->ssavaluetypes = jl_decode_value(&s); jl_gc_wb(code, code->ssavaluetypes); - code->ssaflags = (jl_array_t*)jl_decode_value(&s); + code->ssaflags = jl_alloc_array_1d(jl_array_uint32_type, l); jl_gc_wb(code, code->ssaflags); + uint32_t *ssaflags_data = jl_array_data(code->ssaflags, uint32_t); + if (flags.bits.has_ssaflags) + ios_readall(s.s, (char*)ssaflags_data, l * sizeof(*ssaflags_data)); + else + memset(ssaflags_data, 0, l * sizeof(*ssaflags_data)); if (m->is_for_opaque_closure) { code->slottypes = jl_decode_value(&s); diff --git a/src/julia_internal.h b/src/julia_internal.h index e081c94329deb..2178f603441e0 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -651,6 +651,7 @@ typedef struct { uint16_t nargsmatchesmethod:1; uint16_t inlining:2; // 0 = use heuristic; 1 = aggressive; 2 = none uint16_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none + uint16_t has_ssaflags:1; } jl_code_info_flags_bitfield_t; typedef union { From efa917e8775cd40fdd74b657d1e5d2db2342cd07 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 2 Dec 2024 20:28:05 -0500 Subject: [PATCH 09/25] Extend `invoke` to accept CodeInstance (#56660) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an alternative mechanism to #56650 that largely achieves the same result, but by hooking into `invoke` rather than a generated function. They are orthogonal mechanisms, and its possible we want both. However, in #56650, both Jameson and Valentin were skeptical of the generated function signature bottleneck. This PR is sort of a hybrid of mechanism in #52964 and what I proposed in https://github.com/JuliaLang/julia/pull/56650#issuecomment-2493800877. In particular, this PR: 1. Extends `invoke` to support a CodeInstance in place of its usual `types` argument. 2. Adds a new `typeinf` optimized generic. The semantics of this optimized generic allow the compiler to instead call a companion `typeinf_edge` function, allowing a mid-inference interpreter switch (like #52964), without being forced through a concrete signature bottleneck. However, if calling `typeinf_edge` does not work (e.g. because the compiler version is mismatched), this still has well defined semantics, you just don't get inference support. The additional benefit of the `typeinf` optimized generic is that it lets custom cache owners tell the runtime how to "cure" code instances that have lost their native code. Currently the runtime only knows how to do that for `owner == nothing` CodeInstances (by re-running inference). This extension is not implemented, but the idea is that the runtime would be permitted to call the `typeinf` optimized generic on the dead CodeInstance's `owner` and `def` fields to obtain a cured CodeInstance (or a user-actionable error from the plugin). This PR includes an implementation of `with_new_compiler` from #56650. This PR includes just enough compiler support to make the compiler optimize this to the same code that #56650 produced: ``` julia> @code_typed with_new_compiler(sin, 1.0) CodeInfo( 1 ─ $(Expr(:foreigncall, :(:jl_get_tls_world_age), UInt64, svec(), 0, :(:ccall)))::UInt64 │ %2 = builtin Core.getfield(args, 1)::Float64 │ %3 = invoke sin(%2::Float64)::Float64 └── return %3 ) => Float64 ``` However, the implementation here is extremely incomplete. I'm putting it up only as a directional sketch to see if people prefer it over #56650. If so, I would prepare a cleaned up version of this PR that has the optimized generics as well as the curing support, but not the full inference integration (which needs a fair bit more work). --- .../extras/CompilerDevTools/Manifest.toml | 15 +++++ Compiler/extras/CompilerDevTools/Project.toml | 5 ++ .../CompilerDevTools/src/CompilerDevTools.jl | 56 +++++++++++++++++++ Compiler/src/abstractinterpretation.jl | 48 +++++++++++++--- Compiler/src/abstractlattice.jl | 2 +- Compiler/src/bootstrap.jl | 10 +++- Compiler/src/stmtinfo.jl | 11 ++++ Compiler/src/utilities.jl | 4 +- NEWS.md | 2 + base/docs/basedocs.jl | 17 ++++++ base/optimized_generics.jl | 27 +++++++++ src/builtins.c | 22 ++++++++ src/interpreter.c | 24 +++++++- test/core.jl | 14 +++++ 14 files changed, 242 insertions(+), 15 deletions(-) create mode 100644 Compiler/extras/CompilerDevTools/Manifest.toml create mode 100644 Compiler/extras/CompilerDevTools/Project.toml create mode 100644 Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl diff --git a/Compiler/extras/CompilerDevTools/Manifest.toml b/Compiler/extras/CompilerDevTools/Manifest.toml new file mode 100644 index 0000000000000..bcc78f1ded34a --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Manifest.toml @@ -0,0 +1,15 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.0-DEV" +manifest_format = "2.0" +project_hash = "84f495a1bf065c95f732a48af36dd0cd2cefb9d5" + +[[deps.Compiler]] +path = "../.." +uuid = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" +version = "0.0.2" + +[[deps.CompilerDevTools]] +path = "." +uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" +version = "0.0.0" diff --git a/Compiler/extras/CompilerDevTools/Project.toml b/Compiler/extras/CompilerDevTools/Project.toml new file mode 100644 index 0000000000000..a2749a9a56a84 --- /dev/null +++ b/Compiler/extras/CompilerDevTools/Project.toml @@ -0,0 +1,5 @@ +name = "CompilerDevTools" +uuid = "92b2d91f-d2bd-4c05-9214-4609ac33433f" + +[deps] +Compiler = "807dbc54-b67e-4c79-8afb-eafe4df6f2e1" diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl new file mode 100644 index 0000000000000..cd3f7b7b4bdac --- /dev/null +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -0,0 +1,56 @@ +module CompilerDevTools + +using Compiler +using Core.IR + +struct SplitCacheOwner; end +struct SplitCacheInterp <: Compiler.AbstractInterpreter + world::UInt + inf_params::Compiler.InferenceParams + opt_params::Compiler.OptimizationParams + inf_cache::Vector{Compiler.InferenceResult} + function SplitCacheInterp(; + world::UInt = Base.get_world_counter(), + inf_params::Compiler.InferenceParams = Compiler.InferenceParams(), + opt_params::Compiler.OptimizationParams = Compiler.OptimizationParams(), + inf_cache::Vector{Compiler.InferenceResult} = Compiler.InferenceResult[]) + new(world, inf_params, opt_params, inf_cache) + end +end + +Compiler.InferenceParams(interp::SplitCacheInterp) = interp.inf_params +Compiler.OptimizationParams(interp::SplitCacheInterp) = interp.opt_params +Compiler.get_inference_world(interp::SplitCacheInterp) = interp.world +Compiler.get_inference_cache(interp::SplitCacheInterp) = interp.inf_cache +Compiler.cache_owner(::SplitCacheInterp) = SplitCacheOwner() + +import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge +@eval @noinline typeinf(::SplitCacheOwner, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world(which(typeinf, Tuple{SplitCacheOwner, MethodInstance, UInt8}).primary_world, Compiler.typeinf_ext, SplitCacheInterp(; world=Base.tls_world_age()), mi, source_mode) + +@eval @noinline function typeinf_edge(::SplitCacheOwner, mi::MethodInstance, parent_frame::Compiler.InferenceState, world::UInt, source_mode::UInt8) + # TODO: This isn't quite right, we're just sketching things for now + interp = SplitCacheInterp(; world) + Compiler.typeinf_edge(interp, mi.def, mi.specTypes, Core.svec(), parent_frame, false, false) +end + +# TODO: This needs special compiler support to properly case split for multiple +# method matches, etc. +@noinline function mi_for_tt(tt, world=Base.tls_world_age()) + interp = SplitCacheInterp(; world) + match, _ = Compiler.findsup(tt, Compiler.method_table(interp)) + Base.specialize_method(match) +end + +function with_new_compiler(f, args...) + tt = Base.signature_type(f, typeof(args)) + world = Base.tls_world_age() + new_compiler_ci = Core.OptimizedGenerics.CompilerPlugins.typeinf( + SplitCacheOwner(), mi_for_tt(tt), Compiler.SOURCE_MODE_ABI + ) + invoke(f, new_compiler_ci, args...) +end + +export with_new_compiler + +end diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index 5946adf80ad52..ffb4f4312cdcf 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -2218,16 +2218,46 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt ft = widenconst(ft′) ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())) types = argtype_by_index(argtypes, 3) - if types isa Const && types.val isa Method - method = types.val::Method - types = method # argument value - lookupsig = method.sig # edge kind - argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) - nargtype = typeintersect(lookupsig, argtype) - nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) - nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + if types isa Const && types.val isa Union{Method, CodeInstance} + method_or_ci = types.val + if isa(method_or_ci, CodeInstance) + our_world = sv.world.this + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + sig = method_or_ci.def.specTypes + exct = method_or_ci.exctype + if !hasintersect(argtype, sig) + return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + elseif !(argtype <: sig) + exct = Union{exct, TypeError} + end + callee_valid_range = WorldRange(method_or_ci.min_world, method_or_ci.max_world) + if !(our_world in callee_valid_range) + if our_world < first(callee_valid_range) + update_valid_age!(sv, WorldRange(first(sv.world.valid_worlds), first(callee_valid_range)-1)) + else + update_valid_age!(sv, WorldRange(last(callee_valid_range)+1, last(sv.world.valid_worlds))) + end + return Future(CallMeta(Bottom, ErrorException, EFFECTS_THROWS, NoCallInfo())) + end + # TODO: When we add curing, we may want to assume this is nothrow + if (method_or_ci.owner === Nothing && method_ir_ci.def.def isa Method) + exct = Union{exct, ErrorException} + end + update_valid_age!(sv, callee_valid_range) + return Future(CallMeta(method_or_ci.rettype, exct, Effects(decode_effects(method_or_ci.ipo_purity_bits), nothrow=(exct===Bottom)), + InvokeCICallInfo(method_or_ci))) + else + method = method_or_ci::Method + types = method # argument value + lookupsig = method.sig # edge kind + argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) + nargtype = typeintersect(lookupsig, argtype) + nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) + nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below + # Fall through to generic invoke handling + end else - widenconst(types) >: Method && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + widenconst(types) >: Union{Method, CodeInstance} && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) unwrapped = unwrap_unionall(types) diff --git a/Compiler/src/abstractlattice.jl b/Compiler/src/abstractlattice.jl index c1f3050739170..7a9cff8918175 100644 --- a/Compiler/src/abstractlattice.jl +++ b/Compiler/src/abstractlattice.jl @@ -229,7 +229,7 @@ end if isa(t, Const) # don't consider mutable values useful constants val = t.val - return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || !ismutable(val) + return isa(val, Symbol) || isa(val, Type) || isa(val, Method) || isa(val, CodeInstance) || !ismutable(val) end isa(t, PartialTypeVar) && return false # this isn't forwardable return is_const_prop_profitable_arg(widenlattice(𝕃), t) diff --git a/Compiler/src/bootstrap.jl b/Compiler/src/bootstrap.jl index 7ee439cc7ac67..475c53e317152 100644 --- a/Compiler/src/bootstrap.jl +++ b/Compiler/src/bootstrap.jl @@ -5,7 +5,15 @@ # especially try to make sure any recursive and leaf functions have concrete signatures, # since we won't be able to specialize & infer them at runtime -activate_codegen!() = ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) +function activate_codegen!() + ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel) + Core.eval(Compiler, quote + let typeinf_world_age = Base.tls_world_age() + @eval Core.OptimizedGenerics.CompilerPlugins.typeinf(::Nothing, mi::MethodInstance, source_mode::UInt8) = + Base.invoke_in_world($(Expr(:$, :typeinf_world_age)), typeinf_ext_toplevel, mi, Base.tls_world_age(), source_mode) + end + end) +end function bootstrap!() let time() = ccall(:jl_clock_now, Float64, ()) diff --git a/Compiler/src/stmtinfo.jl b/Compiler/src/stmtinfo.jl index 830bfa02d2d99..9f0f1f38d4c8a 100644 --- a/Compiler/src/stmtinfo.jl +++ b/Compiler/src/stmtinfo.jl @@ -268,6 +268,17 @@ end add_edges_impl(edges::Vector{Any}, info::UnionSplitApplyCallInfo) = for split in info.infos; add_edges!(edges, split); end +""" + info::InvokeCICallInfo + +Represents a resolved call to `Core.invoke` targeting a `Core.CodeInstance` +""" +struct InvokeCICallInfo <: CallInfo + edge::CodeInstance +end +add_edges_impl(edges::Vector{Any}, info::InvokeCICallInfo) = + add_one_edge!(edges, info.edge) + """ info::InvokeCallInfo diff --git a/Compiler/src/utilities.jl b/Compiler/src/utilities.jl index 29f3dfa4afd4a..da20f9aafbfb2 100644 --- a/Compiler/src/utilities.jl +++ b/Compiler/src/utilities.jl @@ -54,8 +54,8 @@ function count_const_size(@nospecialize(x), count_self::Bool = true) # No definite size (isa(x, GenericMemory) || isa(x, String) || isa(x, SimpleVector)) && return MAX_INLINE_CONST_SIZE + 1 - if isa(x, Module) || isa(x, Method) - # We allow modules and methods, because we already assume they are externally + if isa(x, Module) || isa(x, Method) || isa(x, CodeInstance) + # We allow modules, methods and CodeInstance, because we already assume they are externally # rooted, so we count their contents as 0 size. return sizeof(Ptr{Cvoid}) end diff --git a/NEWS.md b/NEWS.md index c1d5f38f337b0..b77a786c24823 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,6 +103,8 @@ New library features * New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351]) * `isless` (and thus `cmp`, sorting, etc.) is now supported for zero-dimensional `AbstractArray`s ([#55772]) * `invoke` now supports passing a Method instead of a type signature making this interface somewhat more flexible for certain uncommon use cases ([#56692]). +* `invoke` now supports passing a CodeInstance instead of a type, which can enable +certain compiler plugin workflows ([#56660]). Standard library changes ------------------------ diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 5119ceaf2164a..141950f5e92ff 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2031,6 +2031,7 @@ applicable """ invoke(f, argtypes::Type, args...; kwargs...) invoke(f, argtypes::Method, args...; kwargs...) + invoke(f, argtypes::CodeInstance, args...; kwargs...) Invoke a method for the given generic function `f` matching the specified types `argtypes` on the specified arguments `args` and passing the keyword arguments `kwargs`. The arguments `args` must @@ -2056,6 +2057,22 @@ Note in particular that the specified `Method` may be entirely unreachable from If the method is part of the ordinary method table, this call behaves similar to `invoke(f, method.sig, args...)`. +!!! compat "Julia 1.12" + Passing a `Method` requires Julia 1.12. + +# Passing a `CodeInstance` instead of a signature +The `argtypes` argument may be a `CodeInstance`, bypassing both method lookup and specialization. +The semantics of this invocation are similar to a function pointer call of the `CodeInstance`'s +`invoke` pointer. It is an error to invoke a `CodeInstance` with arguments that do not match its +parent MethodInstance or from a world age not included in the `min_world`/`max_world` range. +It is undefined behavior to invoke a CodeInstance whose behavior does not match the constraints +specified in its fields. For some code instances with `owner !== nothing` (i.e. those generated +by external compilers), it may be an error to invoke them after passing through precompilation. +This is an advanced interface intended for use with external compiler plugins. + +!!! compat "Julia 1.12" + Passing a `CodeInstance` requires Julia 1.12. + # Examples ```jldoctest julia> f(x::Real) = x^2; diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl index 86b54a294564d..c0b953777ca94 100644 --- a/base/optimized_generics.jl +++ b/base/optimized_generics.jl @@ -54,4 +54,31 @@ module KeyValue function get end end +# Compiler-recognized intrinsics for compiler plugins +""" + module CompilerPlugins + +Implements a pair of functions `typeinf`/`typeinf_edge`. When the optimizer sees +a call to `typeinf`, it has license to instead call `typeinf_edge`, supplying the +current inference stack in `parent_frame` (but otherwise supplying the arguments +to `typeinf`). typeinf_edge will return the `CodeInstance` that `typeinf` would +have returned at runtime. The optimizer may perform a non-IPO replacement of +the call to `typeinf` by the result of `typeinf_edge`. In addition, the IPO-safe +fields of the `CodeInstance` may be propagated in IPO mode. +""" +module CompilerPlugins + """ + typeinf(owner, mi, source_mode)::CodeInstance + + Return a `CodeInstance` for the given `mi` whose valid results include at + the least current tls world and satisfies the requirements of `source_mode`. + """ + function typeinf end + + """ + typeinf_edge(owner, mi, parent_frame, world, abi_mode)::CodeInstance + """ + function typeinf_edge end +end + end diff --git a/src/builtins.c b/src/builtins.c index c6b0bf130550b..3f555da9d2a83 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1587,6 +1587,28 @@ JL_CALLABLE(jl_f_invoke) if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)m->sig)) jl_type_error("invoke: argument type error", argtypes, arg_tuple(args[0], &args[2], nargs - 1)); return jl_gf_invoke_by_method(m, args[0], &args[2], nargs - 1); + } else if (jl_is_code_instance(argtypes)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)args[1]; + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + if (jl_tuple1_isa(args[0], &args[2], nargs - 2, (jl_datatype_t*)codeinst->def->specTypes)) { + jl_type_error("invoke: argument type error", codeinst->def->specTypes, arg_tuple(args[0], &args[2], nargs - 2)); + } + if (jl_atomic_load_relaxed(&codeinst->min_world) > jl_current_task->world_age || + jl_current_task->world_age > jl_atomic_load_relaxed(&codeinst->max_world)) { + jl_error("invoke: CodeInstance not valid for this world"); + } + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + return invoke(args[0], &args[2], nargs - 2, codeinst); + } else { + if (codeinst->owner != jl_nothing || !jl_is_method(codeinst->def->def.value)) { + jl_error("Failed to invoke or compile external codeinst"); + } + return jl_gf_invoke_by_method(codeinst->def->def.method, args[0], &args[2], nargs - 1); + } } if (!jl_is_tuple_type(jl_unwrap_unionall(argtypes))) jl_type_error("invoke", (jl_value_t*)jl_anytuple_type_type, argtypes); diff --git a/src/interpreter.c b/src/interpreter.c index 49a3afed14f0c..2dc1c9ed5a0c4 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -137,8 +137,28 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state argv[i-1] = eval_value(args[i], s); jl_value_t *c = args[0]; assert(jl_is_code_instance(c) || jl_is_method_instance(c)); - jl_method_instance_t *meth = jl_is_method_instance(c) ? (jl_method_instance_t*)c : ((jl_code_instance_t*)c)->def; - jl_value_t *result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, meth); + jl_value_t *result = NULL; + if (jl_is_code_instance(c)) { + jl_code_instance_t *codeinst = (jl_code_instance_t*)c; + assert(jl_atomic_load_relaxed(&codeinst->min_world) <= jl_current_task->world_age && + jl_current_task->world_age <= jl_atomic_load_relaxed(&codeinst->max_world)); + jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); + if (!invoke) { + jl_compile_codeinst(codeinst); + invoke = jl_atomic_load_acquire(&codeinst->invoke); + } + if (invoke) { + result = invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst); + + } else { + if (codeinst->owner != jl_nothing) { + jl_error("Failed to invoke or compile external codeinst"); + } + result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, codeinst->def); + } + } else { + result = jl_invoke(argv[0], nargs == 2 ? NULL : &argv[1], nargs - 2, (jl_method_instance_t*)c); + } JL_GC_POP(); return result; } diff --git a/test/core.jl b/test/core.jl index 39d02d5d567c9..63952e8728e1e 100644 --- a/test/core.jl +++ b/test/core.jl @@ -8353,9 +8353,23 @@ end @test eval(Expr(:toplevel, :(@define_call(f_macro_defined1)))) == 1 @test @define_call(f_macro_defined2) == 1 +# `invoke` of `Method` let m = which(+, (Int, Int)) @eval f56692(i) = invoke(+, $m, i, 4) global g56692() = f56692(5) == 9 ? "true" : false end @test @inferred(f56692(3)) == 7 @test @inferred(g56692()) == "true" + +# `invoke` of `CodeInstance` +f_invalidate_me() = return 1 +f_invoke_me() = return f_invalidate_me() +@test f_invoke_me() == 1 +const f_invoke_me_ci = Base.specialize_method(Base._which(Tuple{typeof(f_invoke_me)})).cache +f_call_me() = invoke(f_invoke_me, f_invoke_me_ci) +@test invoke(f_invoke_me, f_invoke_me_ci) == 1 +@test f_call_me() == 1 +@test_throws TypeError invoke(f_invoke_me, f_invoke_me_ci, 1) +f_invalidate_me() = 2 +@test_throws ErrorException invoke(f_invoke_me, f_invoke_me_ci) +@test_throws ErrorException f_call_me() From ba8290ea4cf47abd69ff45d2e011259dca161ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1ll=20Haraldsson?= Date: Tue, 3 Dec 2024 01:46:24 +0000 Subject: [PATCH 10/25] Update references to LTS from v1.6 to v1.10 (#56729) --- CONTRIBUTING.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd7f1c89420d6..9a3fe2cd441b3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -278,8 +278,8 @@ Be sure to change the UUID value back before making the pull request. The process of [creating a patch release](https://docs.julialang.org/en/v1/devdocs/build/distributing/#Point-releasing-101) is roughly as follows: -1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release - branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well. +1. Create a new branch (e.g. `backports-release-1.10`) against the relevant minor release + branch (e.g. `release-1.10`). Usually a corresponding pull request is created as well. 2. Add commits, nominally from `master` (hence "backports"), to that branch. See below for more information on this process. @@ -291,8 +291,8 @@ The process of [creating a patch release](https://docs.julialang.org/en/v1/devdo the pull request associated with the backports branch. Fix any issues. 4. Once all test and benchmark reports look good, merge the backports branch into - the corresponding release branch (e.g. merge `backports-release-1.6` into - `release-1.6`). + the corresponding release branch (e.g. merge `backports-release-1.10` into + `release-1.10`). 5. Open a pull request that bumps the version of the relevant minor release to the next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718). From 2c87290f2e7d5c057d1f4bdce9c5568c01f31d69 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 3 Dec 2024 01:19:03 -0500 Subject: [PATCH 11/25] lowering: Canonicalize to builtins for global assignment (#56713) This adjusts lowering to emit `setglobal!` for assignment to globals, thus making the `=` expr head used only for slots in `CodeInfo` and entirely absent in `IRCode`. The primary reason for this is just to reduce the number of special cases that compiler passes have to reason about. In IRCode, `=` was already essentially equivalent to `setglobal!`, so there's no good reason not to canonicalize. Finally, the `=` syntax form for globals already gets recognized specially to insert `convert` calls to their declared binding type, so this doesn't impose any additional requirements on lowering to distinguish local from global assignments. In general, I'd also like to separate syntax and intermediate forms as much as possible where their semantics differ, which this accomplises by just using the builtin. This change is mostly semantically invisible, except that spliced-in GlobalRefs now declare their binding because they are indistinguishable from ordinary assignments at the stage where I inserted the lowering. If we want to, we can preserve the difference, but it'd be a bit more annoying for not much benefit, because: 1. The spliced in version was only recently made to work anyway, and 2. The semantics of when exactly bindings are declared is still messy on master anyway and will get tweaked shortly in further binding partitions work. --- Compiler/src/abstractinterpretation.jl | 8 +------- Compiler/src/optimize.jl | 11 +---------- Compiler/src/ssair/EscapeAnalysis.jl | 12 ------------ Compiler/src/ssair/verify.jl | 10 ++-------- Compiler/test/inline.jl | 2 +- src/interpreter.c | 24 +++++------------------- src/julia-syntax.scm | 14 ++++++++++---- test/syntax.jl | 5 +++-- 8 files changed, 23 insertions(+), 63 deletions(-) diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index ffb4f4312cdcf..24daaf1e6f626 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -4050,13 +4050,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextr end effects === nothing || merge_override_effects!(interp, effects, frame) if lhs !== nothing && rt !== Bottom - if isa(lhs, SlotNumber) - changes = StateUpdate(lhs, VarState(rt, false)) - elseif isa(lhs, GlobalRef) - handle_global_assignment!(interp, frame, currsaw_latestworld, lhs, rt) - else - merge_effects!(interp, frame, EFFECTS_UNKNOWN) - end + changes = StateUpdate(lhs::SlotNumber, VarState(rt, false)) end end if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW) diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl index d2dfd26bfa00d..856e64e404388 100644 --- a/Compiler/src/optimize.jl +++ b/Compiler/src/optimize.jl @@ -1408,16 +1408,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes) return extyp === Union{} ? 0 : UNKNOWN_CALL_COST elseif head === :(=) - if ex.args[1] isa GlobalRef - cost = UNKNOWN_CALL_COST - else - cost = 0 - end - a = ex.args[2] - if a isa Expr - cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params)) - end - return cost + return statement_cost(ex.args[2], -1, src, sptypes, params) elseif head === :copyast return 100 end diff --git a/Compiler/src/ssair/EscapeAnalysis.jl b/Compiler/src/ssair/EscapeAnalysis.jl index 47a7840628bb5..4f32550d056b2 100644 --- a/Compiler/src/ssair/EscapeAnalysis.jl +++ b/Compiler/src/ssair/EscapeAnalysis.jl @@ -642,13 +642,6 @@ function analyze_escapes(ir::IRCode, nargs::Int, 𝕃ₒ::AbstractLattice, get_e escape_invoke!(astate, pc, stmt.args) elseif head === :new || head === :splatnew escape_new!(astate, pc, stmt.args) - elseif head === :(=) - lhs, rhs = stmt.args - if isa(lhs, GlobalRef) # global store - add_escape_change!(astate, rhs, ⊤) - else - unexpected_assignment!(ir, pc) - end elseif head === :foreigncall escape_foreigncall!(astate, pc, stmt.args) elseif head === :throw_undef_if_not # XXX when is this expression inserted ? @@ -981,11 +974,6 @@ function escape_unanalyzable_obj!(astate::AnalysisState, @nospecialize(obj), obj return objinfo end -@noinline function unexpected_assignment!(ir::IRCode, pc::Int) - @eval Main (ir = $ir; pc = $pc) - error("unexpected assignment found: inspect `Main.pc` and `Main.pc`") -end - is_nothrow(ir::IRCode, pc::Int) = has_flag(ir[SSAValue(pc)], IR_FLAG_NOTHROW) # NOTE if we don't maintain the alias set that is separated from the lattice state, we can do diff --git a/Compiler/src/ssair/verify.jl b/Compiler/src/ssair/verify.jl index 59051058e1750..072a564a31f78 100644 --- a/Compiler/src/ssair/verify.jl +++ b/Compiler/src/ssair/verify.jl @@ -363,14 +363,8 @@ function verify_ir(ir::IRCode, print::Bool=true, isforeigncall = false if isa(stmt, Expr) if stmt.head === :(=) - if stmt.args[1] isa SSAValue - @verify_error "SSAValue as assignment LHS" - raise_error() - end - if stmt.args[2] isa GlobalRef - # undefined GlobalRef as assignment RHS is OK - continue - end + @verify_error "Assignment should have been removed during SSA conversion" + raise_error() elseif stmt.head === :isdefined if length(stmt.args) > 2 || (length(stmt.args) == 2 && !isa(stmt.args[2], Bool)) @verify_error "malformed isdefined" diff --git a/Compiler/test/inline.jl b/Compiler/test/inline.jl index 46b78db3b781c..5f95fb761859e 100644 --- a/Compiler/test/inline.jl +++ b/Compiler/test/inline.jl @@ -2111,7 +2111,7 @@ for run_finalizer_escape_test in (run_finalizer_escape_test1, run_finalizer_esca global finalizer_escape::Int = 0 let src = code_typed1(run_finalizer_escape_test, Tuple{Bool, Bool}) - @test any(x->isexpr(x, :(=)), src.code) + @test any(iscall((src, Core.setglobal!)), src.code) end let diff --git a/src/interpreter.c b/src/interpreter.c index 2dc1c9ed5a0c4..fa4fba94a60a5 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -589,25 +589,11 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip, s->locals[n - 1] = rhs; } else { - jl_module_t *modu; - jl_sym_t *sym; - // Plain assignment is allowed to create bindings at - // toplevel and only for the current module - int alloc = toplevel; - if (jl_is_globalref(lhs)) { - modu = jl_globalref_mod(lhs); - sym = jl_globalref_name(lhs); - alloc &= modu == s->module; - } - else { - assert(jl_is_symbol(lhs)); - modu = s->module; - sym = (jl_sym_t*)lhs; - } - JL_GC_PUSH1(&rhs); - jl_binding_t *b = jl_get_binding_wr(modu, sym, alloc); - jl_checked_assignment(b, modu, sym, rhs); - JL_GC_POP(); + // This is an unmodeled error. Our frontend only generates + // legal `=` expressions, but since GlobalRef used to be legal + // here, give a loud error in case any package is modifying + // internals. + jl_error("Invalid IR: Assignment LHS not a Slot"); } } else if (head == jl_leave_sym) { diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 852d5eb4d6f86..c7ca5d553bb31 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4607,14 +4607,20 @@ f(x) = yt(x) (cdr cnd) (list cnd)))))) tests)) + (define (emit-assignment-or-setglobal lhs rhs) + (if (globalref? lhs) + (begin + (emit `(global ,lhs)) + (emit `(call (top setglobal!) ,(cadr lhs) (inert ,(caddr lhs)) ,rhs))) + (emit `(= ,lhs ,rhs)))) (define (emit-assignment lhs rhs) (if rhs (if (valid-ir-rvalue? lhs rhs) - (emit `(= ,lhs ,rhs)) + (emit-assignment-or-setglobal lhs rhs) (let ((rr (make-ssavalue))) (emit `(= ,rr ,rhs)) - (emit `(= ,lhs ,rr)))) - (emit `(= ,lhs (null)))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved + (emit-assignment-or-setglobal lhs rr))) + (emit-assignment-or-setglobal lhs `(null))) ; in unreachable code (such as after return), still emit the assignment so that the structure of those uses is preserved #f) ;; the interpreter loop. `break-labels` keeps track of the labels to jump to ;; for all currently closing break-blocks. @@ -4693,7 +4699,7 @@ f(x) = yt(x) rhs (make-ssavalue)))) (if (not (eq? rr rhs)) (emit `(= ,rr ,rhs))) - (emit `(= ,lhs ,rr)) + (emit-assignment-or-setglobal lhs rr) (if tail (emit-return tail rr)) rr) (emit-assignment lhs rhs)))))) diff --git a/test/syntax.jl b/test/syntax.jl index 0fb752bae480f..315f2d8b0f38b 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3713,7 +3713,7 @@ end module Foreign54607 # Syntactic, not dynamic try_to_create_binding1() = (Foreign54607.foo = 2) - # GlobalRef is allowed for same-module assignment + # GlobalRef is allowed for same-module assignment and declares the binding @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo2)) = 2) function global_create_binding() global bar @@ -3728,7 +3728,7 @@ module Foreign54607 end @test_throws ErrorException (Foreign54607.foo = 1) @test_throws ErrorException Foreign54607.try_to_create_binding1() -@test_throws ErrorException Foreign54607.try_to_create_binding2() +Foreign54607.try_to_create_binding2() function assign_in_foreign_module() (Foreign54607.foo = 1) nothing @@ -3744,6 +3744,7 @@ Foreign54607.global_create_binding() @test isdefined(Foreign54607, :baz) @test isdefined(Foreign54607, :compiled_assign) @test isdefined(Foreign54607, :gr_assign) +@test isdefined(Foreign54607, :foo2) Foreign54607.bar = 8 @test Foreign54607.bar == 8 begin From 9acf1129c91cddd9194f529ad9cc82afd2694190 Mon Sep 17 00:00:00 2001 From: Julius Krumbiegel <22495855+jkrumbiegel@users.noreply.github.com> Date: Tue, 3 Dec 2024 08:45:28 +0100 Subject: [PATCH 12/25] Actually show glyphs for latex or emoji shortcodes being suggested in the REPL (#54800) When a user requests a completion for a backslash shortcode, this PR adds the glyphs for all the suggestions to the output. This makes it much easier to find the result one is looking for, especially if the user doesn't know all latex and emoji specifiers by heart. Before: image After: image --------- Co-authored-by: Dilum Aluthge --- stdlib/REPL/src/LineEdit.jl | 38 +++++++++++++++++++++++++---- stdlib/REPL/src/REPL.jl | 6 ++--- stdlib/REPL/src/REPLCompletions.jl | 34 +++++++++++++++++--------- stdlib/REPL/test/replcompletions.jl | 33 +++++++++++++++++-------- 4 files changed, 81 insertions(+), 30 deletions(-) diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl index e881a65ca6b1c..e15807f645119 100644 --- a/stdlib/REPL/src/LineEdit.jl +++ b/stdlib/REPL/src/LineEdit.jl @@ -181,7 +181,18 @@ struct EmptyHistoryProvider <: HistoryProvider end reset_state(::EmptyHistoryProvider) = nothing -complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = String[], "", true +# Before, completions were always given as strings. But at least for backslash +# completions, it's nice to see what glyphs are available in the completion preview. +# To separate between what's shown in the preview list of possible matches, and what's +# actually completed, we introduce this struct. +struct NamedCompletion + completion::String # what is actually completed, for example "\trianglecdot" + name::String # what is displayed in lists of possible completions, for example "◬ \trianglecdot" +end + +NamedCompletion(completion::String) = NamedCompletion(completion, completion) + +complete_line(c::EmptyCompletionProvider, s; hint::Bool=false) = NamedCompletion[], "", true # complete_line can be specialized for only two arguments, when the active module # doesn't matter (e.g. Pkg does this) @@ -308,6 +319,7 @@ end set_action!(s, command::Symbol) = nothing +common_prefix(completions::Vector{NamedCompletion}) = common_prefix(map(x -> x.completion, completions)) function common_prefix(completions::Vector{String}) ret = "" c1 = completions[1] @@ -330,6 +342,8 @@ end # does not restrict column length when multiple columns are used. const MULTICOLUMN_THRESHOLD = 5 +show_completions(s::PromptState, completions::Vector{NamedCompletion}) = show_completions(s, map(x -> x.name, completions)) + # Show available completions function show_completions(s::PromptState, completions::Vector{String}) # skip any lines of input after the cursor @@ -374,6 +388,18 @@ function complete_line(s::MIState) end end +# due to close coupling of the Pkg ReplExt `complete_line` can still return a vector of strings, +# so we convert those in this helper +function complete_line_named(args...; kwargs...)::Tuple{Vector{NamedCompletion},String,Bool} + result = complete_line(args...; kwargs...)::Union{Tuple{Vector{NamedCompletion},String,Bool},Tuple{Vector{String},String,Bool}} + if result isa Tuple{Vector{NamedCompletion},String,Bool} + return result + else + completions, partial, should_complete = result + return map(NamedCompletion, completions), partial, should_complete + end +end + function check_for_hint(s::MIState) st = state(s) if !options(st).hint_tab_completes || !eof(buffer(st)) @@ -383,12 +409,14 @@ function check_for_hint(s::MIState) return clear_hint(st) end - completions, partial, should_complete = try - complete_line(st.p.complete, st, s.active_module; hint = true)::Tuple{Vector{String},String,Bool} + named_completions, partial, should_complete = try + complete_line_named(st.p.complete, st, s.active_module; hint = true) catch @debug "error completing line for hint" exception=current_exceptions() return clear_hint(st) end + completions = map(x -> x.completion, named_completions) + isempty(completions) && return clear_hint(st) # Don't complete for single chars, given e.g. `x` completes to `xor` if length(partial) > 1 && should_complete @@ -425,7 +453,7 @@ function clear_hint(s::ModeState) end function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=false) - completions, partial, should_complete = complete_line(s.p.complete, s, mod; hint)::Tuple{Vector{String},String,Bool} + completions, partial, should_complete = complete_line_named(s.p.complete, s, mod; hint) isempty(completions) && return false if !should_complete # should_complete is false for cases where we only want to show @@ -435,7 +463,7 @@ function complete_line(s::PromptState, repeats::Int, mod::Module; hint::Bool=fal # Replace word by completion prev_pos = position(s) push_undo(s) - edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1]) + edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1].completion) else p = common_prefix(completions) if !isempty(p) && p != partial diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index 50f610ff3b3e8..6c3f4bd4ba73a 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -843,7 +843,7 @@ function complete_line(c::REPLCompletionProvider, s::PromptState, mod::Module; h full = LineEdit.input_string(s) ret, range, should_complete = completions(full, lastindex(partial), mod, c.modifiers.shift, hint) c.modifiers = LineEdit.Modifiers() - return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=false) @@ -851,14 +851,14 @@ function complete_line(c::ShellCompletionProvider, s::PromptState; hint::Bool=fa partial = beforecursor(s.input_buffer) full = LineEdit.input_string(s) ret, range, should_complete = shell_completions(full, lastindex(partial), hint) - return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end function complete_line(c::LatexCompletions, s; hint::Bool=false) partial = beforecursor(LineEdit.buffer(s)) full = LineEdit.input_string(s)::String ret, range, should_complete = bslash_completions(full, lastindex(partial), hint)[2] - return unique!(String[completion_text(x) for x in ret]), partial[range], should_complete + return unique!(LineEdit.NamedCompletion[named_completion(x) for x in ret]), partial[range], should_complete end with_repl_linfo(f, repl) = f(outstream(repl)) diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl index 1f2c0cabbdb38..0bffb1a1015cd 100644 --- a/stdlib/REPL/src/REPLCompletions.jl +++ b/stdlib/REPL/src/REPLCompletions.jl @@ -2,7 +2,7 @@ module REPLCompletions -export completions, shell_completions, bslash_completions, completion_text +export completions, shell_completions, bslash_completions, completion_text, named_completion using Core: Const # We want to insulate the REPLCompletion module from any changes the user may @@ -13,6 +13,8 @@ using Base.Meta using Base: propertynames, something, IdSet using Base.Filesystem: _readdirx +using ..REPL.LineEdit: NamedCompletion + abstract type Completion end struct TextCompletion <: Completion @@ -57,8 +59,10 @@ struct MethodCompletion <: Completion end struct BslashCompletion <: Completion - bslash::String + completion::String # what is actually completed, for example "\trianglecdot" + name::String # what is displayed, for example "◬ \trianglecdot" end +BslashCompletion(completion::String) = BslashCompletion(completion, completion) struct ShellCompletion <: Completion text::String @@ -114,13 +118,21 @@ _completion_text(c::PackageCompletion) = c.package _completion_text(c::PropertyCompletion) = sprint(Base.show_sym, c.property) _completion_text(c::FieldCompletion) = sprint(Base.show_sym, c.field) _completion_text(c::MethodCompletion) = repr(c.method) -_completion_text(c::BslashCompletion) = c.bslash _completion_text(c::ShellCompletion) = c.text _completion_text(c::DictCompletion) = c.key _completion_text(c::KeywordArgumentCompletion) = c.kwarg*'=' completion_text(c) = _completion_text(c)::String +named_completion(c::BslashCompletion) = NamedCompletion(c.completion, c.name) + +function named_completion(c) + text = completion_text(c)::String + return NamedCompletion(text, text) +end + +named_completion_completion(c) = named_completion(c).completion::String + const Completions = Tuple{Vector{Completion}, UnitRange{Int}, Bool} function completes_global(x, name) @@ -984,12 +996,10 @@ function bslash_completions(string::String, pos::Int, hint::Bool=false) end # return possible matches; these cannot be mixed with regular # Julian completions as only latex / emoji symbols contain the leading \ - if startswith(s, "\\:") # emoji - namelist = Iterators.filter(k -> startswith(k, s), keys(emoji_symbols)) - else # latex - namelist = Iterators.filter(k -> startswith(k, s), keys(latex_symbols)) - end - return (true, (Completion[BslashCompletion(name) for name in sort!(collect(namelist))], slashpos:pos, true)) + symbol_dict = startswith(s, "\\:") ? emoji_symbols : latex_symbols + namelist = Iterators.filter(k -> startswith(k, s), keys(symbol_dict)) + completions = Completion[BslashCompletion(name, "$(symbol_dict[name]) $name") for name in sort!(collect(namelist))] + return (true, (completions, slashpos:pos, true)) end return (false, (Completion[], 0:-1, false)) end @@ -1099,7 +1109,7 @@ function complete_keyword_argument(partial::String, last_idx::Int, context_modul complete_keyval!(suggestions, last_word) end - return sort!(suggestions, by=completion_text), wordrange + return sort!(suggestions, by=named_completion_completion), wordrange end function get_loading_candidates(pkgstarts::String, project_file::String) @@ -1298,7 +1308,7 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif complete_identifiers!(suggestions, context_module, string, name, pos, separatorpos, startpos; shift) - return sort!(unique!(completion_text, suggestions), by=completion_text), (separatorpos+1):pos, true + return sort!(unique!(named_completion, suggestions), by=named_completion_completion), (separatorpos+1):pos, true elseif inc_tag === :cmd # TODO: should this call shell_completions instead of partially reimplementing it? let m = match(r"[\t\n\r\"`><=*?|]| (?!\\)", reverse(partial)) # fuzzy shell_parse in reverse @@ -1496,7 +1506,7 @@ function completions(string::String, pos::Int, context_module::Module=Main, shif complete_identifiers!(suggestions, context_module, string, name, pos, separatorpos, startpos; comp_keywords, complete_modules_only, shift) - return sort!(unique!(completion_text, suggestions), by=completion_text), namepos:pos, true + return sort!(unique!(named_completion, suggestions), by=named_completion_completion), namepos:pos, true end function shell_completions(string, pos, hint::Bool=false) diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl index b259567884486..2c8d48cc232cf 100644 --- a/stdlib/REPL/test/replcompletions.jl +++ b/stdlib/REPL/test/replcompletions.jl @@ -170,17 +170,23 @@ end function map_completion_text(completions) c, r, res = completions - return map(completion_text, c), r, res + return map(x -> named_completion(x).completion, c), r, res +end + +function map_named_completion(completions) + c, r, res = completions + return map(named_completion, c), r, res end test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s)))) test_scomplete(s) = map_completion_text(@inferred(shell_completions(s, lastindex(s)))) -test_bslashcomplete(s) = map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2]) test_complete_context(s, m=@__MODULE__; shift::Bool=true) = map_completion_text(@inferred(completions(s,lastindex(s), m, shift))) test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo) test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false))) +test_bslashcomplete(s) = map_named_completion(@inferred(bslash_completions(s, lastindex(s)))[2]) + test_methods_list(@nospecialize(f), tt) = map(x -> string(x.method), Base._methods_by_ftype(Base.signature_type(f, tt), 10, Base.get_world_counter())) @@ -350,7 +356,8 @@ end # test latex symbol completions let s = "\\alpha" c, r = test_bslashcomplete(s) - @test c[1] == "α" + @test c[1].completion == "α" + @test c[1].name == "α" @test r == 1:lastindex(s) @test length(c) == 1 end @@ -358,7 +365,8 @@ end # test latex symbol completions after unicode #9209 let s = "α\\alpha" c, r = test_bslashcomplete(s) - @test c[1] == "α" + @test c[1].completion == "α" + @test c[1].name == "α" @test r == 3:sizeof(s) @test length(c) == 1 end @@ -366,20 +374,25 @@ end # test emoji symbol completions let s = "\\:koala:" c, r = test_bslashcomplete(s) - @test c[1] == "🐨" + @test c[1].completion == "🐨" + @test c[1].name == "🐨" @test r == 1:sizeof(s) @test length(c) == 1 end let s = "\\:ko" c, r = test_bslashcomplete(s) - @test "\\:koala:" in c + ko = only(filter(c) do namedcompletion + namedcompletion.completion == "\\:koala:" + end) + @test ko.name == "🐨 \\:koala:" end # test emoji symbol completions after unicode #9209 let s = "α\\:koala:" c, r = test_bslashcomplete(s) - @test c[1] == "🐨" + @test c[1].name == "🐨" + @test c[1].completion == "🐨" @test r == 3:sizeof(s) @test length(c) == 1 end @@ -1069,8 +1082,8 @@ let s, c, r # Issue #8047 s = "@show \"/dev/nul\"" c,r = completions(s, 15) - c = map(completion_text, c) - @test "null\"" in c + c = map(named_completion, c) + @test "null\"" in [_c.completion for _c in c] @test r == 13:15 @test s[r] == "nul" @@ -1476,7 +1489,7 @@ function test_dict_completion(dict_name) @test c == Any["\"abcd\"]"] s = "$dict_name[\"abcd]" # trailing close bracket c, r = completions(s, lastindex(s) - 1) - c = map(completion_text, c) + c = map(x -> named_completion(x).completion, c) @test c == Any["\"abcd\""] s = "$dict_name[:b" c, r = test_complete(s) From 2590e675885b97579a7531c343a546f6f5bbcbe5 Mon Sep 17 00:00:00 2001 From: Daniel Wennberg Date: Tue, 3 Dec 2024 05:36:39 -0800 Subject: [PATCH 13/25] Update annotated.jl docstrings according to #55741 (#56736) Annotations now use a NamedTuple --- base/strings/annotated.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index c5c330fe0dfcd..814ee2afa9d55 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -55,9 +55,9 @@ like [`string`](@ref) but preserves any annotations present in the arguments. # Examples -```julia-repl +```jldoctest; setup=:(using Base: AnnotatedString) julia> AnnotatedString("this is an example annotated string", - [(1:18, :A => 1), (12:28, :B => 2), (18:35, :C => 3)]) + [(1:18, :A, 1), (12:28, :B, 2), (18:35, :C, 3)]) "this is an example annotated string" ``` """ @@ -87,8 +87,8 @@ AnnotatedChar(s::S, annotations::Vector{$Annotation}) # Examples -```julia-repl -julia> AnnotatedChar('j', :label => 1) +```jldoctest; setup=:(using Base: AnnotatedChar) +julia> AnnotatedChar('j', [(:label, 1)]) 'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase) ``` """ @@ -232,11 +232,11 @@ See also [`AnnotatedString`](@ref) and [`AnnotatedChar`](@ref). ## Examples -```julia-repl +```jldoctest; setup=:(using Base: AnnotatedString, annotatedstring) julia> annotatedstring("now a AnnotatedString") "now a AnnotatedString" -julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label => 1)]), ", and unannotated") +julia> annotatedstring(AnnotatedString("annotated", [(1:9, :label, 1)]), ", and unannotated") "annotated, and unannotated" ``` """ @@ -344,7 +344,7 @@ end annotate!(str::AnnotatedString, [range::UnitRange{Int}], label::Symbol, value) annotate!(str::SubString{AnnotatedString}, [range::UnitRange{Int}], label::Symbol, value) -Annotate a `range` of `str` (or the entire string) with a labeled value (`label` => `value`). +Annotate a `range` of `str` (or the entire string) with a labeled value `(label, value)`. To remove existing `label` annotations, use a value of `nothing`. The order in which annotations are applied to `str` is semantically meaningful, @@ -365,7 +365,7 @@ annotate!(s::SubString{<:AnnotatedString}, label::Symbol, @nospecialize(val::Any """ annotate!(char::AnnotatedChar, label::Symbol, value::Any) -Annotate `char` with the pair `label => value`. +Annotate `char` with the labeled value `(label, value)`. """ annotate!(c::AnnotatedChar, label::Symbol, @nospecialize(val::Any)) = (push!(c.annotations, Annotation((; label, val))); c) From 5ae26276c1a1834f7b2ebdaf03696278df59b11b Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 3 Dec 2024 19:50:25 -0500 Subject: [PATCH 14/25] effects: pack bits better (#56737) There is no reason to preserve duplicates of the bits for the value before and after inference, and many of the numbers in the comments had gotten incorrect. Now we are able to pack all 16 bits of currently defined bitflags into 20 bits, instead of 25 bits (although either case still rounds up to 32). There was also no reason for InferenceState to be mutating of CodeInfo during execution, so remove that mutation. --- Compiler/src/inferencestate.jl | 25 +++++++++---------- Compiler/src/optimize.jl | 45 +++++++++++++++++++--------------- Compiler/src/typeinfer.jl | 3 ++- src/julia.h | 13 +++------- 4 files changed, 42 insertions(+), 44 deletions(-) diff --git a/Compiler/src/inferencestate.jl b/Compiler/src/inferencestate.jl index 9eb929b725fbf..6988e74310fc5 100644 --- a/Compiler/src/inferencestate.jl +++ b/Compiler/src/inferencestate.jl @@ -267,6 +267,7 @@ mutable struct InferenceState bb_vartables::Vector{Union{Nothing,VarTable}} # nothing if not analyzed yet bb_saw_latestworld::Vector{Bool} ssavaluetypes::Vector{Any} + ssaflags::Vector{UInt32} edges::Vector{Any} stmt_info::Vector{CallInfo} @@ -343,6 +344,7 @@ mutable struct InferenceState bb_vartable1[i] = VarState(argtyp, i > nargtypes) end src.ssavaluetypes = ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ] + ssaflags = copy(src.ssaflags) unreachable = BitSet() pclimitations = IdSet{InferenceState}() @@ -374,7 +376,7 @@ mutable struct InferenceState this = new( mi, WorldWithRange(world, valid_worlds), mod, sptypes, slottypes, src, cfg, spec_info, - currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, edges, stmt_info, + currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, ssaflags, edges, stmt_info, tasks, pclimitations, limitations, cycle_backedges, callstack, parentid, frameid, cycleid, result, unreachable, bestguess, exc_bestguess, ipo_effects, restrict_abstract_call_sites, cache_mode, insert_coverage, @@ -1004,25 +1006,22 @@ function callers_in_cycle(sv::InferenceState) end callers_in_cycle(sv::IRInterpretationState) = AbsIntCycle(sv.callstack::Vector{AbsIntState}, 0, 0) -get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc] +get_curr_ssaflag(sv::InferenceState) = sv.ssaflags[sv.currpc] get_curr_ssaflag(sv::IRInterpretationState) = sv.ir.stmts[sv.curridx][:flag] -has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.src.ssaflags[sv.currpc], flag) +has_curr_ssaflag(sv::InferenceState, flag::UInt32) = has_flag(sv.ssaflags[sv.currpc], flag) has_curr_ssaflag(sv::IRInterpretationState, flag::UInt32) = has_flag(sv.ir.stmts[sv.curridx][:flag], flag) function set_curr_ssaflag!(sv::InferenceState, flag::UInt32, mask::UInt32=typemax(UInt32)) - curr_flag = sv.src.ssaflags[sv.currpc] - sv.src.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag -end -function set_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32, mask::UInt32=typemax(UInt32)) - curr_flag = sv.ir.stmts[sv.curridx][:flag] - sv.ir.stmts[sv.curridx][:flag] = (curr_flag & ~mask) | flag + curr_flag = sv.ssaflags[sv.currpc] + sv.ssaflags[sv.currpc] = (curr_flag & ~mask) | flag + nothing end -add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.src.ssaflags[sv.currpc] |= flag +add_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] |= flag add_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = add_flag!(sv.ir.stmts[sv.curridx], flag) -sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.src.ssaflags[sv.currpc] &= ~flag +sub_curr_ssaflag!(sv::InferenceState, flag::UInt32) = sv.ssaflags[sv.currpc] &= ~flag sub_curr_ssaflag!(sv::IRInterpretationState, flag::UInt32) = sub_flag!(sv.ir.stmts[sv.curridx], flag) function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::Effects) @@ -1035,8 +1034,8 @@ function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects:: end merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return -decode_statement_effects_override(sv::AbsIntState) = - decode_statement_effects_override(get_curr_ssaflag(sv)) +decode_statement_effects_override(sv::InferenceState) = decode_statement_effects_override(sv.src.ssaflags[sv.currpc]) +decode_statement_effects_override(sv::IRInterpretationState) = decode_statement_effects_override(UInt32(0)) struct InferenceLoopState rt diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl index 856e64e404388..1c02bd67b5bd4 100644 --- a/Compiler/src/optimize.jl +++ b/Compiler/src/optimize.jl @@ -17,37 +17,41 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError const IR_FLAG_NULL = zero(UInt32) # This statement is marked as @inbounds by user. -# Ff replaced by inlining, any contained boundschecks may be removed. +# If replaced by inlining, any contained boundschecks may be removed. const IR_FLAG_INBOUNDS = one(UInt32) << 0 # This statement is marked as @inline by user const IR_FLAG_INLINE = one(UInt32) << 1 # This statement is marked as @noinline by user const IR_FLAG_NOINLINE = one(UInt32) << 2 -# An optimization pass has updated this statement in a way that may -# have exposed information that inference did not see. Re-running -# inference on this statement may be profitable. -const IR_FLAG_REFINED = one(UInt32) << 3 # This statement is proven :consistent -const IR_FLAG_CONSISTENT = one(UInt32) << 4 +const IR_FLAG_CONSISTENT = one(UInt32) << 3 # This statement is proven :effect_free -const IR_FLAG_EFFECT_FREE = one(UInt32) << 5 +const IR_FLAG_EFFECT_FREE = one(UInt32) << 4 # This statement is proven :nothrow -const IR_FLAG_NOTHROW = one(UInt32) << 6 -# This statement is proven :terminates -const IR_FLAG_TERMINATES = one(UInt32) << 7 -# This statement is proven :noub -const IR_FLAG_NOUB = one(UInt32) << 8 -# TODO: Both of these should eventually go away once -# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY -const IR_FLAG_EFIIMO = one(UInt32) << 9 -# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY -const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 10 +const IR_FLAG_NOTHROW = one(UInt32) << 5 +# This statement is proven :terminates_globally +const IR_FLAG_TERMINATES = one(UInt32) << 6 +#const IR_FLAG_TERMINATES_LOCALLY = one(UInt32) << 7 +#const IR_FLAG_NOTASKSTATE = one(UInt32) << 8 +#const IR_FLAG_INACCESSIBLEMEM = one(UInt32) << 9 +const IR_FLAG_NOUB = one(UInt32) << 10 +#const IR_FLAG_NOUBINIB = one(UInt32) << 11 +#const IR_FLAG_CONSISTENTOVERLAY = one(UInt32) << 12 # This statement is :nortcall -const IR_FLAG_NORTCALL = one(UInt32) << 11 +const IR_FLAG_NORTCALL = one(UInt32) << 13 +# An optimization pass has updated this statement in a way that may +# have exposed information that inference did not see. Re-running +# inference on this statement may be profitable. +const IR_FLAG_REFINED = one(UInt32) << 16 # This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE -const IR_FLAG_UNUSED = one(UInt32) << 12 +const IR_FLAG_UNUSED = one(UInt32) << 17 +# TODO: Both of these next two should eventually go away once +# This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY +const IR_FLAG_EFIIMO = one(UInt32) << 18 +# This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY +const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 19 -const NUM_IR_FLAGS = 13 # sync with julia.h +const NUM_IR_FLAGS = 3 # sync with julia.h const IR_FLAGS_EFFECTS = IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | @@ -815,6 +819,7 @@ function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState) sv.nortcall = false end end + nothing end function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState) diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index 83ec0271ea474..20c0a5000bd39 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -413,6 +413,7 @@ function finishinfer!(me::InferenceState, interp::AbstractInterpreter) ipo_effects = result.ipo_effects = me.ipo_effects = adjust_effects(me) result.exc_result = me.exc_bestguess = refine_exception_type(me.exc_bestguess, ipo_effects) me.src.rettype = widenconst(ignorelimited(bestguess)) + me.src.ssaflags = me.ssaflags me.src.min_world = first(me.world.valid_worlds) me.src.max_world = last(me.world.valid_worlds) istoplevel = !(me.linfo.def isa Method) @@ -936,7 +937,7 @@ function codeinfo_for_const(interp::AbstractInterpreter, mi::MethodInstance, @no tree.slotflags = fill(0x00, nargs) tree.ssavaluetypes = 1 tree.debuginfo = DebugInfo(mi) - tree.ssaflags = UInt32[0] + tree.ssaflags = [IR_FLAG_NULL] tree.rettype = Core.Typeof(val) tree.edges = Core.svec() set_inlineable!(tree, true) diff --git a/src/julia.h b/src/julia.h index 944fd3c43a297..6c0dd700f9472 100644 --- a/src/julia.h +++ b/src/julia.h @@ -279,7 +279,7 @@ typedef union __jl_purity_overrides_t { } _jl_purity_overrides_t; #define NUM_EFFECTS_OVERRIDES 11 -#define NUM_IR_FLAGS 13 +#define NUM_IR_FLAGS 3 // This type describes a single function body typedef struct _jl_code_info_t { @@ -292,15 +292,8 @@ typedef struct _jl_code_info_t { // 1 << 0 = inbounds region // 1 << 1 = callsite inline region // 1 << 2 = callsite noinline region - // 1 << 3 = refined statement - // 1 << 4 = :consistent - // 1 << 5 = :effect_free - // 1 << 6 = :nothrow - // 1 << 7 = :terminates - // 1 << 8 = :noub - // 1 << 9 = :effect_free_if_inaccessiblememonly - // 1 << 10 = :inaccessiblemem_or_argmemonly - // 1 << 11-19 = callsite effects overrides + // 1 << 3-14 = purity + // 1 << 16+ = reserved for inference // miscellaneous data: jl_array_t *slotnames; // names of local variables jl_array_t *slotflags; // local var bit flags From e48bf8cba9a9305050704e3f21b53431ae123e93 Mon Sep 17 00:00:00 2001 From: DilumAluthgeBot <43731525+DilumAluthgeBot@users.noreply.github.com> Date: Wed, 4 Dec 2024 07:04:39 -0500 Subject: [PATCH 15/25] =?UTF-8?q?=F0=9F=A4=96=20[master]=20Bump=20the=20Pk?= =?UTF-8?q?g=20stdlib=20from=207b759d7f0=20to=20d84a1a38b=20(#56743)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stdlib: Pkg URL: https://github.com/JuliaLang/Pkg.jl.git Stdlib branch: master Julia branch: master Old commit: 7b759d7f0 New commit: d84a1a38b Julia version: 1.12.0-DEV Pkg version: 1.12.0 Bump invoked by: @KristofferC Powered by: [BumpStdlibs.jl](https://github.com/JuliaLang/BumpStdlibs.jl) Diff: https://github.com/JuliaLang/Pkg.jl/compare/7b759d7f0af56c5ad01f2289bbad71284a556970...d84a1a38b6466fa7400e9ad2874a0ef963a10456 ``` $ git log --oneline 7b759d7f0..d84a1a38b d84a1a38b Allow use of a url and subdir in [sources] (#4039) cd75456a8 Fix heading (#4102) b61066120 rename FORMER_STDLIBS -> UPGRADABLE_STDLIBS (#4070) 814949ed2 Increase version of `StaticArrays` in `why` tests (#4077) 83e13631e Run CI on backport branch too (#4094) ``` Co-authored-by: Dilum Aluthge --- .../Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 | 1 - .../Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 | 1 - .../Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 | 1 + .../Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 | 1 + stdlib/Pkg.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 delete mode 100644 deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 create mode 100644 deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 create mode 100644 deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 diff --git a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 b/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 deleted file mode 100644 index e55e74562d717..0000000000000 --- a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -20d63322fc5b547d4c2464c27e9a6a0e diff --git a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 b/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 deleted file mode 100644 index 5094dddb8142a..0000000000000 --- a/deps/checksums/Pkg-7b759d7f0af56c5ad01f2289bbad71284a556970.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -93dd178af474c76cce9368416d34570b66cc44c7c311e4dc14569d3f9deed70afcae8a2b1976535ed0732ed305c6d8d1b0ef04cbeeaa3af2891e97650d51467d diff --git a/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 new file mode 100644 index 0000000000000..f5f87b3f2fa9e --- /dev/null +++ b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/md5 @@ -0,0 +1 @@ +1a5c995237815e0d7d5ee1ec50006c1c diff --git a/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 new file mode 100644 index 0000000000000..839cd4704a135 --- /dev/null +++ b/deps/checksums/Pkg-d84a1a38b6466fa7400e9ad2874a0ef963a10456.tar.gz/sha512 @@ -0,0 +1 @@ +2e0b984c8272fe4468e0b527698a58d5010ef3f18d38d862665902e5a0e0b7ba65d3085b3d9de367a7b48a216e71d1611687804254503b3905b7b4d217a00f2f diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version index 8b40c45c4366f..9ef9ca4b04376 100644 --- a/stdlib/Pkg.version +++ b/stdlib/Pkg.version @@ -1,4 +1,4 @@ PKG_BRANCH = master -PKG_SHA1 = 7b759d7f0af56c5ad01f2289bbad71284a556970 +PKG_SHA1 = d84a1a38b6466fa7400e9ad2874a0ef963a10456 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1 From 8aac4cc282004e5f4c7fc70810207956fde98164 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Thu, 5 Dec 2024 00:33:33 -0700 Subject: [PATCH 16/25] Hide IRShow include from Revise (#56756) Revise in theory wants to re-evaluate this include, but it fails at doing so, because the include call no longer works after bootstrap. It happens to work right now on master, because the lowering of `Compiler.include` happens to hide the include call from Revise, but that's a Revise bug I'm about to fix. Address this by moving the include call into the package and using an absolute include if necessary. --- Compiler/src/Compiler.jl | 16 +++++++++++++++- base/show.jl | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Compiler/src/Compiler.jl b/Compiler/src/Compiler.jl index 2cf7e5508196c..ddcea8a6c5cbb 100644 --- a/Compiler/src/Compiler.jl +++ b/Compiler/src/Compiler.jl @@ -181,12 +181,26 @@ include("bootstrap.jl") include("reflection_interface.jl") include("opaque_closure.jl") +macro __SOURCE_FILE__() + __source__.file === nothing && return nothing + return __source__.file::Symbol +end + module IRShow end +function load_irshow!() + if isdefined(Base, :end_base_include) + # This code path is exclusively for Revise, which may want to re-run this + # after bootstrap. + include(IRShow, Base.joinpath(Base.dirname(Base.String(@__SOURCE_FILE__)), "ssair/show.jl")) + else + include(IRShow, "ssair/show.jl") + end +end if !isdefined(Base, :end_base_include) # During bootstrap, skip including this file and defer it to base/show.jl to include later else # When this module is loaded as the standard library, include this file as usual - include(IRShow, "ssair/show.jl") + load_irshow!() end end # baremodule Compiler diff --git a/base/show.jl b/base/show.jl index 23957d6e29b2d..cb36488b92bc1 100644 --- a/base/show.jl +++ b/base/show.jl @@ -2821,7 +2821,7 @@ function show(io::IO, vm::Core.TypeofVararg) end end -Compiler.include(Compiler.IRShow, "ssair/show.jl") # define `show` for the compiler types +Compiler.load_irshow!() const IRShow = Compiler.IRShow # an alias for compatibility function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source) From e572d2316fb5bd6cb5e57c0d4300edddb4eb2062 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:12:23 +0900 Subject: [PATCH 17/25] fixup!: JuliaLang/julia#56756 (#56758) We need to quote it, otherwise it would result in `UnderVarError`. --- Compiler/src/Compiler.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Compiler/src/Compiler.jl b/Compiler/src/Compiler.jl index ddcea8a6c5cbb..8dd15462ed998 100644 --- a/Compiler/src/Compiler.jl +++ b/Compiler/src/Compiler.jl @@ -183,7 +183,7 @@ include("opaque_closure.jl") macro __SOURCE_FILE__() __source__.file === nothing && return nothing - return __source__.file::Symbol + return QuoteNode(__source__.file::Symbol) end module IRShow end From 5835c3b69e4e0f47eeb9a512d91622b50ad3423c Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Thu, 5 Dec 2024 17:38:12 +0530 Subject: [PATCH 18/25] Accept more general Integer sizes in reshape (#55521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR generalizes the `reshape` methods to accept `Integer`s instead of `Int`s, and adds a `_reshape_uncolon` method for `Integer` arguments. The current `_reshape_uncolon` method that accepts `Int`s is left unchanged to ensure that the inferred types are not impacted. I've also tried to ensure that most `Integer` subtypes in `Base` that may be safely converted to `Int`s pass through that method. The call sequence would now go like this: ```julia reshape(A, ::Tuple{Vararg{Union{Integer, Colon}}}) -> reshape(A, ::Tuple{Vararg{Integer}}) -> reshape(A, ::Tuple{Vararg{Int}}) (fallback) ``` This lets packages define `reshape(A::CustomArray, ::Tuple{Integer, Vararg{Integer}})` without having to implement `_reshape_uncolon` by themselves (or having to call internal `Base` functions, as in https://github.com/JuliaArrays/FillArrays.jl/issues/373). `reshape` calls involving a `Colon` would convert this to an `Integer` in `Base`, and then pass the `Integer` sizes to the custom method defined in the package. This PR does not resolve issues like https://github.com/JuliaLang/julia/issues/40076 because this still converts `Integer`s to `Int`s in the actual reshaping step. However, `BigInt` sizes that may be converted to `Int`s will work now: ```julia julia> reshape(1:4, big(2), big(2)) 2×2 reshape(::UnitRange{Int64}, 2, 2) with eltype Int64: 1 3 2 4 julia> reshape(1:4, big(1), :) 1×4 reshape(::UnitRange{Int64}, 1, 4) with eltype Int64: 1 2 3 4 ``` Note that the reshape method with `Integer` sizes explicitly converts these to `Int`s to avoid self-recursion (as opposed to calling `to_shape` to carry out the conversion implicitly). In the future, we may want to decide what to do with types or values that can't be converted to an `Int`. --------- Co-authored-by: Neven Sajko --- base/reshapedarray.jl | 57 +++++++++++++++++++++----------- test/abstractarray.jl | 20 +++++++++++ test/offsetarray.jl | 11 ++++++ test/testhelpers/OffsetArrays.jl | 4 ++- 4 files changed, 72 insertions(+), 20 deletions(-) diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl index 07f608588837b..f65a7d8c9561a 100644 --- a/base/reshapedarray.jl +++ b/base/reshapedarray.jl @@ -121,37 +121,56 @@ reshape reshape(parent::AbstractArray, dims::IntOrInd...) = reshape(parent, dims) reshape(parent::AbstractArray, shp::Tuple{Union{Integer,OneTo}, Vararg{Union{Integer,OneTo}}}) = reshape(parent, to_shape(shp)) +reshape(parent::AbstractArray, dims::Tuple{Integer, Vararg{Integer}}) = reshape(parent, map(Int, dims)) reshape(parent::AbstractArray, dims::Dims) = _reshape(parent, dims) # Allow missing dimensions with Colon(): reshape(parent::AbstractVector, ::Colon) = parent reshape(parent::AbstractVector, ::Tuple{Colon}) = parent reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims) -reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims) -reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims)) -@inline function _reshape_uncolon(A, dims) - @noinline throw1(dims) = throw(DimensionMismatch(string("new dimensions $(dims) ", - "may have at most one omitted dimension specified by `Colon()`"))) - @noinline throw2(A, dims) = throw(DimensionMismatch(string("array size $(length(A)) ", - "must be divisible by the product of the new dimensions $dims"))) - pre = _before_colon(dims...)::Tuple{Vararg{Int}} +reshape(parent::AbstractArray, dims::Integer...) = reshape(parent, dims) +reshape(parent::AbstractArray, dims::Union{Integer,Colon}...) = reshape(parent, dims) +reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Integer,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims)) + +@noinline throw1(dims) = throw(DimensionMismatch(LazyString("new dimensions ", dims, + " may have at most one omitted dimension specified by `Colon()`"))) +@noinline throw2(lenA, dims) = throw(DimensionMismatch(string("array size ", lenA, + " must be divisible by the product of the new dimensions ", dims))) + +@inline function _reshape_uncolon(A, _dims::Tuple{Vararg{Union{Integer, Colon}}}) + # promote the dims to `Int` at least + dims = map(x -> x isa Colon ? x : promote_type(typeof(x), Int)(x), _dims) + pre = _before_colon(dims...) post = _after_colon(dims...) _any_colon(post...) && throw1(dims) - post::Tuple{Vararg{Int}} len = length(A) - sz, is_exact = if iszero(len) - (0, true) + _reshape_uncolon_computesize(len, dims, pre, post) +end +@inline function _reshape_uncolon_computesize(len::Int, dims, pre::Tuple{Vararg{Int}}, post::Tuple{Vararg{Int}}) + sz = if iszero(len) + 0 else let pr = Core.checked_dims(pre..., post...) # safe product - if iszero(pr) - throw2(A, dims) - end - (quo, rem) = divrem(len, pr) - (Int(quo), iszero(rem)) + quo = _reshape_uncolon_computesize_nonempty(len, dims, pr) + convert(Int, quo) end - end::Tuple{Int,Bool} - is_exact || throw2(A, dims) - (pre..., sz, post...)::Tuple{Int,Vararg{Int}} + end + (pre..., sz, post...) +end +@inline function _reshape_uncolon_computesize(len, dims, pre, post) + pr = prod((pre..., post...)) + sz = if iszero(len) + promote(len, pr)[1] # zero of the correct type + else + _reshape_uncolon_computesize_nonempty(len, dims, pr) + end + (pre..., sz, post...) +end +@inline function _reshape_uncolon_computesize_nonempty(len, dims, pr) + iszero(pr) && throw2(len, dims) + (quo, rem) = divrem(len, pr) + iszero(rem) || throw2(len, dims) + quo end @inline _any_colon() = false @inline _any_colon(dim::Colon, tail...) = true diff --git a/test/abstractarray.jl b/test/abstractarray.jl index 2a2ec8e8e432c..4af4099eced45 100644 --- a/test/abstractarray.jl +++ b/test/abstractarray.jl @@ -2186,3 +2186,23 @@ end copyto!(A, 1, x, 1) @test A == axes(A,1) end + +@testset "reshape with Integer sizes" begin + @test reshape(1:4, big(2), big(2)) == reshape(1:4, 2, 2) + a = [1 2 3; 4 5 6] + reshaped_arrays = ( + reshape(a, 3, 2), + reshape(a, (3, 2)), + reshape(a, big(3), big(2)), + reshape(a, (big(3), big(2))), + reshape(a, :, big(2)), + reshape(a, (:, big(2))), + reshape(a, big(3), :), + reshape(a, (big(3), :)), + ) + @test allequal(reshaped_arrays) + for b ∈ reshaped_arrays + @test b isa Matrix{Int} + @test b.ref === a.ref + end +end diff --git a/test/offsetarray.jl b/test/offsetarray.jl index fb5855dfbaa0d..8e2ee33c49ed6 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -914,3 +914,14 @@ end b = sum(a, dims=1) @test b[begin] == sum(r) end + +@testset "reshape" begin + A0 = [1 3; 2 4] + A = reshape(A0, 2:3, 4:5) + @test axes(A) == Base.IdentityUnitRange.((2:3, 4:5)) + + B = reshape(A0, -10:-9, 9:10) + @test isa(B, OffsetArray{Int,2}) + @test parent(B) == A0 + @test axes(B) == Base.IdentityUnitRange.((-10:-9, 9:10)) +end diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl index 17b2d8c28680a..e895372a34974 100644 --- a/test/testhelpers/OffsetArrays.jl +++ b/test/testhelpers/OffsetArrays.jl @@ -529,7 +529,7 @@ _similar_axes_or_length(AT, ax::I, ::I) where {I} = similar(AT, map(_indexlength # reshape accepts a single colon Base.reshape(A::AbstractArray, inds::OffsetAxis...) = reshape(A, inds) -function Base.reshape(A::AbstractArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) +function Base.reshape(A::AbstractArray, inds::Tuple{Vararg{OffsetAxis}}) AR = reshape(no_offset_view(A), map(_indexlength, inds)) O = OffsetArray(AR, map(_offset, axes(AR), inds)) return _popreshape(O, axes(AR), _filterreshapeinds(inds)) @@ -557,6 +557,8 @@ Base.reshape(A::OffsetArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) = OffsetArray(_reshape(parent(A), inds), map(_toaxis, inds)) # And for non-offset axes, we can just return a reshape of the parent directly Base.reshape(A::OffsetArray, inds::Tuple{Union{Integer,Base.OneTo},Vararg{Union{Integer,Base.OneTo}}}) = _reshape_nov(A, inds) +Base.reshape(A::OffsetArray, inds::Tuple{Integer,Vararg{Integer}}) = _reshape_nov(A, inds) +Base.reshape(A::OffsetArray, inds::Tuple{Union{Colon, Integer}, Vararg{Union{Colon, Integer}}}) = _reshape_nov(A, inds) Base.reshape(A::OffsetArray, inds::Dims) = _reshape_nov(A, inds) Base.reshape(A::OffsetVector, ::Colon) = A Base.reshape(A::OffsetVector, ::Tuple{Colon}) = A From b8230c02c6e015cc5fd8e33811474ff1b82299bc Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 6 Dec 2024 06:17:24 -0500 Subject: [PATCH 19/25] drop llvm 16 support (#56751) Co-authored-by: Valentin Churavy --- doc/src/devdocs/build/build.md | 2 +- src/APInt-C.cpp | 54 -------- src/APInt-C.h | 15 --- src/Makefile | 6 +- src/aotcompile.cpp | 30 +---- src/ccall.cpp | 3 - src/cgutils.cpp | 66 +--------- src/codegen.cpp | 81 +----------- src/disasm.cpp | 10 +- src/jitlayers.cpp | 156 +---------------------- src/jitlayers.h | 20 +-- src/llvm-codegen-shared.h | 22 +--- src/llvm-cpufeatures.cpp | 2 +- src/llvm-julia-licm.cpp | 4 - src/llvm-late-gc-lowering.cpp | 21 --- src/llvm-lower-handlers.cpp | 4 - src/llvm-multiversioning.cpp | 8 +- src/llvm-pass-helpers.cpp | 14 -- src/llvm-propagate-addrspaces.cpp | 18 --- src/llvm-ptls.cpp | 8 -- src/llvm-remove-addrspaces.cpp | 28 ---- src/llvm-simdloop.cpp | 4 - src/llvm-version.h | 8 +- src/pipeline.cpp | 36 +----- src/processor.cpp | 8 -- src/runtime_ccall.cpp | 4 - src/runtime_intrinsics.c | 11 -- src/support/win32-clang-ABI-bug/optional | 10 -- 28 files changed, 19 insertions(+), 634 deletions(-) diff --git a/doc/src/devdocs/build/build.md b/doc/src/devdocs/build/build.md index 553f7c2e815cf..966ef3d102d1c 100644 --- a/doc/src/devdocs/build/build.md +++ b/doc/src/devdocs/build/build.md @@ -249,7 +249,7 @@ The most complicated dependency is LLVM, for which we require additional patches For packaging Julia with LLVM, we recommend either: - bundling a Julia-only LLVM library inside the Julia package, or - adding the patches to the LLVM package of the distribution. - * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/15.x` branch. + * A complete list of patches is available in on [Github](https://github.com/JuliaLang/llvm-project) see the `julia-release/18.x` branch. * The only Julia-specific patch is the lib renaming (`llvm7-symver-jlprefix.patch`), which should _not_ be applied to a system LLVM. * The remaining patches are all upstream bug fixes, and have been contributed into upstream LLVM. diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp index e73399c2ecde4..86b0bdb27638b 100644 --- a/src/APInt-C.cpp +++ b/src/APInt-C.cpp @@ -475,7 +475,6 @@ void LLVMTrunc(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *otys, integerP memcpy(pr, pa, onumbytes); } -#if JL_LLVM_VERSION >= 170000 extern "C" JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val) { return countr_zero(Val); @@ -495,27 +494,6 @@ extern "C" JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val) { return countr_zero(Val); } -#else -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_8(uint8_t Val) { - return countTrailingZeros(Val); -} - -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_16(uint16_t Val) { - return countTrailingZeros(Val); -} - -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_32(uint32_t Val) { - return countTrailingZeros(Val); -} - -extern "C" JL_DLLEXPORT -unsigned countTrailingZeros_64(uint64_t Val) { - return countTrailingZeros(Val); -} -#endif extern "C" JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr) { @@ -545,7 +523,6 @@ void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integer memcpy(pr, pa, numbytes); } -#if JL_LLVM_VERSION >= 170000 extern "C" JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa) { CREATE(a) @@ -575,34 +552,3 @@ unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa) { CREATE(a) return a.countl_zero(); } -#else -extern "C" JL_DLLEXPORT -unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countPopulation(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countTrailingOnes(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countTrailingZeros(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countLeadingOnes(); -} - -extern "C" JL_DLLEXPORT -unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa) { - CREATE(a) - return a.countLeadingZeros(); -} -#endif diff --git a/src/APInt-C.h b/src/APInt-C.h index a44d922a40d24..59ce3c765eeec 100644 --- a/src/APInt-C.h +++ b/src/APInt-C.h @@ -54,19 +54,11 @@ JL_DLLEXPORT int LLVMDiv_uov(unsigned numbits, integerPart *pa, integerPart *pb, JL_DLLEXPORT int LLVMRem_sov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT int LLVMRem_uov(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); -#if JL_LLVM_VERSION >= 170000 JL_DLLEXPORT unsigned LLVMPopcount(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountr_one(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountr_zero(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountl_one(unsigned numbits, integerPart *pa); JL_DLLEXPORT unsigned LLVMCountl_zero(unsigned numbits, integerPart *pa); -#else -JL_DLLEXPORT unsigned LLVMCountPopulation(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountTrailingOnes(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountTrailingZeros(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountLeadingOnes(unsigned numbits, integerPart *pa); -JL_DLLEXPORT unsigned LLVMCountLeadingZeros(unsigned numbits, integerPart *pa); -#endif JL_DLLEXPORT void LLVMFPtoSI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); JL_DLLEXPORT void LLVMFPtoUI(jl_datatype_t *ty, integerPart *pa, jl_datatype_t *oty, integerPart *pr); @@ -82,17 +74,10 @@ JL_DLLEXPORT int LLVMFPtoUI_exact(jl_datatype_t *ty, integerPart *pa, jl_datatyp JL_DLLEXPORT void jl_LLVMSMod(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); JL_DLLEXPORT void jl_LLVMFlipSign(unsigned numbits, integerPart *pa, integerPart *pb, integerPart *pr); -#if JL_LLVM_VERSION >= 170000 JL_DLLEXPORT unsigned countr_zero_8(uint8_t Val); JL_DLLEXPORT unsigned countr_zero_16(uint16_t Val); JL_DLLEXPORT unsigned countr_zero_32(uint32_t Val); JL_DLLEXPORT unsigned countr_zero_64(uint64_t Val); -#else -JL_DLLEXPORT unsigned countTrailingZeros_8(uint8_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_16(uint16_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_32(uint32_t Val); -JL_DLLEXPORT unsigned countTrailingZeros_64(uint64_t Val); -#endif //uint8_t getSwappedBytes_8(uint8_t Value); // no-op //uint16_t getSwappedBytes_16(uint16_t Value); diff --git a/src/Makefile b/src/Makefile index 3458f51fa5548..9355ca2c4c675 100644 --- a/src/Makefile +++ b/src/Makefile @@ -77,11 +77,7 @@ else # JULIACODEGEN != LLVM endif -RT_LLVM_LIBS := support - -ifeq ($(shell test $(LLVM_VER_MAJ) -ge 16 && echo true),true) -RT_LLVM_LIBS += targetparser -endif +RT_LLVM_LIBS := support targetparser ifeq ($(OS),WINNT) SRCS += win32_ucontext diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 1d1e48efc8c6c..ba9a3c05e41ff 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -4,11 +4,7 @@ #include "platform.h" // target support -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include "llvm/Support/CodeGen.h" #include #include @@ -963,11 +959,7 @@ static FunctionInfo getFunctionWeight(const Function &F) auto val = F.getFnAttribute("julia.mv.clones").getValueAsString(); // base16, so must be at most 4 * length bits long // popcount gives number of clones - #if JL_LLVM_VERSION >= 170000 info.clones = APInt(val.size() * 4, val, 16).popcount() + 1; - #else - info.clones = APInt(val.size() * 4, val, 16).countPopulation() + 1; - #endif } info.weight += info.insts; // more basic blocks = more complex than just sum of insts, @@ -1372,7 +1364,7 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer // So for now we inject a definition of these functions that calls our runtime // functions. We do so after optimization to avoid cloning these functions. // Float16 conversion routines -#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) && JL_LLVM_VERSION >= 160000 +#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9 injectCRTAlias(M, "__gnu_h2f_ieee", "julia_half_to_float", @@ -1721,9 +1713,6 @@ static SmallVector add_output(Module &M, TargetMachine &TM, Stri std::function func = [&, i]() { LLVMContext ctx; ctx.setDiscardValueNames(true); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(ctx); - #endif // Lazily deserialize the entire module timers[i].deserialize.startTimer(); auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx); @@ -1888,7 +1877,7 @@ void jl_dump_native_impl(void *native_code, Str += "10.14.0"; TheTriple.setOSName(Str); } - Optional RelocModel; + std::optional RelocModel; if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD() || TheTriple.isOSOpenBSD()) { RelocModel = Reloc::PIC_; } @@ -1933,9 +1922,6 @@ void jl_dump_native_impl(void *native_code, JL_TIMING(NATIVE_AOT, NATIVE_Sysimg); LLVMContext Context; Context.setDiscardValueNames(true); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(Context); - #endif Module sysimgM("sysimg", Context); sysimgM.setTargetTriple(TheTriple.str()); sysimgM.setDataLayout(DL); @@ -2081,9 +2067,6 @@ void jl_dump_native_impl(void *native_code, JL_TIMING(NATIVE_AOT, NATIVE_Metadata); LLVMContext Context; Context.setDiscardValueNames(true); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(Context); - #endif Module metadataM("metadata", Context); metadataM.setTargetTriple(TheTriple.str()); metadataM.setDataLayout(DL); @@ -2299,16 +2282,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, jl_ } else { auto p = literal_static_pointer_val(global.first, global.second->getValueType()); - #if JL_LLVM_VERSION >= 170000 Type *elty = PointerType::get(output.getContext(), 0); - #else - Type *elty; - if (p->getType()->isOpaquePointerTy()) { - elty = PointerType::get(output.getContext(), 0); - } else { - elty = p->getType()->getNonOpaquePointerElementType(); - } - #endif // For pretty printing, when LLVM inlines the global initializer into its loads auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent()); global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType())); diff --git a/src/ccall.cpp b/src/ccall.cpp index 52f8f807132e5..9aa1c56e2e78f 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -264,9 +264,6 @@ static GlobalVariable *emit_plt_thunk( SmallVector args; for (auto &arg : plt->args()) args.push_back(&arg); - #if JL_LLVM_VERSION < 170000 - assert(cast(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype)); - #endif CallInst *ret = irbuilder.CreateCall( functype, ptr, ArrayRef(args)); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7d4bd917eff30..9e170555e6149 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -60,11 +60,7 @@ static Value *decay_derived(jl_codectx_t &ctx, Value *V) if (T->getPointerAddressSpace() == AddressSpace::Derived) return V; // Once llvm deletes pointer element types, we won't need it here any more either. - #if JL_LLVM_VERSION >= 170000 Type *NewT = PointerType::get(T, AddressSpace::Derived); - #else - Type *NewT = PointerType::getWithSamePointeeType(cast(T), AddressSpace::Derived); - #endif return ctx.builder.CreateAddrSpaceCast(V, NewT); } @@ -74,11 +70,7 @@ static Value *maybe_decay_tracked(jl_codectx_t &ctx, Value *V) Type *T = V->getType(); if (T->getPointerAddressSpace() != AddressSpace::Tracked) return V; - #if JL_LLVM_VERSION >= 170000 Type *NewT = PointerType::get(T, AddressSpace::Derived); - #else - Type *NewT = PointerType::getWithSamePointeeType(cast(T), AddressSpace::Derived); - #endif return ctx.builder.CreateAddrSpaceCast(V, NewT); } @@ -1010,54 +1002,6 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const { if (sz == 0) return; - #if JL_LLVM_VERSION < 170000 - // If the types are small and simple, use load and store directly. - // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int - // that interferes with other optimizations. - // TODO: Restore this for opaque pointers? Needs extra type information from the caller. - if (ctx.builder.getContext().supportsTypedPointers() && sz <= 64) { - // The size limit is arbitrary but since we mainly care about floating points and - // machine size vectors this should be enough. - const DataLayout &DL = jl_Module->getDataLayout(); - auto srcty = cast(src->getType()); - //TODO unsafe nonopaque pointer - auto srcel = srcty->getNonOpaquePointerElementType(); - auto dstty = cast(dst->getType()); - //TODO unsafe nonopaque pointer - auto dstel = dstty->getNonOpaquePointerElementType(); - while (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) { - src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0); - srcel = srcel->getArrayElementType(); - srcty = srcel->getPointerTo(); - } - while (dstel->isArrayTy() && dstel->getArrayNumElements() == 1) { - dst = ctx.builder.CreateConstInBoundsGEP2_32(dstel, dst, 0, 0); - dstel = dstel->getArrayElementType(); - dstty = dstel->getPointerTo(); - } - - llvm::Type *directel = nullptr; - if (srcel->isSized() && srcel->isSingleValueType() && DL.getTypeStoreSize(srcel) == sz) { - directel = srcel; - dst = emit_bitcast(ctx, dst, srcty); - } - else if (dstel->isSized() && dstel->isSingleValueType() && - DL.getTypeStoreSize(dstel) == sz) { - directel = dstel; - src = emit_bitcast(ctx, src, dstty); - } - if (directel) { - if (isa(src) && !src->hasName()) - setName(ctx.emission_context, src, "memcpy_refined_src"); - if (isa(dst) && !dst->hasName()) - setName(ctx.emission_context, dst, "memcpy_refined_dst"); - auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile)); - dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, align_dst, is_volatile)); - ++SkippedMemcpys; - return; - } - } - #endif ++EmittedMemcpys; // the memcpy intrinsic does not allow to specify different alias tags @@ -1940,7 +1884,7 @@ static std::pair emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, // actual `isa` calls, this optimization should already have been performed upstream // anyway, but having this optimization in codegen might still be beneficial for // `typeassert`s if we can make it correct. - Optional known_isa; + std::optional known_isa; jl_value_t *intersected_type = type; if (x.constant) known_isa = jl_isa(x.constant, type); @@ -3786,11 +3730,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig for (auto *User : Val->users()) { if (isa(User)) { GetElementPtrInst *Inst = cast(User); - #if JL_LLVM_VERSION >= 170000 Inst->mutateType(PointerType::get(Inst->getType(), ToAS)); - #else - Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); - #endif recursively_adjust_ptr_type(Inst, FromAS, ToAS); } else if (isa(User)) { @@ -3799,11 +3739,7 @@ static void recursively_adjust_ptr_type(llvm::Value *Val, unsigned FromAS, unsig } else if (isa(User)) { BitCastInst *Inst = cast(User); - #if JL_LLVM_VERSION >= 170000 Inst->mutateType(PointerType::get(Inst->getType(), ToAS)); - #else - Inst->mutateType(PointerType::getWithSamePointeeType(cast(Inst->getType()), ToAS)); - #endif recursively_adjust_ptr_type(Inst, FromAS, ToAS); } } diff --git a/src/codegen.cpp b/src/codegen.cpp index b8bed0793730b..b9d42b9b5af16 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -23,11 +23,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include @@ -637,11 +633,7 @@ static AttributeList get_func_attrs(LLVMContext &C) static AttributeList get_donotdelete_func_attrs(LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, @@ -671,11 +663,7 @@ static AttributeList get_attrs_box_float(LLVMContext &C, unsigned nbytes) auto FnAttrs = AttrBuilder(C); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addDereferenceableAttr(nbytes); @@ -691,11 +679,7 @@ static AttributeList get_attrs_box_sext(LLVMContext &C, unsigned nbytes) auto FnAttrs = AttrBuilder(C); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addAttribute(Attribute::getWithDereferenceableBytes(C, nbytes)); @@ -712,11 +696,7 @@ static AttributeList get_attrs_box_zext(LLVMContext &C, unsigned nbytes) auto FnAttrs = AttrBuilder(C); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif auto RetAttrs = AttrBuilder(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addDereferenceableAttr(nbytes); @@ -1141,12 +1121,7 @@ static const auto jlegalx_func = new JuliaFunction{ return FunctionType::get(getInt32Ty(C), {T, T, T_size}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - FnAttrs.addAttribute(Attribute::ReadOnly); - FnAttrs.addAttribute(Attribute::ArgMemOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), @@ -1166,9 +1141,7 @@ static const auto jl_alloc_obj_func = new JuliaFunction{ auto FnAttrs = AttrBuilder(C); FnAttrs.addAllocSizeAttr(1, None); // returns %1 bytes FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); -#endif FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); auto RetAttrs = AttrBuilder(C); @@ -1204,11 +1177,7 @@ static const auto jl_typeof_func = new JuliaFunction<>{ }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); -#else - FnAttrs.addAttribute(Attribute::ReadNone); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); FnAttrs.addAttribute(Attribute::NoRecurse); return AttributeList::get(C, @@ -1223,11 +1192,7 @@ static const auto jl_write_barrier_func = new JuliaFunction<>{ {JuliaType::get_prjlvalue_ty(C)}, true); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly()); -#else - FnAttrs.addAttribute(Attribute::InaccessibleMemOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); FnAttrs.addAttribute(Attribute::NoRecurse); return AttributeList::get(C, @@ -1300,12 +1265,7 @@ static const auto memcmp_func = new JuliaFunction{ {getPointerTy(C), getPointerTy(C), T_size}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref)); -#else - FnAttrs.addAttribute(Attribute::ArgMemOnly); - FnAttrs.addAttribute(Attribute::ReadOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), @@ -1355,11 +1315,7 @@ static const auto jlfieldindex_func = new JuliaFunction<>{ }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::readOnly()); -#else - FnAttrs.addAttribute(Attribute::ReadOnly); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); FnAttrs.addAttribute(Attribute::WillReturn); return AttributeList::get(C, @@ -1425,9 +1381,7 @@ static const auto jl_allocgenericmemory = new JuliaFunction= 160000 FnAttrs.addMemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef) | MemoryEffects::argMemOnly(ModRefInfo::Ref)); -#endif FnAttrs.addAttribute(Attribute::WillReturn); RetAttrs.addAlignmentAttr(Align(16)); RetAttrs.addAttribute(Attribute::NonNull); @@ -1505,11 +1459,7 @@ static const auto pointer_from_objref_func = new JuliaFunction<>{ {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); }, [](LLVMContext &C) { AttrBuilder FnAttrs(C); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); -#else - FnAttrs.addAttribute(Attribute::ReadNone); -#endif FnAttrs.addAttribute(Attribute::NoUnwind); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), @@ -1532,11 +1482,7 @@ static const auto gc_loaded_func = new JuliaFunction<>{ FnAttrs.addAttribute(Attribute::Speculatable); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoRecurse); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::none()); -#else - FnAttrs.addAttribute(Attribute::ReadNone); -#endif AttrBuilder RetAttrs(C); RetAttrs.addAttribute(Attribute::NonNull); RetAttrs.addAttribute(Attribute::NoUndef); @@ -2440,11 +2386,7 @@ static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty) auto ptr = I->getOperand(0); if (ty->getPointerAddressSpace() == ptr->getType()->getPointerAddressSpace()) return ptr; - #if JL_LLVM_VERSION >= 170000 else - #else - else if (cast(ty)->hasSameElementTypeAs(cast(ptr->getType()))) - #endif return ctx.builder.CreateAddrSpaceCast(ptr, ty); } ++EmittedIntToPtrs; @@ -10343,10 +10285,6 @@ char jl_using_perf_jitevents = 0; int jl_is_timing_passes = 0; -#if JL_LLVM_VERSION < 170000 -int jl_opaque_ptrs_set = 0; -#endif - extern "C" void jl_init_llvm(void) { jl_page_size = jl_getpagesize(); @@ -10365,12 +10303,8 @@ extern "C" void jl_init_llvm(void) initializeAnalysis(Registry); initializeTransformUtils(Registry); initializeInstCombine(Registry); -#if JL_LLVM_VERSION >= 160000 - // TODO -#else - initializeAggressiveInstCombine(Registry); - initializeInstrumentation(Registry); -#endif + // TODO: initializeAggressiveInstCombine(Registry); + // TODO: initializeInstrumentation(Registry); initializeTarget(Registry); #ifdef USE_POLLY polly::initializePollyPasses(Registry); @@ -10396,17 +10330,6 @@ extern "C" void jl_init_llvm(void) if (clopt && clopt->getNumOccurrences() == 0) cl::ProvidePositionalOption(clopt, "4", 1); - #if JL_LLVM_VERSION < 170000 - // we want the opaque-pointers to be opt-in, per LLVMContext, for this release - // so change the default value back to pre-14.x, without changing the NumOccurrences flag for it - clopt = llvmopts.lookup("opaque-pointers"); - if (clopt && clopt->getNumOccurrences() == 0) { - clopt->addOccurrence(1, clopt->ArgStr, "false", true); - } else { - jl_opaque_ptrs_set = 1; - } - #endif - clopt = llvmopts.lookup("time-passes"); if (clopt && clopt->getNumOccurrences() > 0) jl_is_timing_passes = 1; diff --git a/src/disasm.cpp b/src/disasm.cpp index b944e48430c29..6a7985bd7ec1b 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -58,11 +58,7 @@ #include "llvm-version.h" // for outputting disassembly -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include @@ -505,7 +501,7 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada auto TSM = std::unique_ptr(unwrap(dump->TSM)); //If TSM is not passed in, then the context MUST be locked externally. //RAII will release the lock - Optional lock; + std::optional lock; if (TSM) { lock.emplace(TSM->getContext().getLock()); } @@ -1107,11 +1103,7 @@ static void jl_dump_asm_internal( const MCOperand &OpI = Inst.getOperand(Op); if (OpI.isImm()) { int64_t imm = OpI.getImm(); - #if JL_LLVM_VERSION >= 170000 if (opinfo.operands()[Op].OperandType == MCOI::OPERAND_PCREL) - #else - if (opinfo.OpInfo[Op].OperandType == MCOI::OPERAND_PCREL) - #endif imm += Fptr + Index; const char *name = DisInfo.lookupSymbolName(imm); if (name) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 42ddfb688af39..80867daade267 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -37,11 +37,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include @@ -1103,22 +1099,13 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override -#else - Error notifyRemovingResources(orc::ResourceKey K) override -#endif { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, orc::ResourceKey SrcKey) override {} -#else - void notifyTransferringResources(orc::ResourceKey DstKey, - orc::ResourceKey SrcKey) override {} -#endif void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &, jitlink::PassConfiguration &PassConfig) override @@ -1168,21 +1155,12 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { Error notifyFailed(orc::MaterializationResponsibility &MR) override { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override -#else - Error notifyRemovingResources(orc::ResourceKey K) override -#endif { return Error::success(); } -#if JL_LLVM_VERSION >= 160000 void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey, orc::ResourceKey SrcKey) override {} -#else - void notifyTransferringResources(orc::ResourceKey DstKey, - orc::ResourceKey SrcKey) override {} -#endif void modifyPassConfig(orc::MaterializationResponsibility &, jitlink::LinkGraph &, @@ -1199,11 +1177,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { for (auto block : section.blocks()) { secsize += block->getSize(); } -#if JL_LLVM_VERSION >= 160000 if ((section.getMemProt() & orc::MemProt::Exec) == orc::MemProt::None) { -#else - if ((section.getMemProt() & jitlink::MemProt::Exec) == jitlink::MemProt::None) { -#endif data_size += secsize; } else { code_size += secsize; @@ -1235,11 +1209,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin { // TODO: Port our memory management optimisations to JITLink instead of using the // default InProcessMemoryManager. std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT { -#if JL_LLVM_VERSION < 160000 - return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper()); -#else return cantFail(orc::MapperJITLinkMemoryManager::CreateWithMapper(/*Reservation Granularity*/ 16 * 1024 * 1024)); -#endif } #ifdef _COMPILER_CLANG_ @@ -1288,21 +1258,11 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { bool IsReadOnly) override { return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly); } -#if JL_LLVM_VERSION >= 160000 virtual void reserveAllocationSpace(uintptr_t CodeSize, Align CodeAlign, uintptr_t RODataSize, Align RODataAlign, uintptr_t RWDataSize, Align RWDataAlign) override { return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); } -#else - virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, - uintptr_t RODataSize, - uint32_t RODataAlign, - uintptr_t RWDataSize, - uint32_t RWDataAlign) override { - return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); - } -#endif virtual bool needsToReserveAllocationSpace() override { return MemMgr->needsToReserveAllocationSpace(); } @@ -1415,7 +1375,7 @@ namespace { FeaturesStr = Features.getString(); } // Allocate a target... - Optional codemodel = + std::optional codemodel = #ifdef _P64 // Make sure we are using the large code model on 64bit // Let LLVM pick a default suitable for jitting on 32bit @@ -1830,11 +1790,6 @@ struct JuliaOJIT::DLSymOptimizer { if (named) { auto T = GV.getValueType(); assert(T->isPointerTy()); - #if JL_LLVM_VERSION < 170000 - if (!T->isOpaquePointerTy()) { - T = T->getNonOpaquePointerElementType(); - } - #endif init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, GV.getName() + ".jit", init, &M); } GV.setInitializer(init); @@ -1885,11 +1840,6 @@ struct JuliaOJIT::DLSymOptimizer { if (named) { auto T = CI->getType(); assert(T->isPointerTy()); - #if JL_LLVM_VERSION < 170000 - if (!T->isOpaquePointerTy()) { - T = T->getNonOpaquePointerElementType(); - } - #endif init = GlobalAlias::create(T, 0, GlobalValue::PrivateLinkage, CI->getName() + ".jit", init, &M); } // DCE and SimplifyCFG will kill the branching structure around @@ -1933,19 +1883,6 @@ void fixupTM(TargetMachine &TM) { } } -#if JL_LLVM_VERSION < 170000 -extern int jl_opaque_ptrs_set; -void SetOpaquePointer(LLVMContext &ctx) { - if (jl_opaque_ptrs_set) - return; -#ifndef JL_LLVM_OPAQUE_POINTERS - ctx.setOpaquePointers(false); -#else - ctx.setOpaquePointers(true); -#endif -} -#endif - llvm::DataLayout jl_create_datalayout(TargetMachine &TM) { // Mark our address spaces as non-integral auto jl_data_layout = TM.createDataLayout(); @@ -2053,7 +1990,7 @@ JuliaOJIT::JuliaOJIT() orc::SymbolAliasMap jl_crt = { // Float16 conversion routines -#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) && JL_LLVM_VERSION >= 160000 +#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_) // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9 { mangle("__gnu_h2f_ieee"), { mangle("julia_half_to_float"), JITSymbolFlags::Exported } }, @@ -2087,7 +2024,6 @@ JuliaOJIT::JuliaOJIT() #ifdef MSAN_EMUTLS_WORKAROUND orc::SymbolMap msan_crt; - #if JL_LLVM_VERSION >= 170000 msan_crt[mangle("__emutls_get_address")] = {ExecutorAddr::fromPtr(msan_workaround::getTLSAddress), JITSymbolFlags::Exported}; msan_crt[mangle("__emutls_v.__msan_param_tls")] = {ExecutorAddr::fromPtr( reinterpret_cast(static_cast(msan_workaround::MSanTLS::param))), JITSymbolFlags::Exported}; @@ -2105,25 +2041,6 @@ JuliaOJIT::JuliaOJIT() reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size))), JITSymbolFlags::Exported}; msan_crt[mangle("__emutls_v.__msan_origin_tls")] = {ExecutorAddr::fromPtr( reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin))), JITSymbolFlags::Exported}; - #else - msan_crt[mangle("__emutls_get_address")] = JITEvaluatedSymbol::fromPointer(msan_workaround::getTLSAddress, JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_param_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::param)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_param_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::param_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_retval_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_retval_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::retval_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_origin)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_va_arg_overflow_size_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::va_arg_overflow_size)), JITSymbolFlags::Exported); - msan_crt[mangle("__emutls_v.__msan_origin_tls")] = JITEvaluatedSymbol::fromPointer( - reinterpret_cast(static_cast(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported); - #endif cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt))); #endif #if JL_LLVM_VERSION < 190000 @@ -2133,11 +2050,7 @@ JuliaOJIT::JuliaOJIT() // hopefully fixed upstream by e7698a13e319a9919af04d3d693a6f6ea7168a44 static int64_t jl___asan_globals_registered; orc::SymbolMap asan_crt; - #if JL_LLVM_VERSION >= 170000 asan_crt[mangle("___asan_globals_registered")] = {ExecutorAddr::fromPtr(&jl___asan_globals_registered), JITSymbolFlags::Common | JITSymbolFlags::Exported}; - #else - asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&jl___asan_globals_registered, JITSymbolFlags::Common | JITSymbolFlags::Exported); - #endif cantFail(JD.define(orc::absoluteSymbols(asan_crt))); #endif #endif @@ -2148,9 +2061,6 @@ JuliaOJIT::~JuliaOJIT() = default; ThreadSafeContext JuliaOJIT::makeContext() { auto ctx = std::make_unique(); - #if JL_LLVM_VERSION < 170000 - SetOpaquePointer(*ctx); - #endif return orc::ThreadSafeContext(std::move(ctx)); } @@ -2162,11 +2072,7 @@ orc::SymbolStringPtr JuliaOJIT::mangle(StringRef Name) void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) { - #if JL_LLVM_VERSION >= 170000 cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), {ExecutorAddr::fromPtr((void*)Addr), JITSymbolFlags::Exported}}}))); - #else - cantFail(JD.define(orc::absoluteSymbols({{mangle(Name), JITEvaluatedSymbol::fromPointer((void*)Addr)}}))); - #endif } void JuliaOJIT::addModule(orc::ThreadSafeModule TSM) @@ -2245,7 +2151,6 @@ SmallVector JuliaOJIT::findSymbols(ArrayRef Names) return Addrs; } -#if JL_LLVM_VERSION >= 170000 Expected JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) { orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; @@ -2286,50 +2191,6 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) } return addr->getAddress().getValue(); } -#else -JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly) -{ - orc::JITDylib* SearchOrders[3] = {&JD, &GlobalJD, &ExternalJD}; - ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExportedSymbolsOnly ? 3 : 1); - auto Sym = ES.lookup(SearchOrder, Name); - if (Sym) - return *Sym; - return Sym.takeError(); -} - -JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name) -{ - return findSymbol(getMangledName(Name), true); -} - -Expected JuliaOJIT::findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) -{ - orc::JITDylib* SearchOrders[3] = {&ExternalJD, &GlobalJD, &JD}; - ArrayRef SearchOrder = ArrayRef(&SearchOrders[0], ExternalJDOnly ? 1 : 3); - auto Sym = ES.lookup(SearchOrder, getMangledName(Name)); - return Sym; -} - -uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name) -{ - auto addr = findSymbol(getMangledName(Name), false); - if (!addr) { - consumeError(addr.takeError()); - return 0; - } - return cantFail(addr).getAddress(); -} - -uint64_t JuliaOJIT::getFunctionAddress(StringRef Name) -{ - auto addr = findSymbol(getMangledName(Name), false); - if (!addr) { - consumeError(addr.takeError()); - return 0; - } - return cantFail(addr).getAddress(); -} -#endif StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) { @@ -2361,13 +2222,8 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl } #ifdef JL_USE_JITLINK -#if JL_LLVM_VERSION >= 170000 #define addAbsoluteToMap(map,name) \ (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name)) -#else -#define addAbsoluteToMap(map,name) \ - (map[mangle(#name)] = JITEvaluatedSymbol::fromPointer(&name, JITSymbolFlags::Exported | JITSymbolFlags::Callable), orc::ExecutorAddr::fromPtr(&name)) -#endif void JuliaOJIT::enableJITDebuggingSupport() { @@ -2552,11 +2408,7 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS } // Reparent the global variable: SG.removeFromParent(); - #if JL_LLVM_VERSION >= 170000 dest.insertGlobalVariable(&SG); - #else - dest.getGlobalList().push_back(&SG); - #endif // Comdat is owned by the Module SG.setComdat(nullptr); } @@ -2603,11 +2455,7 @@ void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTS } } SG.removeFromParent(); - #if JL_LLVM_VERSION >= 170000 dest.insertAlias(&SG); - #else - dest.getAliasList().push_back(&SG); - #endif } // metadata nodes need to be explicitly merged not just copied diff --git a/src/jitlayers.h b/src/jitlayers.h index baba5412226e3..6665be6a33faa 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -363,14 +363,6 @@ class MaxAlignedAllocImpl }; using MaxAlignedAlloc = MaxAlignedAllocImpl<>; -#if JL_LLVM_VERSION < 170000 -typedef JITSymbol JL_JITSymbol; -// The type that is similar to SymbolInfo on LLVM 4.0 is actually -// `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol -// is expected. -typedef JITSymbol JL_SymbolInfo; -#endif - using CompilerResultT = Expected>; using OptimizerResultT = Expected; using SharedBytesT = StringSet::MapEntryTy)>>; @@ -472,7 +464,7 @@ class JuliaOJIT { } private: ResourcePool &pool; - Optional resource; + std::optional resource; }; OwningResource operator*() JL_NOTSAFEPOINT { @@ -558,16 +550,10 @@ class JuliaOJIT { orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; } orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; } - #if JL_LLVM_VERSION >= 170000 Expected findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; Expected findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; SmallVector findSymbols(ArrayRef Names) JL_NOTSAFEPOINT; - #else - JITEvaluatedSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT; - JITEvaluatedSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT; - Expected findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT; - #endif uint64_t getGlobalValueAddress(StringRef Name) JL_NOTSAFEPOINT; uint64_t getFunctionAddress(StringRef Name) JL_NOTSAFEPOINT; StringRef getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl_code_instance_t *codeinst) JL_NOTSAFEPOINT; @@ -660,10 +646,6 @@ Module &jl_codegen_params_t::shared_module() JL_NOTSAFEPOINT { } void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT; -#if JL_LLVM_VERSION < 170000 -void SetOpaquePointer(LLVMContext &ctx) JL_NOTSAFEPOINT; -#endif - void optimizeDLSyms(Module &M); // NewPM diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index a99e18f3e3762..d9551e0552f9c 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -1,5 +1,6 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#include #include #include #include @@ -8,30 +9,15 @@ #include #include #include - -#if JL_LLVM_VERSION >= 160000 #include -#endif #include "julia.h" #define STR(csym) #csym #define XSTR(csym) STR(csym) -#if JL_LLVM_VERSION >= 160000 - -#include - -template -using Optional = std::optional; static constexpr std::nullopt_t None = std::nullopt; -#else - -#include - -#endif - enum AddressSpace { Generic = 0, Tracked = 10, @@ -180,7 +166,7 @@ static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instructi using namespace llvm; inst->setMetadata(llvm::LLVMContext::MD_tbaa, md); if (llvm::isa(inst) && md && md == get_tbaa_const(md->getContext())) { - inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), None)); + inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), std::nullopt)); } return inst; } @@ -245,11 +231,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s if (!F) { FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false); F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); -#if JL_LLVM_VERSION >= 160000 F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif } builder.CreateCall(F, {signal_page}); } diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp index 05d62adc57926..a6e963664b0f3 100644 --- a/src/llvm-cpufeatures.cpp +++ b/src/llvm-cpufeatures.cpp @@ -37,7 +37,7 @@ STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false"); extern JuliaOJIT *jl_ExecutionEngine; // whether this platform unconditionally (i.e. without needing multiversioning) supports FMA -Optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { +std::optional always_have_fma(Function &intr, const Triple &TT) JL_NOTSAFEPOINT { if (TT.isAArch64()) { auto intr_name = intr.getName(); auto typ = intr_name.substr(strlen("julia.cpu.have_fma.")); diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp index 8d80f7fd54877..baf844dffa89c 100644 --- a/src/llvm-julia-licm.cpp +++ b/src/llvm-julia-licm.cpp @@ -342,11 +342,7 @@ struct JuliaLICM : public JuliaPassContext { } } if (changed && SE) { -#if JL_LLVM_VERSION >= 160000 SE->forgetLoopDispositions(); -#else - SE->forgetLoopDispositions(L); -#endif } #ifdef JL_VERIFY_PASSES assert(!verifyLLVMIR(*L)); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index d95cc9c49b698..ff2cac6e49406 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1163,9 +1163,6 @@ State LateLowerGCFrame::LocalScan(Function &F) { } if (CI->hasStructRetAttr()) { Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType(); - #if JL_LLVM_VERSION < 170000 - assert(cast(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType())); - #endif auto tracked = CountTrackedPointers(ElT, true); if (tracked.count) { AllocaInst *SRet = dyn_cast((CI->arg_begin()[0])->stripInBoundsOffsets()); @@ -1252,38 +1249,20 @@ State LateLowerGCFrame::LocalScan(Function &F) { callee->getName() == "memcmp") { continue; } -#if JL_LLVM_VERSION >= 160000 if (callee->getMemoryEffects().onlyReadsMemory() || callee->getMemoryEffects().onlyAccessesArgPointees()) { continue; } -#else - if (callee->hasFnAttribute(Attribute::ReadNone) || - callee->hasFnAttribute(Attribute::ReadOnly) || - callee->hasFnAttribute(Attribute::ArgMemOnly)) { - continue; - } -#endif if (MemTransferInst *MI = dyn_cast(CI)) { MaybeTrackDst(S, MI); } } -#if JL_LLVM_VERSION >= 160000 if (isa(CI) || CI->getMemoryEffects().onlyAccessesArgPointees() || CI->getMemoryEffects().onlyReadsMemory()) { // Intrinsics are never safepoints. continue; } -#else - if (isa(CI) || - CI->hasFnAttr(Attribute::ArgMemOnly) || - CI->hasFnAttr(Attribute::ReadNone) || - CI->hasFnAttr(Attribute::ReadOnly)) { - // Intrinsics are never safepoints. - continue; - } -#endif SmallVector CalleeRoots; for (Use &U : CI->args()) { // Find all callee rooted arguments. diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp index e09ea892ee488..c359bf6c117ce 100644 --- a/src/llvm-lower-handlers.cpp +++ b/src/llvm-lower-handlers.cpp @@ -8,11 +8,7 @@ #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 22ef973decfe9..a76d076ebd6f3 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -15,11 +15,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include @@ -51,7 +47,7 @@ using namespace llvm; -extern Optional always_have_fma(Function&, const Triple &TT); +extern std::optional always_have_fma(Function&, const Triple &TT); namespace { constexpr uint32_t clone_mask = @@ -185,7 +181,7 @@ struct TargetSpec { } }; -static Optional> get_target_specs(Module &M) { +static std::optional> get_target_specs(Module &M) { auto md = M.getModuleFlag("julia.mv.specs"); if (!md) return None; diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index 4e9e4826b4f75..ca25251040fb2 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -129,9 +129,7 @@ namespace jl_intrinsics { static Function *addGCAllocAttributes(Function *target) { auto FnAttrs = AttrBuilder(target->getContext()); -#if JL_LLVM_VERSION >= 160000 FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); -#endif FnAttrs.addAllocKindAttr(AllocFnKind::Alloc); FnAttrs.addAttribute(Attribute::WillReturn); FnAttrs.addAttribute(Attribute::NoUnwind); @@ -228,11 +226,7 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, QUEUE_GC_ROOT_NAME); -#if JL_LLVM_VERSION >= 160000 intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif return intrinsic; }); @@ -248,11 +242,7 @@ namespace jl_intrinsics { false), Function::ExternalLinkage, SAFEPOINT_NAME); -#if JL_LLVM_VERSION >= 160000 intrinsic->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif return intrinsic; }); } @@ -309,11 +299,7 @@ namespace jl_well_known { false), Function::ExternalLinkage, GC_QUEUE_ROOT_NAME); -#if JL_LLVM_VERSION >= 160000 func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); -#else - func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); -#endif return func; }); diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp index 4e5a2ee5e0d54..06a52ad3dcb43 100644 --- a/src/llvm-propagate-addrspaces.cpp +++ b/src/llvm-propagate-addrspaces.cpp @@ -163,22 +163,14 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc Instruction *InstV = cast(V); Instruction *NewV = InstV->clone(); ToInsert.push_back(std::make_pair(NewV, InstV)); - #if JL_LLVM_VERSION >= 170000 Type *NewRetTy = PointerType::get(InstV->getType(), allocaAddressSpace); - #else - Type *NewRetTy = PointerType::getWithSamePointeeType(cast(InstV->getType()), allocaAddressSpace); - #endif NewV->mutateType(NewRetTy); LiftingMap[InstV] = NewV; ToRevisit.push_back(NewV); } } auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * { - #if JL_LLVM_VERSION >= 170000 PointerType *TargetType = PointerType::get(CurrentV->getType(), allocaAddressSpace); - #else - PointerType *TargetType = PointerType::getWithSamePointeeType(cast(CurrentV->getType()), allocaAddressSpace); - #endif while (!LiftingMap.count(CurrentV)) { if (isa(CurrentV)) CurrentV = cast(CurrentV)->getOperand(0); @@ -192,17 +184,7 @@ Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Module *M, Value *V, Instruc } if (LiftingMap.count(CurrentV)) CurrentV = LiftingMap[CurrentV]; - #if JL_LLVM_VERSION >= 170000 assert(CurrentV->getType() == TargetType); - #else - if (CurrentV->getType() != TargetType) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(CurrentV->getContext().supportsTypedPointers()); - auto *BCI = new BitCastInst(CurrentV, TargetType); - ToInsert.push_back(std::make_pair(BCI, InsertPt)); - CurrentV = BCI; - } - #endif return CurrentV; }; diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 614ed15f840e6..e36136859517a 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -9,11 +9,7 @@ #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include #include @@ -67,11 +63,7 @@ struct LowerPTLS { void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const { -#if JL_LLVM_VERSION >= 160000 pgcstack->addFnAttr(Attribute::getWithMemoryEffects(pgcstack->getContext(), MemoryEffects::none())); -#else - addFnAttr(pgcstack, Attribute::ReadNone); -#endif addFnAttr(pgcstack, Attribute::NoUnwind); } diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp index 89ae1d292d108..bb492f467e74c 100644 --- a/src/llvm-remove-addrspaces.cpp +++ b/src/llvm-remove-addrspaces.cpp @@ -44,19 +44,7 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper { DstTy = SrcTy; if (auto Ty = dyn_cast(SrcTy)) { - #if JL_LLVM_VERSION >= 170000 DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace())); - #else - if (Ty->isOpaque()) { - DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace())); - } - else { - //Remove once opaque pointer transition is complete - DstTy = PointerType::get( - remapType(Ty->getNonOpaquePointerElementType()), - ASRemapper(Ty->getAddressSpace())); - } - #endif } else if (auto Ty = dyn_cast(SrcTy)) { SmallVector Params; @@ -157,24 +145,8 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer { Ops.push_back(NewOp ? cast(NewOp) : Op); } - #if JL_LLVM_VERSION >= 170000 if (CE->getOpcode() != Instruction::GetElementPtr) DstV = CE->getWithOperands(Ops, Ty); - #else - if (CE->getOpcode() == Instruction::GetElementPtr) { - // GEP const exprs need to know the type of the source. - // asserts remapType(typeof arg0) == typeof mapValue(arg0). - Constant *Src = CE->getOperand(0); - auto ptrty = cast(Src->getType()->getScalarType()); - //Remove once opaque pointer transition is complete - if (!ptrty->isOpaque()) { - Type *SrcTy = remapType(ptrty->getNonOpaquePointerElementType()); - DstV = CE->getWithOperands(Ops, Ty, false, SrcTy); - } - } - else - DstV = CE->getWithOperands(Ops, Ty); - #endif } } diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp index e12b30e3db466..3faa9d9728e67 100644 --- a/src/llvm-simdloop.cpp +++ b/src/llvm-simdloop.cpp @@ -297,11 +297,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution } if (SE) -#if JL_LLVM_VERSION >= 160000 SE->forgetLoopDispositions(); -#else - SE->forgetLoopDispositions(&L); -#endif } #ifdef JL_VERIFY_PASSES diff --git a/src/llvm-version.h b/src/llvm-version.h index 984e918d480cc..061d80deb02f9 100644 --- a/src/llvm-version.h +++ b/src/llvm-version.h @@ -10,12 +10,8 @@ #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \ + LLVM_VERSION_PATCH) -#if JL_LLVM_VERSION < 150000 - #error Only LLVM versions >= 15.0.0 are supported by Julia -#endif - -#if JL_LLVM_VERSION >= 160000 - #define JL_LLVM_OPAQUE_POINTERS 1 +#if JL_LLVM_VERSION < 170000 + #error Only LLVM versions >= 17.0.0 are supported by Julia #endif #if JL_LLVM_VERSION < 19000 && defined(_CPU_RISCV64_) diff --git a/src/pipeline.cpp b/src/pipeline.cpp index f8976099ee53c..8c9054c0d65ff 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -150,15 +150,9 @@ namespace { // Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); // MPM.addPass(RequireAnalysisPass()); //Let's assume the defaults are actually fine for our purposes - #if JL_LLVM_VERSION < 160000 - // MPM.addPass(ModuleAddressSanitizerPass( - // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); - MPM.addPass(ModuleAddressSanitizerPass(AddressSanitizerOptions())); - #else // LLVM 16+ // MPM.addPass(AddressSanitizerPass( // Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); MPM.addPass(AddressSanitizerPass(AddressSanitizerOptions(), true, false)); - #endif // } }; ASanPass(/*SanitizerKind::Address, */false); @@ -347,12 +341,8 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder FPM.addPass(DCEPass()); FPM.addPass(SimplifyCFGPass(basicSimplifyCFGOptions())); if (O.getSpeedupLevel() >= 1) { -#if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); -#else - FPM.addPass(SROAPass()); -#endif } MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } @@ -389,12 +379,8 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, if (O.getSpeedupLevel() >= 1) { FunctionPassManager FPM; if (O.getSpeedupLevel() >= 2) { -#if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); -#else - FPM.addPass(SROAPass()); -#endif // SROA can duplicate PHI nodes which can block LowerSIMD FPM.addPass(InstCombinePass()); FPM.addPass(JumpThreadingPass()); @@ -468,12 +454,8 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder * if (options.enable_scalar_optimizations) { if (O.getSpeedupLevel() >= 2) { JULIA_PASS(FPM.addPass(AllocOptPass())); - #if JL_LLVM_VERSION >= 160000 // TODO check the LLVM 15 default. FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); - #else - FPM.addPass(SROAPass()); - #endif FPM.addPass(InstSimplifyPass()); FPM.addPass(GVNPass()); FPM.addPass(MemCpyOptPass()); @@ -737,12 +719,7 @@ void NewPM::run(Module &M) { //We must recreate the analysis managers every time //so that analyses from previous runs of the pass manager //do not hang around for the next run -#if JL_LLVM_VERSION >= 160000 StandardInstrumentations SI(M.getContext(),false); -#else - StandardInstrumentations SI(false); -#endif -#if JL_LLVM_VERSION >= 170000 PassInstrumentationCallbacks PIC; adjustPIC(PIC); TimePasses.registerCallbacks(PIC); @@ -752,17 +729,6 @@ void NewPM::run(Module &M) { ModuleAnalysisManager MAM; SI.registerCallbacks(PIC, &MAM); SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this -#else - FunctionAnalysisManager FAM(createFAM(O, *TM.get())); - PassInstrumentationCallbacks PIC; - adjustPIC(PIC); - TimePasses.registerCallbacks(PIC); - SI.registerCallbacks(PIC, &FAM); - SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this - LoopAnalysisManager LAM; - CGSCCAnalysisManager CGAM; - ModuleAnalysisManager MAM; -#endif PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC); PB.registerLoopAnalyses(LAM); PB.registerFunctionAnalyses(FAM); @@ -794,7 +760,7 @@ OptimizationLevel getOptLevel(int optlevel) { } //This part is also basically stolen from LLVM's PassBuilder.cpp file -static Optional> parseJuliaPipelineOptions(StringRef name) { +static std::optional> parseJuliaPipelineOptions(StringRef name) { if (name.consume_front("julia")) { auto O = OptimizationLevel::O2; auto options = OptimizationOptions::defaults(); diff --git a/src/processor.cpp b/src/processor.cpp index bc12f5b54be19..3edebcc2f3ae6 100644 --- a/src/processor.cpp +++ b/src/processor.cpp @@ -7,11 +7,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include @@ -162,11 +158,7 @@ struct FeatureList { { int cnt = 0; for (size_t i = 0; i < n; i++) - #if JL_LLVM_VERSION >= 170000 cnt += llvm::popcount(eles[i]); - #else - cnt += llvm::countPopulation(eles[i]); - #endif return cnt; } inline bool empty() const diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 56f8487d8fb73..2a6cb00961594 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -4,11 +4,7 @@ #include #include #include -#if JL_LLVM_VERSION >= 170000 #include -#else -#include -#endif #include #include "julia.h" diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c index 450096eef5b01..80281b733bf44 100644 --- a/src/runtime_intrinsics.c +++ b/src/runtime_intrinsics.c @@ -1574,7 +1574,6 @@ bi_iintrinsic_cnvtb_fast(LLVMAShr, ashr_op, ashr_int, , 1) //un_iintrinsic_fast(LLVMByteSwap, bswap_op, bswap_int, u) un_iintrinsic_slow(LLVMByteSwap, bswap_int, u) //#define ctpop_op(a) __builtin_ctpop(a) -#if JL_LLVM_VERSION >= 170000 //uu_iintrinsic_fast(LLVMPopcount, ctpop_op, ctpop_int, u) uu_iintrinsic_slow(LLVMPopcount, ctpop_int, u) //#define ctlz_op(a) __builtin_ctlz(a) @@ -1583,16 +1582,6 @@ uu_iintrinsic_slow(LLVMCountl_zero, ctlz_int, u) //#define cttz_op(a) __builtin_cttz(a) //uu_iintrinsic_fast(LLVMCountr_zero, cttz_op, cttz_int, u) uu_iintrinsic_slow(LLVMCountr_zero, cttz_int, u) -#else -//uu_iintrinsic_fast(LLVMCountPopulation, ctpop_op, ctpop_int, u) -uu_iintrinsic_slow(LLVMCountPopulation, ctpop_int, u) -//#define ctlz_op(a) __builtin_ctlz(a) -//uu_iintrinsic_fast(LLVMCountLeadingZeros, ctlz_op, ctlz_int, u) -uu_iintrinsic_slow(LLVMCountLeadingZeros, ctlz_int, u) -//#define cttz_op(a) __builtin_cttz(a) -//uu_iintrinsic_fast(LLVMCountTrailingZeros, cttz_op, cttz_int, u) -uu_iintrinsic_slow(LLVMCountTrailingZeros, cttz_int, u) -#endif #define not_op(a) ~a un_iintrinsic_fast(LLVMFlipAllBits, not_op, not_int, u) diff --git a/src/support/win32-clang-ABI-bug/optional b/src/support/win32-clang-ABI-bug/optional index a2ecdad1e33ce..fd2f7646e1766 100644 --- a/src/support/win32-clang-ABI-bug/optional +++ b/src/support/win32-clang-ABI-bug/optional @@ -57,7 +57,6 @@ namespace optional_detail { // // The move constructible / assignable conditions emulate the remaining behavior // of std::is_trivially_copyable. -#if JL_LLVM_VERSION >= 170000 template ::value && std::is_trivially_copy_assignable::value && @@ -65,15 +64,6 @@ template ::value) && (std::is_trivially_move_assignable::value || !std::is_move_assignable::value))> -#else -template ::value && - std::is_trivially_copy_assignable::value && - (llvm::is_trivially_move_constructible::value || - !std::is_move_constructible::value) && - (std::is_trivially_move_assignable::value || - !std::is_move_assignable::value))> -#endif class OptionalStorage { union { char empty; From fe2617378cb6ea56a1b5e41ff33b7ad34bfa71ac Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 6 Dec 2024 05:32:45 -0700 Subject: [PATCH 20/25] Address some post-commit review from #56660 (#56747) Some more questions still to be answered, but this should address the immediate actionable review items. --- .../extras/CompilerDevTools/src/CompilerDevTools.jl | 12 ++---------- Compiler/src/abstractinterpretation.jl | 9 +++++---- Compiler/src/stmtinfo.jl | 2 +- base/docs/basedocs.jl | 4 ++-- base/optimized_generics.jl | 2 +- src/builtins.c | 10 ++++++---- 6 files changed, 17 insertions(+), 22 deletions(-) diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl index cd3f7b7b4bdac..5d0df5ccaa4e4 100644 --- a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -34,19 +34,11 @@ import Core.OptimizedGenerics.CompilerPlugins: typeinf, typeinf_edge Compiler.typeinf_edge(interp, mi.def, mi.specTypes, Core.svec(), parent_frame, false, false) end -# TODO: This needs special compiler support to properly case split for multiple -# method matches, etc. -@noinline function mi_for_tt(tt, world=Base.tls_world_age()) - interp = SplitCacheInterp(; world) - match, _ = Compiler.findsup(tt, Compiler.method_table(interp)) - Base.specialize_method(match) -end - function with_new_compiler(f, args...) - tt = Base.signature_type(f, typeof(args)) + mi = @ccall jl_method_lookup(Any[f, args...]::Ptr{Any}, (1+length(args))::Csize_t, Base.tls_world_age()::Csize_t)::Ref{Core.MethodInstance} world = Base.tls_world_age() new_compiler_ci = Core.OptimizedGenerics.CompilerPlugins.typeinf( - SplitCacheOwner(), mi_for_tt(tt), Compiler.SOURCE_MODE_ABI + SplitCacheOwner(), mi, Compiler.SOURCE_MODE_ABI ) invoke(f, new_compiler_ci, args...) end diff --git a/Compiler/src/abstractinterpretation.jl b/Compiler/src/abstractinterpretation.jl index 24daaf1e6f626..2efd8a2b7264f 100644 --- a/Compiler/src/abstractinterpretation.jl +++ b/Compiler/src/abstractinterpretation.jl @@ -2223,11 +2223,12 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt if isa(method_or_ci, CodeInstance) our_world = sv.world.this argtype = argtypes_to_type(pushfirst!(argtype_tail(argtypes, 4), ft)) - sig = method_or_ci.def.specTypes + specsig = method_or_ci.def.specTypes + defdef = method_or_ci.def.def exct = method_or_ci.exctype - if !hasintersect(argtype, sig) + if !hasintersect(argtype, specsig) return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo())) - elseif !(argtype <: sig) + elseif !(argtype <: specsig) || (isa(defdef, Method) && !(argtype <: defdef.sig)) exct = Union{exct, TypeError} end callee_valid_range = WorldRange(method_or_ci.min_world, method_or_ci.max_world) @@ -2257,7 +2258,7 @@ function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::Stmt # Fall through to generic invoke handling end else - widenconst(types) >: Union{Method, CodeInstance} && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) + hasintersect(widenconst(types), Union{Method, CodeInstance}) && return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false) isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) unwrapped = unwrap_unionall(types) diff --git a/Compiler/src/stmtinfo.jl b/Compiler/src/stmtinfo.jl index 9f0f1f38d4c8a..4f55f068e9456 100644 --- a/Compiler/src/stmtinfo.jl +++ b/Compiler/src/stmtinfo.jl @@ -277,7 +277,7 @@ struct InvokeCICallInfo <: CallInfo edge::CodeInstance end add_edges_impl(edges::Vector{Any}, info::InvokeCICallInfo) = - add_one_edge!(edges, info.edge) + add_inlining_edge!(edges, info.edge) """ info::InvokeCallInfo diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index 141950f5e92ff..c62c980fae1e1 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -2064,8 +2064,8 @@ to `invoke(f, method.sig, args...)`. The `argtypes` argument may be a `CodeInstance`, bypassing both method lookup and specialization. The semantics of this invocation are similar to a function pointer call of the `CodeInstance`'s `invoke` pointer. It is an error to invoke a `CodeInstance` with arguments that do not match its -parent MethodInstance or from a world age not included in the `min_world`/`max_world` range. -It is undefined behavior to invoke a CodeInstance whose behavior does not match the constraints +parent `MethodInstance` or from a world age not included in the `min_world`/`max_world` range. +It is undefined behavior to invoke a `CodeInstance` whose behavior does not match the constraints specified in its fields. For some code instances with `owner !== nothing` (i.e. those generated by external compilers), it may be an error to invoke them after passing through precompilation. This is an advanced interface intended for use with external compiler plugins. diff --git a/base/optimized_generics.jl b/base/optimized_generics.jl index c0b953777ca94..6b1d146b6172b 100644 --- a/base/optimized_generics.jl +++ b/base/optimized_generics.jl @@ -61,7 +61,7 @@ end Implements a pair of functions `typeinf`/`typeinf_edge`. When the optimizer sees a call to `typeinf`, it has license to instead call `typeinf_edge`, supplying the current inference stack in `parent_frame` (but otherwise supplying the arguments -to `typeinf`). typeinf_edge will return the `CodeInstance` that `typeinf` would +to `typeinf`). `typeinf_edge` will return the `CodeInstance` that `typeinf` would have returned at runtime. The optimizer may perform a non-IPO replacement of the call to `typeinf` by the result of `typeinf_edge`. In addition, the IPO-safe fields of the `CodeInstance` may be propagated in IPO mode. diff --git a/src/builtins.c b/src/builtins.c index 3f555da9d2a83..4b9c1ad6043c2 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1590,8 +1590,10 @@ JL_CALLABLE(jl_f_invoke) } else if (jl_is_code_instance(argtypes)) { jl_code_instance_t *codeinst = (jl_code_instance_t*)args[1]; jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke); - if (jl_tuple1_isa(args[0], &args[2], nargs - 2, (jl_datatype_t*)codeinst->def->specTypes)) { - jl_type_error("invoke: argument type error", codeinst->def->specTypes, arg_tuple(args[0], &args[2], nargs - 2)); + // N.B.: specTypes need not be a subtype of the method signature. We need to check both. + if (!jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)codeinst->def->specTypes) || + (jl_is_method(codeinst->def->def.value) && !jl_tuple1_isa(args[0], &args[2], nargs - 1, (jl_datatype_t*)codeinst->def->def.method->sig))) { + jl_type_error("invoke: argument type error", codeinst->def->specTypes, arg_tuple(args[0], &args[2], nargs - 1)); } if (jl_atomic_load_relaxed(&codeinst->min_world) > jl_current_task->world_age || jl_current_task->world_age > jl_atomic_load_relaxed(&codeinst->max_world)) { @@ -1604,10 +1606,10 @@ JL_CALLABLE(jl_f_invoke) if (invoke) { return invoke(args[0], &args[2], nargs - 2, codeinst); } else { - if (codeinst->owner != jl_nothing || !jl_is_method(codeinst->def->def.value)) { + if (codeinst->owner != jl_nothing) { jl_error("Failed to invoke or compile external codeinst"); } - return jl_gf_invoke_by_method(codeinst->def->def.method, args[0], &args[2], nargs - 1); + return jl_invoke(args[0], &args[2], nargs - 1, codeinst->def); } } if (!jl_is_tuple_type(jl_unwrap_unionall(argtypes))) From 184ad5bcc955a86638cbc4cc3147b12f0eafb513 Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Fri, 6 Dec 2024 22:09:44 +0000 Subject: [PATCH 21/25] Add per-task metrics (#56320) Close https://github.com/JuliaLang/julia/issues/47351 (builds on top of https://github.com/JuliaLang/julia/pull/48416) Adds two per-task metrics: - running time = amount of time the task was actually running (according to our scheduler). Note: currently inclusive of GC time, but would be good to be able to separate that out (in a future PR) - wall time = amount of time between the scheduler becoming aware of this task and the task entering a terminal state (i.e. done or failed). We record running time in `wait()`, where the scheduler stops running the task as well as in `yield(t)`, `yieldto(t)` and `throwto(t)`, which bypass the scheduler. Other places where a task stops running (for `Channel`, `ReentrantLock`, `Event`, `Timer` and `Semaphore` are all implemented in terms of `wait(Condition)`, which in turn calls `wait()`. `LibuvStream` similarly calls `wait()`. This should capture everything (albeit, slightly over-counting task CPU time by including any enqueuing work done before we hit `wait()`). The various metrics counters could be a separate inlined struct if we think that's a useful abstraction, but for now i've just put them directly in `jl_task_t`. They are all atomic, except the `metrics_enabled` flag itself (which we now have to check on task start/switch/done even if metrics are not enabled) which is set on task construction and marked `const` on the julia side. In future PRs we could add more per-task metrics, e.g. compilation time, GC time, allocations, potentially a wait-time breakdown (time waiting on locks, channels, in the scheduler run queue, etc.), potentially the number of yields. Perhaps in future there could be ways to enable this on a per-thread and per-task basis. And potentially in future these same timings could be used by `@time` (e.g. writing this same timing data to a ScopedValue like in https://github.com/JuliaLang/julia/pull/55103 but only for tasks lexically scoped to inside the `@time` block). Timings are off by default but can be turned on globally via starting Julia with `--task-metrics=yes` or calling `Base.Experimental.task_metrics(true)`. Metrics are collected for all tasks created when metrics are enabled. In other words, enabling/disabling timings via `Base.Experimental.task_metrics` does not affect existing `Task`s, only new `Task`s. The other new APIs are `Base.Experimental.task_running_time_ns(::Task)` and `Base.Experimental.task_wall_time_ns(::Task)` for retrieving the new metrics. These are safe to call on any task (including the current task, or a task running on another thread). All these are in `Base.Experimental` to give us room to change up the APIs as we add more metrics in future PRs (without worrying about release timelines). cc @NHDaly @kpamnany @d-netto --------- Co-authored-by: Pete Vilter Co-authored-by: K Pamnany Co-authored-by: Nathan Daly Co-authored-by: Valentin Churavy --- NEWS.md | 5 + base/boot.jl | 26 ++- base/experimental.jl | 74 ++++++++ base/options.jl | 1 + base/task.jl | 53 +++++- doc/man/julia.1 | 4 + doc/src/base/multi-threading.md | 8 + doc/src/manual/command-line-interface.md | 1 + src/init.c | 4 + src/jlapi.c | 22 +++ src/jloptions.c | 12 ++ src/jloptions.h | 1 + src/jltypes.c | 46 ++++- src/julia.h | 26 ++- src/julia_internal.h | 3 + src/task.c | 31 ++++ src/threading.c | 2 + test/cmdlineargs.jl | 9 + test/core.jl | 3 +- test/threads_exec.jl | 224 +++++++++++++++++++++++ 20 files changed, 529 insertions(+), 26 deletions(-) diff --git a/NEWS.md b/NEWS.md index b77a786c24823..641b84b480bb4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,11 @@ New language features - atomic set once (`@atomiconce v[3] = 2`), - atomic swap (`x = @atomicswap v[3] = 2`), and - atomic replace (`x = @atomicreplace v[3] 2=>5`). +- New option `--task-metrics=yes` to enable the collection of per-task timing information, + which can also be enabled/disabled at runtime with `Base.Experimental.task_metrics(::Bool)`. ([#56320]) + The available metrics are: + - actual running time for the task (`Base.Experimental.task_running_time_ns`), and + - wall-time for the task (`Base.Experimental.task_wall_time_ns`). Language changes ---------------- diff --git a/base/boot.jl b/base/boot.jl index f66ee69780193..4badedae3cfb7 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -175,15 +175,33 @@ #end #mutable struct Task -# parent::Task +# next::Any +# queue::Any # storage::Any -# state::Symbol # donenotify::Any # result::Any -# exception::Any -# backtrace::Any # scope::Any # code::Any +# @atomic _state::UInt8 +# sticky::UInt8 +# priority::UInt16 +# @atomic _isexception::UInt8 +# pad00::UInt8 +# pad01::UInt8 +# pad02::UInt8 +# rngState0::UInt64 +# rngState1::UInt64 +# rngState2::UInt64 +# rngState3::UInt64 +# rngState4::UInt64 +# const metrics_enabled::Bool +# pad10::UInt8 +# pad11::UInt8 +# pad12::UInt8 +# @atomic first_enqueued_at::UInt64 +# @atomic last_started_running_at::UInt64 +# @atomic running_time_ns::UInt64 +# @atomic finished_at::UInt64 #end export diff --git a/base/experimental.jl b/base/experimental.jl index 411bb2407cdc5..17871b4f346d6 100644 --- a/base/experimental.jl +++ b/base/experimental.jl @@ -503,4 +503,78 @@ usage, by eliminating the tracking of those possible invalidation. """ disable_new_worlds() = ccall(:jl_disable_new_worlds, Cvoid, ()) +### Task metrics + +""" + Base.Experimental.task_metrics(::Bool) + +Enable or disable the collection of per-task metrics. +A `Task` created when `Base.Experimental.task_metrics(true)` is in effect will have +[`Base.Experimental.task_running_time_ns`](@ref) and [`Base.Experimental.task_wall_time_ns`](@ref) +timing information available. + +!!! note + Task metrics can be enabled at start-up via the `--task-metrics=yes` command line option. +""" +function task_metrics(b::Bool) + if b + ccall(:jl_task_metrics_enable, Cvoid, ()) + else + ccall(:jl_task_metrics_disable, Cvoid, ()) + end + return nothing end + +""" + Base.Experimental.task_running_time_ns(t::Task) -> Union{UInt64, Nothing} + +Return the total nanoseconds that the task `t` has spent running. +This metric is only updated when `t` yields or completes unless `t` is the current task, in +which it will be updated continuously. +See also [`Base.Experimental.task_wall_time_ns`](@ref). + +Returns `nothing` if task timings are not enabled. +See [`Base.Experimental.task_metrics`](@ref). + +!!! note "This metric is from the Julia scheduler" + A task may be running on an OS thread that is descheduled by the OS + scheduler, this time still counts towards the metric. + +!!! compat "Julia 1.12" + This method was added in Julia 1.12. +""" +function task_running_time_ns(t::Task=current_task()) + t.metrics_enabled || return nothing + if t == current_task() + # These metrics fields can't update while we're running. + # But since we're running we need to include the time since we last started running! + return t.running_time_ns + (time_ns() - t.last_started_running_at) + else + return t.running_time_ns + end +end + +""" + Base.Experimental.task_wall_time_ns(t::Task) -> Union{UInt64, Nothing} + +Return the total nanoseconds that the task `t` was runnable. +This is the time since the task first entered the run queue until the time at which it +completed, or until the current time if the task has not yet completed. +See also [`Base.Experimental.task_running_time_ns`](@ref). + +Returns `nothing` if task timings are not enabled. +See [`Base.Experimental.task_metrics`](@ref). + +!!! compat "Julia 1.12" + This method was added in Julia 1.12. +""" +function task_wall_time_ns(t::Task=current_task()) + t.metrics_enabled || return nothing + start_at = t.first_enqueued_at + start_at == 0 && return UInt64(0) + end_at = t.finished_at + end_at == 0 && return time_ns() - start_at + return end_at - start_at +end + +end # module diff --git a/base/options.jl b/base/options.jl index 07baa3b51f65b..7e7808bd5c047 100644 --- a/base/options.jl +++ b/base/options.jl @@ -61,6 +61,7 @@ struct JLOptions heap_size_hint::UInt64 trace_compile_timing::Int8 trim::Int8 + task_metrics::Int8 end # This runs early in the sysimage != is not defined yet diff --git a/base/task.jl b/base/task.jl index 2a922c4b85f24..951e980ee903c 100644 --- a/base/task.jl +++ b/base/task.jl @@ -977,7 +977,11 @@ function enq_work(t::Task) return t end -schedule(t::Task) = enq_work(t) +function schedule(t::Task) + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) + enq_work(t) +end """ schedule(t::Task, [val]; error=false) @@ -1031,6 +1035,8 @@ function schedule(t::Task, @nospecialize(arg); error=false) t.queue === nothing || Base.error("schedule: Task not runnable") setfield!(t, :result, arg) end + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) enq_work(t) return t end @@ -1064,11 +1070,15 @@ immediately yields to `t` before calling the scheduler. Throws a `ConcurrencyViolationError` if `t` is the currently running task. """ function yield(t::Task, @nospecialize(x=nothing)) - current = current_task() - t === current && throw(ConcurrencyViolationError("Cannot yield to currently running task!")) + ct = current_task() + t === ct && throw(ConcurrencyViolationError("Cannot yield to currently running task!")) (t._state === task_state_runnable && t.queue === nothing) || throw(ConcurrencyViolationError("yield: Task not runnable")) + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-> wait_time + maybe_record_enqueued!(t) t.result = x - enq_work(current) + enq_work(ct) set_next_task(t) return try_yieldto(ensure_rescheduled) end @@ -1082,6 +1092,7 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid or scheduling in any way. Its use is discouraged. """ function yieldto(t::Task, @nospecialize(x=nothing)) + ct = current_task() # TODO: these are legacy behaviors; these should perhaps be a scheduler # state error instead. if t._state === task_state_done @@ -1089,6 +1100,10 @@ function yieldto(t::Task, @nospecialize(x=nothing)) elseif t._state === task_state_failed throw(t.result) end + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-unfairly-> wait_time + maybe_record_enqueued!(t) t.result = x set_next_task(t) return try_yieldto(identity) @@ -1102,6 +1117,10 @@ function try_yieldto(undo) rethrow() end ct = current_task() + # [task] wait_time -(re)started-> user_time + if ct.metrics_enabled + @atomic :monotonic ct.last_started_running_at = time_ns() + end if ct._isexception exc = ct.result ct.result = nothing @@ -1115,6 +1134,11 @@ end # yield to a task, throwing an exception in it function throwto(t::Task, @nospecialize exc) + ct = current_task() + # [task] user_time -yield-> wait_time + record_running_time!(ct) + # [task] created -scheduled-unfairly-> wait_time + maybe_record_enqueued!(t) t.result = exc t._isexception = true set_next_task(t) @@ -1167,6 +1191,9 @@ checktaskempty = Partr.multiq_check_empty end function wait() + ct = current_task() + # [task] user_time -yield-or-done-> wait_time + record_running_time!(ct) GC.safepoint() W = workqueue_for(Threads.threadid()) poptask(W) @@ -1181,3 +1208,21 @@ if Sys.iswindows() else pause() = ccall(:pause, Cvoid, ()) end + +# update the `running_time_ns` field of `t` to include the time since it last started running. +function record_running_time!(t::Task) + if t.metrics_enabled && !istaskdone(t) + @atomic :monotonic t.running_time_ns += time_ns() - t.last_started_running_at + end + return t +end + +# if this is the first time `t` has been added to the run queue +# (or the first time it has been unfairly yielded to without being added to the run queue) +# then set the `first_enqueued_at` field to the current time. +function maybe_record_enqueued!(t::Task) + if t.metrics_enabled && t.first_enqueued_at == 0 + @atomic :monotonic t.first_enqueued_at = time_ns() + end + return t +end diff --git a/doc/man/julia.1 b/doc/man/julia.1 index 56cb690d66eeb..2da11ae1b3f18 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -294,6 +294,10 @@ If --trace-compile is enabled show how long each took to compile in ms --trace-dispatch={stderr|name} Print precompile statements for methods dispatched during execution or save to stderr or a path. +.TP +--task-metrics={yes|no*} +Enable the collection of per-task metrics. + .TP -image-codegen Force generate code in imaging mode diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md index 9e3bc49acf6dc..81d1d83d765ac 100644 --- a/doc/src/base/multi-threading.md +++ b/doc/src/base/multi-threading.md @@ -65,3 +65,11 @@ These building blocks are used to create the regular synchronization objects. ```@docs Base.Threads.SpinLock ``` + +## Task metrics (Experimental) + +```@docs +Base.Experimental.task_metrics +Base.Experimental.task_running_time_ns +Base.Experimental.task_wall_time_ns +``` diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index 734d7031db5e8..9b06deaf0ea8a 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -203,6 +203,7 @@ The following is a complete list of command-line switches available when launchi |`--code-coverage=tracefile.info` |Append coverage information to the LCOV tracefile (filename supports format tokens).| |`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")| |`--track-allocation=@` |Count bytes but only in files that fall under the given file path/directory. The `@` prefix is required to select this option. A `@` with no path will track the current directory.| +|`--task-metrics={yes\|no*}` |Enable the collection of per-task metrics| |`--bug-report=KIND` |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.| |`--heap-size-hint=` |Forces garbage collection if memory usage is higher than the given value. The value may be specified as a number of bytes, optionally in units of KB, MB, GB, or TB, or as a percentage of physical memory with %.| |`--compile={yes*\|no\|all\|min}` |Enable or disable JIT compiler, or request exhaustive or minimal compilation| diff --git a/src/init.c b/src/init.c index 1cd14e8556cc6..7b41e63e98455 100644 --- a/src/init.c +++ b/src/init.c @@ -849,6 +849,10 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) #if defined(_COMPILER_GCC_) && __GNUC__ >= 12 #pragma GCC diagnostic ignored "-Wdangling-pointer" #endif + if (jl_options.task_metrics == JL_OPTIONS_TASK_METRICS_ON) { + // enable before creating the root task so it gets timings too. + jl_atomic_fetch_add(&jl_task_metrics_enabled, 1); + } // warning: this changes `jl_current_task`, so be careful not to call that from this function jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi); #pragma GCC diagnostic pop diff --git a/src/jlapi.c b/src/jlapi.c index a3621385a437e..defb2db6ac911 100644 --- a/src/jlapi.c +++ b/src/jlapi.c @@ -809,6 +809,28 @@ JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void) return jl_atomic_load_relaxed(&jl_cumulative_recompile_time); } +/** + * @brief Enable per-task timing. + */ +JL_DLLEXPORT void jl_task_metrics_enable(void) +{ + // Increment the flag to allow reentrant callers. + jl_atomic_fetch_add(&jl_task_metrics_enabled, 1); +} + +/** + * @brief Disable per-task timing. + */ +JL_DLLEXPORT void jl_task_metrics_disable(void) +{ + // Prevent decrementing the counter below zero + uint8_t enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled); + while (enabled > 0) { + if (jl_atomic_cmpswap(&jl_task_metrics_enabled, &enabled, enabled-1)) + break; + } +} + /** * @brief Retrieve floating-point environment constants. * diff --git a/src/jloptions.c b/src/jloptions.c index f81cf0453db21..c68b5ce193d98 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -152,6 +152,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // heap-size-hint 0, // trace_compile_timing JL_TRIM_NO, // trim + 0, // task_metrics }; jl_options_initialized = 1; } @@ -316,6 +317,7 @@ static const char opts_hidden[] = " comment if color is not supported\n" " --trace-compile-timing If --trace-compile is enabled show how long each took to\n" " compile in ms\n" + " --task-metrics={yes|no*} Enable collection of per-task timing data.\n" " --image-codegen Force generate code in imaging mode\n" " --permalloc-pkgimg={yes|no*} Copy the data section of package images into memory\n" " --trim={no*|safe|unsafe|unsafe-warn}\n" @@ -347,6 +349,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_trace_compile, opt_trace_compile_timing, opt_trace_dispatch, + opt_task_metrics, opt_math_mode, opt_worker, opt_bind_to, @@ -427,6 +430,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "trace-compile", required_argument, 0, opt_trace_compile }, { "trace-compile-timing", no_argument, 0, opt_trace_compile_timing }, { "trace-dispatch", required_argument, 0, opt_trace_dispatch }, + { "task-metrics", required_argument, 0, opt_task_metrics }, { "math-mode", required_argument, 0, opt_math_mode }, { "handle-signals", required_argument, 0, opt_handle_signals }, // hidden command line options @@ -978,6 +982,14 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg); break; + case opt_task_metrics: + if (!strcmp(optarg, "no")) + jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_OFF; + else if (!strcmp(optarg, "yes")) + jl_options.task_metrics = JL_OPTIONS_TASK_METRICS_ON; + else + jl_errorf("julia: invalid argument to --task-metrics={yes|no} (%s)", optarg); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index b9910702f3f9b..211122242cbbd 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -65,6 +65,7 @@ typedef struct { uint64_t heap_size_hint; int8_t trace_compile_timing; int8_t trim; + int8_t task_metrics; } jl_options_t; #endif diff --git a/src/jltypes.c b/src/jltypes.c index 6c6325d84a5ff..abd1f62092035 100644 --- a/src/jltypes.c +++ b/src/jltypes.c @@ -3746,7 +3746,7 @@ void jl_init_types(void) JL_GC_DISABLED NULL, jl_any_type, jl_emptysvec, - jl_perm_symsvec(16, + jl_perm_symsvec(27, "next", "queue", "storage", @@ -3754,16 +3754,27 @@ void jl_init_types(void) JL_GC_DISABLED "result", "scope", "code", + "_state", + "sticky", + "priority", + "_isexception", + "pad00", + "pad01", + "pad02", "rngState0", "rngState1", "rngState2", "rngState3", "rngState4", - "_state", - "sticky", - "_isexception", - "priority"), - jl_svec(16, + "metrics_enabled", + "pad10", + "pad11", + "pad12", + "first_enqueued_at", + "last_started_running_at", + "running_time_ns", + "finished_at"), + jl_svec(27, jl_any_type, jl_any_type, jl_any_type, @@ -3771,21 +3782,36 @@ void jl_init_types(void) JL_GC_DISABLED jl_any_type, jl_any_type, jl_any_type, + jl_uint8_type, + jl_bool_type, + jl_uint16_type, + jl_bool_type, + jl_uint8_type, + jl_uint8_type, + jl_uint8_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, jl_uint64_type, - jl_uint8_type, jl_bool_type, - jl_bool_type, - jl_uint16_type), + jl_uint8_type, + jl_uint8_type, + jl_uint8_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type, + jl_uint64_type), jl_emptysvec, 0, 1, 6); XX(task); jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type); jl_svecset(jl_task_type->types, 0, listt); - const static uint32_t task_atomicfields[1] = {0x00001000}; // Set fields 13 as atomic + // Set field 20 (metrics_enabled) as const + // Set fields 8 (_state) and 24-27 (metric counters) as atomic + const static uint32_t task_constfields[1] = { 0b00000000000010000000000000000000 }; + const static uint32_t task_atomicfields[1] = { 0b00000111100000000000000010000000 }; + jl_task_type->name->constfields = task_constfields; jl_task_type->name->atomicfields = task_atomicfields; tv = jl_svec2(tvar("A"), tvar("R")); diff --git a/src/julia.h b/src/julia.h index 6c0dd700f9472..a9864aad16ccc 100644 --- a/src/julia.h +++ b/src/julia.h @@ -2276,16 +2276,25 @@ typedef struct _jl_task_t { jl_value_t *result; jl_value_t *scope; jl_function_t *start; - // 4 byte padding on 32-bit systems - // uint32_t padding0; - uint64_t rngState[JL_RNG_SIZE]; _Atomic(uint8_t) _state; uint8_t sticky; // record whether this Task can be migrated to a new thread - _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with - // 1 byte padding - // uint8_t padding1; - // multiqueue priority uint16_t priority; + _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with + uint8_t pad0[3]; + // === 64 bytes (cache line) + uint64_t rngState[JL_RNG_SIZE]; + // flag indicating whether or not to record timing metrics for this task + uint8_t metrics_enabled; + uint8_t pad1[3]; + // timestamp this task first entered the run queue + _Atomic(uint64_t) first_enqueued_at; + // timestamp this task was most recently scheduled to run + _Atomic(uint64_t) last_started_running_at; + // time this task has spent running; updated when it yields or finishes. + _Atomic(uint64_t) running_time_ns; + // === 64 bytes (cache line) + // timestamp this task finished (i.e. entered state DONE or FAILED). + _Atomic(uint64_t) finished_at; // hidden state: // cached floating point environment @@ -2612,6 +2621,9 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT; #define JL_TRIM_UNSAFE 2 #define JL_TRIM_UNSAFE_WARN 3 +#define JL_OPTIONS_TASK_METRICS_OFF 0 +#define JL_OPTIONS_TASK_METRICS_ON 1 + // Version information #include // Generated file diff --git a/src/julia_internal.h b/src/julia_internal.h index 2178f603441e0..4741316093f95 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -316,6 +316,9 @@ extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time; extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time; +// Global *atomic* integer controlling *process-wide* task timing. +extern JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled; + #define jl_return_address() ((uintptr_t)__builtin_return_address(0)) STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a) diff --git a/src/task.c b/src/task.c index 5e1172a96a409..1a50d6fcbcf65 100644 --- a/src/task.c +++ b/src/task.c @@ -313,6 +313,13 @@ void JL_NORETURN jl_finish_task(jl_task_t *ct) { JL_PROBE_RT_FINISH_TASK(ct); JL_SIGATOMIC_BEGIN(); + if (ct->metrics_enabled) { + // [task] user_time -finished-> wait_time + assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0); + uint64_t now = jl_hrtime(); + jl_atomic_store_relaxed(&ct->finished_at, now); + jl_atomic_fetch_add_relaxed(&ct->running_time_ns, now - jl_atomic_load_relaxed(&ct->last_started_running_at)); + } if (jl_atomic_load_relaxed(&ct->_isexception)) jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED); else @@ -1146,6 +1153,11 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->ptls = NULL; t->world_age = ct->world_age; t->reentrant_timing = 0; + t->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0; + jl_atomic_store_relaxed(&t->first_enqueued_at, 0); + jl_atomic_store_relaxed(&t->last_started_running_at, 0); + jl_atomic_store_relaxed(&t->running_time_ns, 0); + jl_atomic_store_relaxed(&t->finished_at, 0); jl_timing_task_init(t); if (t->ctx.copy_stack) @@ -1245,6 +1257,12 @@ CFI_NORETURN fesetenv(&ct->fenv); ct->ctx.started = 1; + if (ct->metrics_enabled) { + // [task] wait_time -started-> user_time + assert(jl_atomic_load_relaxed(&ct->first_enqueued_at) != 0); + assert(jl_atomic_load_relaxed(&ct->last_started_running_at) == 0); + jl_atomic_store_relaxed(&ct->last_started_running_at, jl_hrtime()); + } JL_PROBE_RT_START_TASK(ct); jl_timing_block_task_enter(ct, ptls, NULL); if (jl_atomic_load_relaxed(&ct->_isexception)) { @@ -1596,6 +1614,19 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) ct->ptls = ptls; ct->world_age = 1; // OK to run Julia code on this task ct->reentrant_timing = 0; + jl_atomic_store_relaxed(&ct->running_time_ns, 0); + jl_atomic_store_relaxed(&ct->finished_at, 0); + ct->metrics_enabled = jl_atomic_load_relaxed(&jl_task_metrics_enabled) != 0; + if (ct->metrics_enabled) { + // [task] created -started-> user_time + uint64_t now = jl_hrtime(); + jl_atomic_store_relaxed(&ct->first_enqueued_at, now); + jl_atomic_store_relaxed(&ct->last_started_running_at, now); + } + else { + jl_atomic_store_relaxed(&ct->first_enqueued_at, 0); + jl_atomic_store_relaxed(&ct->last_started_running_at, 0); + } ptls->root_task = ct; jl_atomic_store_relaxed(&ptls->current_task, ct); JL_GC_PROMISE_ROOTED(ct); diff --git a/src/threading.c b/src/threading.c index 8f0dfb3330885..ac9cc276d613a 100644 --- a/src/threading.c +++ b/src/threading.c @@ -49,6 +49,8 @@ JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0; JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0; +JL_DLLEXPORT _Atomic(uint8_t) jl_task_metrics_enabled = 0; + JL_DLLEXPORT void *jl_get_ptls_states(void) { // mostly deprecated: use current_task instead diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index cc3f8950f0dc0..5df174694049d 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -783,6 +783,15 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_DEFAULT end + let JL_OPTIONS_TASK_METRICS_OFF = 0, JL_OPTIONS_TASK_METRICS_ON = 1 + @test parse(Int,readchomp(`$exename -E + "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_OFF + @test parse(Int, readchomp(`$exename --task-metrics=yes -E + "Int(Base.JLOptions().task_metrics)"`)) == JL_OPTIONS_TASK_METRICS_ON + @test !parse(Bool, readchomp(`$exename -E "current_task().metrics_enabled"`)) + @test parse(Bool, readchomp(`$exename --task-metrics=yes -E "current_task().metrics_enabled"`)) + end + # --worker takes default / custom as argument (default/custom arguments # tested in test/parallel.jl) @test errors_not_signals(`$exename --worker=true`) diff --git a/test/core.jl b/test/core.jl index 63952e8728e1e..ba1803a137392 100644 --- a/test/core.jl +++ b/test/core.jl @@ -25,6 +25,7 @@ for (T, c) in ( (TypeVar, [:name, :ub, :lb]), (Core.Memory, [:length, :ptr]), (Core.GenericMemoryRef, [:mem, :ptr_or_offset]), + (Task, [:metrics_enabled]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if isconst(T, i))) == Set(c) end @@ -42,7 +43,7 @@ for (T, c) in ( (DataType, [:types, :layout]), (Core.Memory, []), (Core.GenericMemoryRef, []), - (Task, [:_state]) + (Task, [:_state, :running_time_ns, :finished_at, :first_enqueued_at, :last_started_running_at]), ) @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c) end diff --git a/test/threads_exec.jl b/test/threads_exec.jl index ac54dd009390c..d77cf06905f44 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -3,6 +3,7 @@ using Test using Base.Threads using Base.Threads: SpinLock, threadpoolsize +using LinearAlgebra: peakflops # for cfunction_closure include("testenv.jl") @@ -1312,4 +1313,227 @@ end end end end + +@testset "Base.Experimental.task_metrics" begin + t = Task(() -> nothing) + @test_throws "const field" t.metrics_enabled = true + is_task_metrics_enabled() = fetch(Threads.@spawn current_task().metrics_enabled) + @test !is_task_metrics_enabled() + try + @testset "once" begin + Base.Experimental.task_metrics(true) + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) + @test !is_task_metrics_enabled() + end + @testset "multiple" begin + Base.Experimental.task_metrics(true) # 1 + Base.Experimental.task_metrics(true) # 2 + Base.Experimental.task_metrics(true) # 3 + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) # 2 + @test is_task_metrics_enabled() + Base.Experimental.task_metrics(false) # 1 + @test is_task_metrics_enabled() + @sync for i in 1:5 # 0 (not negative) + Threads.@spawn Base.Experimental.task_metrics(false) + end + @test !is_task_metrics_enabled() + Base.Experimental.task_metrics(true) # 1 + @test is_task_metrics_enabled() + end + finally + while is_task_metrics_enabled() + Base.Experimental.task_metrics(false) + end + end +end + +@testset "task time counters" begin + @testset "enabled" begin + try + Base.Experimental.task_metrics(true) + start_time = time_ns() + t = Threads.@spawn peakflops() + wait(t) + end_time = time_ns() + wall_time_delta = end_time - start_time + @test t.metrics_enabled + @test Base.Experimental.task_running_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t) + @test wall_time_delta > Base.Experimental.task_wall_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end + @testset "disabled" begin + t = Threads.@spawn peakflops() + wait(t) + @test !t.metrics_enabled + @test isnothing(Base.Experimental.task_running_time_ns(t)) + @test isnothing(Base.Experimental.task_wall_time_ns(t)) + end + @testset "task not run" begin + t1 = Task(() -> nothing) + @test !t1.metrics_enabled + @test isnothing(Base.Experimental.task_running_time_ns(t1)) + @test isnothing(Base.Experimental.task_wall_time_ns(t1)) + try + Base.Experimental.task_metrics(true) + t2 = Task(() -> nothing) + @test t2.metrics_enabled + @test Base.Experimental.task_running_time_ns(t2) == 0 + @test Base.Experimental.task_wall_time_ns(t2) == 0 + finally + Base.Experimental.task_metrics(false) + end + end + @testset "task failure" begin + try + Base.Experimental.task_metrics(true) + t = Threads.@spawn error("this task failed") + @test_throws "this task failed" wait(t) + @test Base.Experimental.task_running_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) > 0 + @test Base.Experimental.task_wall_time_ns(t) >= Base.Experimental.task_running_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end + @testset "direct yield(t)" begin + try + Base.Experimental.task_metrics(true) + start = time_ns() + t_outer = Threads.@spawn begin + t_inner = Task(() -> peakflops()) + t_inner.sticky = false + # directly yield to `t_inner` rather calling `schedule(t_inner)` + yield(t_inner) + wait(t_inner) + @test Base.Experimental.task_running_time_ns(t_inner) > 0 + @test Base.Experimental.task_wall_time_ns(t_inner) > 0 + @test Base.Experimental.task_wall_time_ns(t_inner) >= Base.Experimental.task_running_time_ns(t_inner) + end + wait(t_outer) + delta = time_ns() - start + @test Base.Experimental.task_running_time_ns(t_outer) > 0 + @test Base.Experimental.task_wall_time_ns(t_outer) > 0 + @test Base.Experimental.task_wall_time_ns(t_outer) >= Base.Experimental.task_running_time_ns(t_outer) + @test Base.Experimental.task_wall_time_ns(t_outer) < delta + finally + Base.Experimental.task_metrics(false) + end + end + @testset "bad schedule" begin + try + Base.Experimental.task_metrics(true) + t1 = Task((x) -> 1) + schedule(t1) # MethodError + yield() + @assert istaskfailed(t1) + @test Base.Experimental.task_running_time_ns(t1) > 0 + @test Base.Experimental.task_wall_time_ns(t1) > 0 + foo(a, b) = a + b + t2 = Task(() -> (peakflops(); foo(wait()))) + schedule(t2) + yield() + @assert istaskstarted(t1) && !istaskdone(t2) + schedule(t2, 1) + yield() + @assert istaskfailed(t2) + @test Base.Experimental.task_running_time_ns(t2) > 0 + @test Base.Experimental.task_wall_time_ns(t2) > 0 + finally + Base.Experimental.task_metrics(false) + end + end + @testset "continuously update until task done" begin + try + Base.Experimental.task_metrics(true) + last_running_time = Ref(typemax(Int)) + last_wall_time = Ref(typemax(Int)) + t = Threads.@spawn begin + running_time = Base.Experimental.task_running_time_ns() + wall_time = Base.Experimental.task_wall_time_ns() + for _ in 1:5 + x = time_ns() + while time_ns() < x + 100 + end + new_running_time = Base.Experimental.task_running_time_ns() + new_wall_time = Base.Experimental.task_wall_time_ns() + @test new_running_time > running_time + @test new_wall_time > wall_time + running_time = new_running_time + wall_time = new_wall_time + end + last_running_time[] = running_time + last_wall_time[] = wall_time + end + wait(t) + final_running_time = Base.Experimental.task_running_time_ns(t) + final_wall_time = Base.Experimental.task_wall_time_ns(t) + @test last_running_time[] < final_running_time + @test last_wall_time[] < final_wall_time + # ensure many more tasks are run to make sure the counters are + # not being updated after a task is done e.g. only when a new task is found + @sync for _ in 1:Threads.nthreads() + Threads.@spawn rand() + end + @test final_running_time == Base.Experimental.task_running_time_ns(t) + @test final_wall_time == Base.Experimental.task_wall_time_ns(t) + finally + Base.Experimental.task_metrics(false) + end + end +end + +@testset "task time counters: lots of spawns" begin + using Dates + try + Base.Experimental.task_metrics(true) + # create more tasks than we have threads. + # - all tasks must have: cpu time <= wall time + # - some tasks must have: cpu time < wall time + # - summing across all tasks we must have: total cpu time <= available cpu time + n_tasks = 2 * Threads.nthreads(:default) + cpu_times = Vector{UInt64}(undef, n_tasks) + wall_times = Vector{UInt64}(undef, n_tasks) + start_time = time_ns() + @sync begin + for i in 1:n_tasks + start_time_i = time_ns() + task_i = Threads.@spawn peakflops() + Threads.@spawn begin + wait(task_i) + end_time_i = time_ns() + wall_time_delta_i = end_time_i - start_time_i + cpu_times[$i] = cpu_time_i = Base.Experimental.task_running_time_ns(task_i) + wall_times[$i] = wall_time_i = Base.Experimental.task_wall_time_ns(task_i) + # task should have recorded some cpu-time and some wall-time + @test cpu_time_i > 0 + @test wall_time_i > 0 + # task cpu-time cannot be greater than its wall-time + @test wall_time_i >= cpu_time_i + # task wall-time must be less than our manually measured wall-time + # between calling `@spawn` and returning from `wait`. + @test wall_time_delta_i > wall_time_i + end + end + end + end_time = time_ns() + wall_time_delta = (end_time - start_time) + available_cpu_time = wall_time_delta * Threads.nthreads(:default) + summed_cpu_time = sum(cpu_times) + # total CPU time from all tasks can't exceed what was actually available. + @test available_cpu_time > summed_cpu_time + # some tasks must have cpu-time less than their wall-time, because we had more tasks + # than threads. + summed_wall_time = sum(wall_times) + @test summed_wall_time > summed_cpu_time + finally + Base.Experimental.task_metrics(false) + end +end + end # main testset From 3a68b035ccbc4837448119099e059defaa6d5b98 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Fri, 6 Dec 2024 21:58:52 -0700 Subject: [PATCH 22/25] Fully outline all GlobalRefs (#56746) This is an alternative to #56714 that goes in the opposite direction - just outline all GlobalRefs during lowering. It is a lot simpler that #56714 at the cost of some size increase. Numbers: sys.so .ldata size: This PR: 159.8 MB Master: 158.9 MB I don't have numbers of #56714, because it's not fully complete. Additionally, it's possible that restricting GlobalRefs from arguments position would let us use a more efficient encoding in the future. --- Compiler/test/inference.jl | 9 +++--- doc/src/base/reflection.md | 10 ++++--- src/ast.c | 37 ----------------------- src/jlfrontend.scm | 1 - src/julia-syntax.scm | 1 - stdlib/Test/src/Test.jl | 5 ++-- test/core.jl | 61 ++++++++++++++++++++++++-------------- test/staged.jl | 12 ++++---- 8 files changed, 57 insertions(+), 79 deletions(-) diff --git a/Compiler/test/inference.jl b/Compiler/test/inference.jl index b3099897faf51..c8b599adb1323 100644 --- a/Compiler/test/inference.jl +++ b/Compiler/test/inference.jl @@ -3031,14 +3031,13 @@ end # issue #28279 # ensure that lowering doesn't move these into statement position, which would require renumbering -using Base: +, - -function f28279(b::Bool) +@eval function f28279(b::Bool) let i = 1 - while i > b - i -= 1 + while $(>)(i, b) + i = $(-)(i, 1) end if b end - return i + 1 + return $(+)(i, 1) end end code28279 = code_lowered(f28279, (Bool,))[1].code diff --git a/doc/src/base/reflection.md b/doc/src/base/reflection.md index 9228fb38322df..d88c3c8b0d0cf 100644 --- a/doc/src/base/reflection.md +++ b/doc/src/base/reflection.md @@ -100,10 +100,12 @@ as assignments, branches, and calls: ```jldoctest; setup = (using Base: +, sin) julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] )) :($(Expr(:thunk, CodeInfo( -1 ─ %1 = dynamic 1 + 2 -│ %2 = dynamic sin(0.5) -│ %3 = dynamic Base.vect(%1, %2) -└── return %3 +1 ─ %1 = :+ +│ %2 = dynamic (%1)(1, 2) +│ %3 = sin +│ %4 = dynamic (%3)(0.5) +│ %5 = dynamic Base.vect(%2, %4) +└── return %5 )))) ``` diff --git a/src/ast.c b/src/ast.c index 474c0661f5230..959bac26c2c15 100644 --- a/src/ast.c +++ b/src/ast.c @@ -181,42 +181,6 @@ static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint return (bpart != NULL && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL) ? fl_ctx->T : fl_ctx->F; } -static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) -{ - // tells whether a var is defined, in the sense that accessing it is nothrow - // can take either a symbol or a module and a symbol - jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); - jl_module_t *mod = ctx->module; - jl_sym_t *var = NULL; - if (nargs == 1) { - (void)tosymbol(fl_ctx, args[0], "nothrow-julia-global"); - var = scmsym_to_julia(fl_ctx, args[0]); - } - else { - argcount(fl_ctx, "nothrow-julia-global", nargs, 2); - value_t argmod = args[0]; - if (iscvalue(argmod) && cv_class((cvalue_t*)ptr(argmod)) == jl_ast_ctx(fl_ctx)->jvtype) { - mod = *(jl_module_t**)cv_data((cvalue_t*)ptr(argmod)); - JL_GC_PROMISE_ROOTED(mod); - } else { - if (!iscons(argmod) || !issymbol(car_(argmod)) || scmsym_to_julia(fl_ctx, car_(argmod)) != jl_thismodule_sym) { - lerrorf(fl_ctx, fl_ctx->ArgError, "nothrow-julia-global: Unknown globalref module kind"); - } - } - (void)tosymbol(fl_ctx, args[1], "nothrow-julia-global"); - var = scmsym_to_julia(fl_ctx, args[1]); - } - jl_binding_t *b = jl_get_module_binding(mod, var, 0); - jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age); - jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age); - if (!bpart) - return fl_ctx->F; - if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) - return fl_ctx->F; - return (jl_bkind_is_some_constant(decode_restriction_kind(pku)) ? - decode_restriction_value(pku) : jl_atomic_load_relaxed(&b->value)) != NULL ? fl_ctx->T : fl_ctx->F; -} - // Used to generate a unique suffix for a given symbol (e.g. variable or type name) // first argument contains a stack of method definitions seen so far by `closure-convert` in flisp. // if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes @@ -288,7 +252,6 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m static const builtinspec_t julia_flisp_ast_ext[] = { { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint - { "nothrow-julia-global", fl_nothrow_julia_global }, { "current-julia-module-counter", fl_module_unique_name }, { "julia-scalar?", fl_julia_scalar }, { NULL, NULL } diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm index 3d46940d6fcbb..9c69da199c0cd 100644 --- a/src/jlfrontend.scm +++ b/src/jlfrontend.scm @@ -31,7 +31,6 @@ ;; this is overwritten when we run in actual julia (define (defined-julia-global v) #f) -(define (nothrow-julia-global v) #f) ;; parser entry points diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index c7ca5d553bb31..c522a565f462a 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -4356,7 +4356,6 @@ f(x) = yt(x) (define (valid-ir-argument? e) (or (simple-atom? e) - (and (globalref? e) (nothrow-julia-global (cadr e) (caddr e))) (and (pair? e) (memq (car e) '(quote inert top core slot static_parameter))))) diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl index 7c985828d47f2..464cb2ac9de9c 100644 --- a/stdlib/Test/src/Test.jl +++ b/stdlib/Test/src/Test.jl @@ -1965,8 +1965,9 @@ Arguments #self#::Core.Const(f) a::Int64 Body::UNION{FLOAT64, INT64} -1 ─ %1 = (a > 1)::Bool -└── goto #3 if not %1 +1 ─ %1 = :>::Core.Const(>) +│ %2 = (%1)(a, 1)::Bool +└── goto #3 if not %2 2 ─ return 1 3 ─ return 1.0 diff --git a/test/core.jl b/test/core.jl index ba1803a137392..7e4f655222ea5 100644 --- a/test/core.jl +++ b/test/core.jl @@ -7435,6 +7435,7 @@ end @test isa(Core.eval(@__MODULE__, :(Bar31062(()))), Bar31062) @test precompile(identity, (Foo31062,)) +using Core: SSAValue ftype_eval = Ref(0) FieldTypeA = String FieldTypeE = UInt32 @@ -7458,27 +7459,41 @@ let fc = FieldConvert(1.0, [2.0], 0x3, 0x4, 0x5) end @test ftype_eval[] == 1 let code = code_lowered(FieldConvert)[1].code - local fc_global_ssa, sp1_ssa, apply_type_ssa, field_type_ssa, - field_type2_ssa, field_type4_ssa, field_type5_ssa, - slot_read_1, slot_read_2, slot_read_3, slot_read_4, - new_ssa - @test code[(fc_global_ssa = 1;)] == GlobalRef(@__MODULE__, :FieldConvert) - @test code[(sp1_ssa = 2;)] == Expr(:static_parameter, 1) - @test code[(apply_type_ssa = 3;)] == Expr(:call, GlobalRef(Core, :apply_type), Core.SSAValue(fc_global_ssa), GlobalRef(@__MODULE__, :FieldTypeA), Core.SSAValue(sp1_ssa)) - @test code[(field_type_ssa = 4;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 1) - @test code[10] == Expr(:(=), Core.SlotNumber(10), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type_ssa), Core.SlotNumber(10))) - @test code[(slot_read_1 = 11;)] == Core.SlotNumber(10) - @test code[(field_type2_ssa = 12;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 2) - @test code[18] == Expr(:(=), Core.SlotNumber(9), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type2_ssa), Core.SlotNumber(9))) - @test code[(slot_read_2 = 19;)] == Core.SlotNumber(9) - @test code[(field_type4_ssa = 20;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 4) - @test code[26] == Expr(:(=), Core.SlotNumber(8), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type4_ssa), Core.SlotNumber(8))) - @test code[(slot_read_3 = 27;)] == Core.SlotNumber(8) - @test code[(field_type5_ssa = 28;)] == Expr(:call, GlobalRef(Core, :fieldtype), Core.SSAValue(apply_type_ssa), 5) - @test code[34] == Expr(:(=), Core.SlotNumber(7), Expr(:call, GlobalRef(Base, :convert), Core.SSAValue(field_type5_ssa), Core.SlotNumber(7))) - @test code[(slot_read_4 = 35;)] == Core.SlotNumber(7) - @test code[(new_ssa = 36;)] == Expr(:new, Core.SSAValue(apply_type_ssa), Core.SSAValue(slot_read_1), Core.SSAValue(slot_read_2), Core.SlotNumber(4), Core.SSAValue(slot_read_3), Core.SSAValue(slot_read_4)) - @test code[37] == Core.ReturnNode(Core.SSAValue(new_ssa)) + calls = Vector{Pair{SSAValue, Expr}}(undef, 0) + for i = 1:length(code) + expr = code[i] + if Meta.isexpr(expr, :call) || (Meta.isexpr(expr, :(=)) && Meta.isexpr(expr.args[2], :call)) + push!(calls, SSAValue(i)=>expr) + end + end + + function is_globalref(arg, gr) + while isa(arg, SSAValue) + arg = code[arg.id] + end + arg == gr + end + + # calls[1] + @test all(is_globalref.(calls[1][2].args[1:3], (GlobalRef(Core, :apply_type), GlobalRef(@__MODULE__, :FieldConvert), GlobalRef(@__MODULE__, :FieldTypeA)))) + + # calls[2] + @test all(is_globalref.(calls[2][2].args[1:1], (GlobalRef(Core, :fieldtype),))) + @test all(calls[2][2].args[2:3] .== (calls[1][1], 1)) + + # calls[3] - isa + + # calls[4] + let calle = calls[4][2] + @test Meta.isexpr(calle, :(=)) + call = calle.args[2] + @test is_globalref(call.args[1], GlobalRef(Base, :convert)) + @test call.args[2] == calls[2][1] + end + + # calls[5] + @test all(is_globalref.(calls[5][2].args[1:1], (GlobalRef(Core, :fieldtype),))) + @test all(calls[5][2].args[2:3] .== (calls[1][1], 2)) end # Issue #32820 @@ -8156,7 +8171,7 @@ end @test Core.Compiler.is_foldable(Base.infer_effects(length, (Core.SimpleVector,))) @test Core.Compiler.is_foldable(Base.infer_effects(getindex, (Core.SimpleVector,Int))) -# Test that a nothrow-globalref doesn't get outlined during lowering +# Test that a the lowering of nothrow globalref module WellKnownGlobal global well_known = 1 end @@ -8165,7 +8180,7 @@ macro insert_global() end check_globalref_lowering() = @insert_global let src = code_lowered(check_globalref_lowering)[1] - @test length(src.code) == 2 + @test length(src.code) == 4 end # Test correctness of widen_diagonal diff --git a/test/staged.jl b/test/staged.jl index 1b28144639f97..6cb99950a7bb2 100644 --- a/test/staged.jl +++ b/test/staged.jl @@ -270,12 +270,12 @@ end # PR #23168 -function f23168(a, x) +@eval function f23168(a, x) push!(a, 1) if @generated - :(y = x + x) + :(y = $(+)(x, x)) else - y = 2x + y = $(*)(2, x) end push!(a, y) if @generated @@ -290,9 +290,9 @@ end let a = Any[] @test f23168(a, 3) == (6, Int) @test a == [1, 6, 3] - @test occursin(" + ", string(code_lowered(f23168, (Vector{Any},Int)))) - @test occursin("2 * ", string(Base.uncompressed_ir(first(methods(f23168))))) - @test occursin("2 * ", string(code_lowered(f23168, (Vector{Any},Int), generated=false))) + @test occursin("(+)(", string(code_lowered(f23168, (Vector{Any},Int)))) + @test occursin("(*)(2", string(Base.uncompressed_ir(first(methods(f23168))))) + @test occursin("(*)(2", string(code_lowered(f23168, (Vector{Any},Int), generated=false))) @test occursin("Base.add_int", string(code_typed(f23168, (Vector{Any},Int)))) end From 6cb9f04c1ec562f800669ba13a9a096203aea7cf Mon Sep 17 00:00:00 2001 From: Julius Krumbiegel <22495855+jkrumbiegel@users.noreply.github.com> Date: Sat, 7 Dec 2024 19:56:09 +0100 Subject: [PATCH 23/25] Add #54800 to NEWS (#56774) Show glyphs for latex or emoji shortcodes being suggested in the REPL --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 641b84b480bb4..2ad92701dd638 100644 --- a/NEWS.md +++ b/NEWS.md @@ -171,6 +171,7 @@ Standard library changes in the REPL will now issue a warning the first time occurs. ([#54872]) - When an object is printed automatically (by being returned in the REPL), its display is now truncated after printing 20 KiB. This does not affect manual calls to `show`, `print`, and so forth. ([#53959]) +- Backslash completions now print the respective glyph or emoji next to each matching backslash shortcode. ([#54800]) #### SuiteSparse From c897a13c45c1222b4b16cf941348beef25f97ee0 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sat, 7 Dec 2024 19:43:15 -0300 Subject: [PATCH 24/25] Add the actual datatype to the heapsnapshot. This groups objects of the same type together (#56596) --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 72eb17115f4c7..f3793939610b5 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -380,7 +380,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT ios_mem(&str_, 0); JL_STREAM* str = (JL_STREAM*)&str_; jl_static_show(str, (jl_value_t*)type); - + node_type = StringRef((const char*)str_.buf, str_.size); name = StringRef((const char*)str_.buf, str_.size); } From 54755adb1d96539fdd125ea9c442d0fcf206d5cc Mon Sep 17 00:00:00 2001 From: clonefetch <166312854+clonefetch@users.noreply.github.com> Date: Sun, 8 Dec 2024 21:21:58 +0800 Subject: [PATCH 25/25] Fix typos in docstring, comments, and news (#56778) --- NEWS.md | 2 +- base/deprecated.jl | 2 +- base/pointer.jl | 2 +- base/sysimg.jl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2ad92701dd638..78d1d9b4bc7b8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,7 +36,7 @@ Language changes may pave the way for inference to be able to intelligently re-use the old results, once the new method is deleted. ([#53415]) - - Macro expansion will no longer eagerly recurse into into `Expr(:toplevel)` + - Macro expansion will no longer eagerly recurse into `Expr(:toplevel)` expressions returned from macros. Instead, macro expansion of `:toplevel` expressions will be delayed until evaluation time. This allows a later expression within a given `:toplevel` expression to make use of macros diff --git a/base/deprecated.jl b/base/deprecated.jl index 84ef89e44b473..cffff05d954d1 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -4,7 +4,7 @@ # Instructions for Julia Core Developers: # 1. When making a breaking change that is known to be depnedet upon by an # important and closely coupled package, decide on a unique `change_name` -# for your PR and add it to the list below. In general, is is better to +# for your PR and add it to the list below. In general, it is better to # err on the side of caution and assign a `change_name` even if it is not # clear that it is required. `change_name`s may also be assigned after the # fact in a separate PR. (Note that this may cause packages to misbehave diff --git a/base/pointer.jl b/base/pointer.jl index b1580ef17d562..de2f413d8f881 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -169,7 +169,7 @@ The `unsafe` prefix on this function indicates that no validation is performed o pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring that referenced memory is not freed or garbage collected while invoking this function. Incorrect usage may segfault your program. Unlike C, storing memory region allocated as -different type may be valid provided that that the types are compatible. +different type may be valid provided that the types are compatible. !!! compat "Julia 1.10" The `order` argument is available as of Julia 1.10. diff --git a/base/sysimg.jl b/base/sysimg.jl index e57ec1c99bfe6..42f54a849f157 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -# Can be be loaded on top of either an existing system image built from +# Can be loaded on top of either an existing system image built from # `Base_compiler.jl` or standalone, in which case we will build it now. let had_compiler = isdefined(Main, :Base) if had_compiler; else