From eefaeb8cd850caf56d7435a188c052c4aa7b3e52 Mon Sep 17 00:00:00 2001 From: liach Date: Sun, 26 May 2024 20:53:08 -0500 Subject: [PATCH] Make a blog --- .gitignore | 4 + 404.html | 25 ++++++ Gemfile | 33 +++++++ Gemfile.lock | 95 ++++++++++++++++++++ _config.yml | 54 +++++++++++- _posts/2024-05-25-invoke-intro.md | 142 ++++++++++++++++++++++++++++++ about.md | 11 +++ index.md | 3 + index.mkd | 4 - 9 files changed, 366 insertions(+), 5 deletions(-) create mode 100644 404.html create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 _posts/2024-05-25-invoke-intro.md create mode 100644 about.md create mode 100644 index.md delete mode 100644 index.mkd diff --git a/.gitignore b/.gitignore index ca35be0..f40fbd8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ _site +.sass-cache +.jekyll-cache +.jekyll-metadata +vendor diff --git a/404.html b/404.html new file mode 100644 index 0000000..086a5c9 --- /dev/null +++ b/404.html @@ -0,0 +1,25 @@ +--- +permalink: /404.html +layout: default +--- + + + +
+

404

+ +

Page not found :(

+

The requested page could not be found.

+
diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..08e9e0a --- /dev/null +++ b/Gemfile @@ -0,0 +1,33 @@ +source "https://rubygems.org" +# Hello! This is where you manage which Jekyll version is used to run. +# When you want to use a different version, change it below, save the +# file and run `bundle install`. Run Jekyll with `bundle exec`, like so: +# +# bundle exec jekyll serve +# +# This will help ensure the proper Jekyll version is running. +# Happy Jekylling! +gem "jekyll", "~> 4.3.3" +# This is the default theme for new Jekyll sites. You may change this to anything you like. +gem "minima", "~> 2.5" +# If you want to use GitHub Pages, remove the "gem "jekyll"" above and +# uncomment the line below. To upgrade, run `bundle update github-pages`. +# gem "github-pages", group: :jekyll_plugins +# If you have any plugins, put them here! +group :jekyll_plugins do + gem "jekyll-feed", "~> 0.12" +end + +# Windows and JRuby does not include zoneinfo files, so bundle the tzinfo-data gem +# and associated library. +platforms :mingw, :x64_mingw, :mswin, :jruby do + gem "tzinfo", ">= 1", "< 3" + gem "tzinfo-data" +end + +# Performance-booster for watching directories on Windows +gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin] + +# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem +# do not have a Java counterpart. 
+gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby] diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..a82753d --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,95 @@ +GEM + remote: https://rubygems.org/ + specs: + addressable (2.8.6) + public_suffix (>= 2.0.2, < 6.0) + bigdecimal (3.1.8) + colorator (1.1.0) + concurrent-ruby (1.2.3) + em-websocket (0.5.3) + eventmachine (>= 0.12.9) + http_parser.rb (~> 0) + eventmachine (1.2.7) + ffi (1.16.3) + forwardable-extended (2.6.0) + google-protobuf (4.27.0-x64-mingw-ucrt) + bigdecimal + rake (>= 13) + http_parser.rb (0.8.0) + i18n (1.14.5) + concurrent-ruby (~> 1.0) + jekyll (4.3.3) + addressable (~> 2.4) + colorator (~> 1.0) + em-websocket (~> 0.5) + i18n (~> 1.0) + jekyll-sass-converter (>= 2.0, < 4.0) + jekyll-watch (~> 2.0) + kramdown (~> 2.3, >= 2.3.1) + kramdown-parser-gfm (~> 1.0) + liquid (~> 4.0) + mercenary (>= 0.3.6, < 0.5) + pathutil (~> 0.9) + rouge (>= 3.0, < 5.0) + safe_yaml (~> 1.0) + terminal-table (>= 1.8, < 4.0) + webrick (~> 1.7) + jekyll-feed (0.17.0) + jekyll (>= 3.7, < 5.0) + jekyll-sass-converter (3.0.0) + sass-embedded (~> 1.54) + jekyll-seo-tag (2.8.0) + jekyll (>= 3.8, < 5.0) + jekyll-watch (2.2.1) + listen (~> 3.0) + kramdown (2.4.0) + rexml + kramdown-parser-gfm (1.1.0) + kramdown (~> 2.0) + liquid (4.0.4) + listen (3.9.0) + rb-fsevent (~> 0.10, >= 0.10.3) + rb-inotify (~> 0.9, >= 0.9.10) + mercenary (0.4.0) + minima (2.5.1) + jekyll (>= 3.5, < 5.0) + jekyll-feed (~> 0.9) + jekyll-seo-tag (~> 2.1) + pathutil (0.16.2) + forwardable-extended (~> 2.6) + public_suffix (5.0.5) + rake (13.2.1) + rb-fsevent (0.11.2) + rb-inotify (0.11.1) + ffi (~> 1.0) + rexml (3.2.8) + strscan (>= 3.0.9) + rouge (4.2.1) + safe_yaml (1.0.5) + sass-embedded (1.77.2-x64-mingw-ucrt) + google-protobuf (>= 3.25, < 5.0) + strscan (3.1.0) + terminal-table (3.0.2) + unicode-display_width (>= 1.1.1, < 3) + tzinfo (2.0.6) + concurrent-ruby (~> 1.0) + tzinfo-data (1.2024.1) + tzinfo (>= 1.0.0) + 
unicode-display_width (2.5.0) + wdm (0.1.1) + webrick (1.8.1) + +PLATFORMS + x64-mingw-ucrt + +DEPENDENCIES + http_parser.rb (~> 0.6.0) + jekyll (~> 4.3.3) + jekyll-feed (~> 0.12) + minima (~> 2.5) + tzinfo (>= 1, < 3) + tzinfo-data + wdm (~> 0.1.1) + +BUNDLED WITH + 2.5.10 diff --git a/_config.yml b/_config.yml index b5ea2f7..e5feedf 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1,53 @@ -auto: true +# Welcome to Jekyll! +# +# This config file is meant for settings that affect your whole blog, values +# which you are expected to set up once and rarely edit after that. If you find +# yourself editing this file very often, consider using Jekyll's data files +# feature for the data you need to update frequently. +# +# For technical reasons, this file is *NOT* reloaded automatically when you use +# 'bundle exec jekyll serve'. If you change this file, please restart the server process. +# +# If you need help with YAML syntax, here are some quick references for you: +# https://learn-the-web.algonquindesign.ca/topics/markdown-yaml-cheat-sheet/#yaml +# https://learnxinyminutes.com/docs/yaml/ +# +# Site settings +# These are used to personalize your new site. If you look in the HTML files, +# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on. +# You can create any custom variable you would like, and they will be accessible +# in the templates via {{ site.myvariable }}. + +title: Blog +email: "" +description: >- # this means to ignore newlines until "baseurl:" + A blog +baseurl: "" # the subpath of your site, e.g. /blog +url: "https://liachmodded.github.io" # the base hostname & protocol for your site, e.g. http://example.com +twitter_username: "" +github_username: liachmodded + +# Build settings +theme: minima +plugins: + - jekyll-feed + +# Exclude from processing. +# The following items will not be processed, by default. +# Any item listed under the `exclude:` key here will be automatically added to +# the internal "default list". 
+# +# Excluded items can be processed by explicitly listing the directories or +# their entries' file path in the `include:` list. +# +# exclude: +# - .sass-cache/ +# - .jekyll-cache/ +# - gemfiles/ +# - Gemfile +# - Gemfile.lock +# - node_modules/ +# - vendor/bundle/ +# - vendor/cache/ +# - vendor/gems/ +# - vendor/ruby/ diff --git a/_posts/2024-05-25-invoke-intro.md b/_posts/2024-05-25-invoke-intro.md new file mode 100644 index 0000000..5f6e64c --- /dev/null +++ b/_posts/2024-05-25-invoke-intro.md @@ -0,0 +1,142 @@ +--- +layout: post +title: "A brief overview of java.lang.invoke" +categories: java +tags: invoke +--- + +--- + +Note: This page is still under construction + +--- + +`java.lang.invoke`, also known as JSR 292, is known for MethodHandles and invokedynamic. It is known for the support of dynamic programming languages, yet it is crucial to Java itself as time goes on. Let's take a look at its history and its implications. + +## Before `java.lang.invoke` + +We all know that the most usual way to get a `MethodHandle` is through `MethodHandles.lookup()`, which can find field accessors and methods. But didn't reflection exist before that? Why couldn't reflection be used? + +### Reflection and Unsafe + +Before the appearance of invoke, reflection did exist, and this is how they were implemented: + - Method accessors used ad-hoc bytecode generation that was only removed in favor of MethodHandle in JEP 416; as of JDK 23, the infrastructure still exists to support old serialization constructor generation. + - Field accessors used Unsafe, which soon becomes notorious as a major blocker for upgrades past Java 9. [Back then](https://github.com/openjdk/jdk/blob/7a94d5e47faaf4c99a6c02279dbce4099a2f2a79/jdk/src/share/classes/sun/misc/Unsafe.java), it was much simpler, with only field access methods using a long offset. + +So what does MethodHandle do in comparison? 
Each MethodHandle has a fixed MethodType; a MethodType can speed up calls significantly compared to argument conversions performed by reflection. And indeed, each invokedynamic instruction has a fixed MethodType passed to the bootstrap method. + +## Reading the `java.lang.invoke` code + +### Entrypoints from the VM + +Since invocation happens from the VM, it would be helpful to find where the call sequences start. The entrypoints to the whole invoke system are these 3 methods in `MethodHandleNatives`: + - `linkCallSite`: Links a CallSite, i.e. an invokedynamic instruction + - `linkMethod`: Links a signature-polymorphic method in `MethodHandle` (`invokeExact` or `invoke`) or `VarHandle` (access methods) + - `linkDynamicConstant`: Resolves a CONSTANT_Dynamic to a constant value + +`linkCallSite` and `linkMethod` return `MemberName` which points to infrastructure static methods, mostly in dynamically-generated `LambdaForm`s (see `InvokerBytecodeGenerator` and `Invokers` too). They can also point to pregenerated bytecode, such as to `VarHandleGuards` methods for `VarHandle`, or to `Invokers$Holder` from pregeneration (via CDS or jlink) + +### Back into the VM + +The execution of course comes back into JVM. The hooks are all in `MethodHandle`: + - `invokeBasic`: Used by `LambdaForm` code generation to easily invoke nested `MethodHandle`s, such as ones with bound arguments (`BoundMethodHandle`); essentially same as `invokeExact` or `invoke` but without type conversions, as all types are "basic types" (loadable types) + - `linkToVirtual`, `linkToStatic`, `linkToSpecial`, `linkToInterface`: The most basic calls used by `java.lang.invoke`. Used by `DirectMethodHandle.preparedLambdaForm` to simulate invokevirtual, invokestatic, invokespecial, invokeinterface calls. However, they are more powerful, as they can link to [hidden classes](#hidden-classes) with the trailing `MemberName` argument while Java bytecode cannot. 
+ - In addition, `linkToStatic` is explicitly used in `VarHandleGuards` to invoke static methods when there are many `MemberName` possibilities. + - `linkToNative` works much like the other link methods, except it takes a trailing `NativeEntryPoint`. Used by `NativeMethodHandle.preparedLambdaForm`. + +### `LambdaForm` + +Being thousands of lines long, `LambdaForm` is daunting to dig through. However, if you are a bytecode guru, you can check out `InvokerBytecodeGenerator` which converts `LambdaForm` to hidden classes. Also check out `preparedLambdaForm` in a few `MethodHandle` implementations. Luckily, `LambdaForm` is a well encapsulated class, so understanding its upstream and downstream can give you a good grasp of what it does before you dive in. + +## `MethodType` + +`MethodType` seems simple on the surface: just a return type plus an array of parameters. What good does it do so we need it? + +Turns out `MethodType` encapsulates some complex logic too: one is its `invokers`, which dictates how polymorphic methods with its type should be invoked; in addition, it is interned, just like the String for method and class names in reflection. It also has some logic for erasure to "basic types" (similar to the loadable types in bytecode) to reduce LambdaForms and code generation. + +## Best practices + +### `MethodHandle` and `VarHandle` +When using `MethodHandle` and `VarHandle`, prefer to keep them as constants (another good topic to dive into later), such as in `static final` fields. + +Always prefer calling `invokeExact`; this method is the fastest. A call to `invoke`, in contrast, may call `asType` every time the handle is invoked, and even if the `asTypeCache` doesn't miss, since it's a soft reference instead of a constant, it cannot be inlined. + +Similarly, when declaring a `VarHandle`, finish the declaration with a `withInvokeExactBehavior`. 
Otherwise, the `VarHandle` will suffer from similar performance penalties if called with a suboptimal type ([JDK-8160821](https://bugs.openjdk.org/browse/JDK-8160821)). + +### Dynamic constants + +Compared to invokedynamic bootstrap methods scattered across many classes (`LambdaMetafactory`, `StringConcatFactory`), the `ConstantBootstraps` class provides a lot of bootstrap methods for general-purpose dynamic constants otherwise not representable in the constant pool, such as `nullConstant`, `primitiveClass`, for use in bootstrap method arguments. There are two useful ones, `getStaticFinal` and `invoke`, which can translate otherwise eagerly initialized static final fields in a class to a lazy constant to reduce class initialization cost. + +## Hidden classes + +Hidden classes began with `Unsafe.defineAnonymousClass`, which defined "VM anonymous classes"; they indeed began with invoke, as they were first used for LambdaForm implementations. Now, they have been promoted to a standalone Hidden Classes feature usable by all Java programs. + +### NestMates + +From [JEP 181](https://openjdk.org/jeps/181): + +> The notion of a common access control context arises in other places as well, such as the host class mechanism in `Unsafe.defineAnonymousClass()`, where a dynamically loaded class can use the access control context of a host. A formal notion of nest membership would put this mechanism on firmer ground (but actually providing a supported replacement for `defineAnonymousClass()` would be a separate effort.) + +How unexpected! Nestmates come from VM anonymous classes. Indeed, in current invoke, the generated `LambdaForm$` hidden classes still have `LambdaForm` as their host class, though they are not nestmates. + +An anecdote about nests is that they were created to enable generic specialization by subclassing in project Valhalla. (Treat this message with doubt, since I forgot about the source) + +The nests also greatly simplified some Java design patterns. 
For example, before nests: +```java +private static class Holder { + static final Object instance; +} +``` +The field declaration avoided `private` because the Java compiler would otherwise have to generate an accessor to access the instance; it had always generated bridge methods to access private members in enclosing and inner classes, because these concepts don't exist in the JVM (only packages exist). + +Another anecdote is that a `MethodHandle` can be created for a nested enum constructor and can be called without any problem, while doing so is prohibited by reflection. + +### ClassData + +Class data is any object passed to `MethodHandles$Lookup.defineHiddenClassWithClassData()`. Compared to passing the data elsewhere such as via `ThreadLocal`, using class data is more thread safe and less costly. + +Since there are hidden classes, class data becomes necessary, as not all MethodHandle instances are representable by bytecode instructions. `LambdaForm` classes use class data to represent other hidden classes and `MemberName` for hidden class members. + +Class data is usually accessed in generated code with `MethodHandles.classData`. It's intentionally compatible as a bootstrap method to facilitate usage as a dynamic constant and using that constant as opposed to calling this method on each site. (Note that `InvokerBytecodeGenerator` does not use condy, as `LambdaForm` has to be ready before condy is available for use, so it stores the values in static final fields instead) + +There's an additional `MethodHandles.classDataAt`, but calling `List.get(int)Object` is preferable in actual bytecode to prevent spamming up the constant pool; `classDataAt` is mostly for supplying bootstrap method arguments. 
+ +### Other attributes + +Other important attributes of hidden classes include: + - Omission in stack traces by default + - Not modifiable by instrumentation + - Final fields are automatically "trusted" (part of constants) + - Class no longer discoverable by `Class.forName` + +These pose risks for migration of regular generated classes to hidden classes. + +## Impact of java.lang.invoke + +### invokedynamic + +Initially created to allow dynamic programming languages to better resolve calls (like Gradle's closures), indy is also noted for its ability to provide distinct implementations on different VMs; just like library methods that evolve over time, older code using indy will use the modern code shape provided by indy, enjoying improved performance. + +For example, `LambdaMetafactory` can try using shared-class approach (storing MemberName or MethodHandle in final fields and create a class only if the interface differs) to reduce class loading pressure when a few interfaces have a lot of implementations. Already in action is `ObjectMethods` where record's object methods are being improved over time, and `StringConcatFactory` that relays back to `StringBuilder` if the concatenation is too complex. + +### Reflection + +We have discussed how reflection was before invoke - ad-hoc classes generated for each different method. This creates a lot of classes. In comparison, [JEP 416](https://openjdk.org/jeps/416) creates a `MethodHandle` that may use shared `LambdaForm` if possible; this change might explain the slowdown observed with reflection for non-constant field/method objects. Yet it's a good tradeoff, as it significantly reduces classloading pressure. 
+ + + \ No newline at end of file diff --git a/about.md b/about.md new file mode 100644 index 0000000..ec7a5ac --- /dev/null +++ b/about.md @@ -0,0 +1,11 @@ +--- +layout: page +title: About +--- + +Please contact me [here](https://github.com/liachmodded/talk/issues/new?template=talking.md), either issues or discussions are fine. Remember to leave me a ping so I actually get a notification! + +Recommended reading: + - [https://cr.openjdk.org/~jrose](https://cr.openjdk.org/~jrose) - John Rose's blog, insights on latest Java designs + - [https://inside.java](https://inside.java) - News about Java development + diff --git a/index.md b/index.md new file mode 100644 index 0000000..e4d427d --- /dev/null +++ b/index.md @@ -0,0 +1,3 @@ +--- +layout: home +--- diff --git a/index.mkd b/index.mkd deleted file mode 100644 index b249ef6..0000000 --- a/index.mkd +++ /dev/null @@ -1,4 +0,0 @@ ---- -layout: redirect -redirect_url: https://github.com/liachmodded/talk ----