diff --git a/go.mod b/go.mod index a7c6ac62f..d119a5afe 100644 --- a/go.mod +++ b/go.mod @@ -9,8 +9,8 @@ require ( github.com/coreos/go-semver v0.3.1 github.com/go-logr/logr v1.4.2 github.com/google/go-cmp v0.6.0 - github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241126155124-18e017f67001 - github.com/netobserv/netobserv-ebpf-agent v1.6.1-crc2.0.20241008130234-a20397fb8f88 + github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e + github.com/netobserv/netobserv-ebpf-agent v1.7.0-community.0.20241213165959-7e7f8c42a3f6 github.com/onsi/ginkgo/v2 v2.22.2 github.com/onsi/gomega v1.36.2 github.com/openshift/api v0.0.0-20240722135205-ae4f370f361f @@ -72,19 +72,19 @@ require ( github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect github.com/google/uuid v1.6.0 // indirect github.com/gopacket/gopacket v1.2.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect github.com/heptiolabs/healthcheck v0.0.0-20211123025425-613501dd5deb // indirect github.com/ip2location/ip2location-go/v9 v9.7.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/libp2p/go-reuseport v0.3.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mdlayher/ethernet v0.0.0-20220221185849-529eae5b6118 // indirect github.com/minio/md5-simd v1.1.2 // indirect - github.com/minio/minio-go/v7 v7.0.77 // indirect + github.com/minio/minio-go/v7 v7.0.82 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect @@ -121,16 +121,16 @@ require ( github.com/xdg-go/scram v1.1.2 // indirect github.com/xdg-go/stringprep v1.0.4 // indirect github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect - go.opentelemetry.io/otel v1.31.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 // indirect + go.opentelemetry.io/otel v1.32.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.26.0 // indirect - go.opentelemetry.io/otel/metric v1.31.0 // indirect - go.opentelemetry.io/otel/sdk v1.29.0 // indirect - go.opentelemetry.io/otel/sdk/metric v1.29.0 // indirect - go.opentelemetry.io/otel/trace v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.32.0 // indirect + go.opentelemetry.io/otel/sdk v1.32.0 // indirect + go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect + go.opentelemetry.io/otel/trace v1.32.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.11.0 // indirect @@ -144,8 +144,8 @@ require ( golang.org/x/time v0.7.0 // indirect golang.org/x/tools v0.28.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect google.golang.org/grpc v1.67.1 // indirect google.golang.org/protobuf v1.36.1 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect @@ -163,4 +163,4 @@ require ( replace github.com/prometheus/common v0.55.0 => github.com/netobserv/prometheus-common v0.55.0-netobserv -replace github.com/netobserv/netobserv-ebpf-agent => github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20241213142900-5c07db3a6c0b +replace github.com/netobserv/netobserv-ebpf-agent => github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20250107120019-7532f1cb527c diff --git a/go.sum b/go.sum index 3522e5c3b..52b4216c8 100644 --- a/go.sum +++ b/go.sum @@ -452,8 +452,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.15.0/go.mod h1:vO11I9oWA+KsxmfFQPhLnnIb1VDE24M+pdxZFiuZcA8= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0= github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= github.com/hashicorp/consul/api v1.7.0/go.mod h1:1NSuaUUkFaJzMasbfq/11wKYWSR67Xn6r2DXKhuDNFg= github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= @@ -560,8 +560,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= @@ -639,8 +639,8 @@ github.com/miekg/dns v1.1.43 h1:JKfpVSCB84vrAmHzyrsxB5NAr5kLoMXZArPSw7Qlgyg= github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= -github.com/minio/minio-go/v7 v7.0.77 h1:GaGghJRg9nwDVlNbwYjSDJT1rqltQkBFDsypWX1v3Bw= -github.com/minio/minio-go/v7 v7.0.77/go.mod h1:AVM3IUN6WwKzmwBxVdjzhH8xq+f57JSbbvzqvUzR6eg= +github.com/minio/minio-go/v7 v7.0.82 h1:tWfICLhmp2aFPXL8Tli0XDTHj2VB/fNf0PC1f/i1gRo= +github.com/minio/minio-go/v7 v7.0.82/go.mod h1:84gmIilaX4zcvAWWzJ5Z1WI5axN+hAbM5w25xf8xvC0= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= @@ -668,8 +668,8 @@ github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjY github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg= -github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20241213142900-5c07db3a6c0b h1:E8vhCqGnKNO++9HFup4rXqpl9I1uyTJYpaKetV1elAA= -github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20241213142900-5c07db3a6c0b/go.mod h1:20e1OPAs7h3k9PvNZWS9D6BnXEtkTk2LlfzD66uhvxY= +github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20250107120019-7532f1cb527c h1:I2N7M0pkobJM2Bmo1iixae2ZI6cHhYCIRIvQlpi0pvc= +github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20250107120019-7532f1cb527c/go.mod h1:2UT/wN+zmU+pCajg0crx8+0KOmzS8pBNql5VqXAGY5I= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= @@ -684,8 +684,8 @@ github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzE github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241126155124-18e017f67001 h1:Hb5RxMfFafng2+QbnkiEWTS4TUUz7zcpUS8RiauzWXw= -github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241126155124-18e017f67001/go.mod h1:wnCpWttAFkLSSxOcfCkd9zA5pwV/1OcxS5tAfAxNWEc= +github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e h1:MdCBEv8sbkhWwoKmp99oeg7eS0tRlD1FTZKRg7r5xWA= +github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e/go.mod h1:P8Gf2JTrvyHpTVZs/N2M1M9nMk1Uu/uvwhVe11c3b9Y= github.com/netobserv/gopipes v0.3.0 h1:IYmPnnAVCdSK7VmHmpFhrVBOEm45qpgbZmJz1sSW+60= github.com/netobserv/gopipes v0.3.0/go.mod h1:N7/Gz05EOF0CQQSKWsv3eof22Cj2PB08Pbttw98YFYU= github.com/netobserv/loki-client-go v0.0.0-20220927092034-f37122a54500 h1:RmnoJe/ci5q+QdM7upFdxiU+D8F3L3qTd5wXCwwHefw= @@ -830,8 +830,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= @@ -963,10 +963,10 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= -go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 h1:k6fQVDQexDE+3jG2SfCQjnHS7OamcP73YMoxEVq5B6k= -go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0/go.mod h1:t4BrYLHU450Zo9fnydWlIuswB1bm7rM8havDpWOJeDo= +go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U= +go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 h1:j7ZSD+5yn+lo3sGV69nW04rRR0jhYnBwjuX3r0HvnK0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0/go.mod h1:WXbYJTUaZXAbYd8lbgGuvih0yuCfOFC5RJoYnoLcGz8= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0 h1:xvhQxJ/C9+RTnAj5DpTg7LSM1vbbMTiXt7e9hsfqHNw= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0/go.mod h1:Fcvs2Bz1jkDM+Wf5/ozBGmi3tQ/c9zPKLnsipnfhGAo= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY= @@ -975,14 +975,14 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffA go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.26.0 h1:1wp/gyxsuYtuE/JFxsQRtcCDtMrO2qMvlfXALU5wkzI= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.26.0/go.mod h1:gbTHmghkGgqxMomVQQMur1Nba4M0MQ8AYThXDUjsJ38= -go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= -go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= -go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo= -go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok= -go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY= -go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ= -go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= -go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= +go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M= +go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8= +go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4= +go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU= +go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU= +go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ= +go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM= +go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -1395,10 +1395,10 @@ google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 h1:YcyjlL1PRr2Q17/I0dPk2JmYS5CDXfcdb2Z3YRioEbw= -google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:OCdP9MfskevB/rbYvHTsXTtKC+3bHWajPdoKgjcYkfo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 h1:2035KHhUv+EpyB+hWgJnaWKJOdX1E95w2S8Rr4uWKTs= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go index d7b15fcfb..2e50082ad 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go @@ -94,7 +94,7 @@ func Int64(val string) (int64, error) { } // Int64Slice converts 'val' where individual integers are separated by -// 'sep' into a int64 slice. +// 'sep' into an int64 slice. func Int64Slice(val, sep string) ([]int64, error) { s := strings.Split(val, sep) values := make([]int64, len(s)) @@ -118,7 +118,7 @@ func Int32(val string) (int32, error) { } // Int32Slice converts 'val' where individual integers are separated by -// 'sep' into a int32 slice. +// 'sep' into an int32 slice. func Int32Slice(val, sep string) ([]int32, error) { s := strings.Split(val, sep) values := make([]int32, len(s)) @@ -190,7 +190,7 @@ func Bytes(val string) ([]byte, error) { } // BytesSlice converts 'val' where individual bytes sequences, encoded in URL-safe -// base64 without padding, are separated by 'sep' into a slice of bytes slices slice. +// base64 without padding, are separated by 'sep' into a slice of byte slices. func BytesSlice(val, sep string) ([][]byte, error) { s := strings.Split(val, sep) values := make([][]byte, len(s)) diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go index 01f573419..41cd4f503 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go @@ -81,6 +81,21 @@ func HTTPError(ctx context.Context, mux *ServeMux, marshaler Marshaler, w http.R mux.errorHandler(ctx, mux, marshaler, w, r, err) } +// HTTPStreamError uses the mux-configured stream error handler to notify error to the client without closing the connection. +func HTTPStreamError(ctx context.Context, mux *ServeMux, marshaler Marshaler, w http.ResponseWriter, r *http.Request, err error) { + st := mux.streamErrorHandler(ctx, err) + msg := errorChunk(st) + buf, err := marshaler.Marshal(msg) + if err != nil { + grpclog.Errorf("Failed to marshal an error: %v", err) + return + } + if _, err := w.Write(buf); err != nil { + grpclog.Errorf("Failed to notify error to client: %v", err) + return + } +} + // DefaultHTTPErrorHandler is the default error handler. // If "err" is a gRPC Status, the function replies with the status code mapped by HTTPStatusFromCode. // If "err" is a HTTPStatusError, the function replies with the status code provide by that struct. This is diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go index 9005d6a0b..2fcd7af3c 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go @@ -155,7 +155,7 @@ func buildPathsBlindly(name string, in interface{}) []string { return paths } -// fieldMaskPathItem stores a in-progress deconstruction of a path for a fieldmask +// fieldMaskPathItem stores an in-progress deconstruction of a path for a fieldmask type fieldMaskPathItem struct { // the list of prior fields leading up to node connected by dots path string diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go index 0b051e6e8..07c28112c 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go @@ -86,8 +86,8 @@ func (m marshalerRegistry) add(mime string, marshaler Marshaler) error { // It allows for a mapping of case-sensitive Content-Type MIME type string to runtime.Marshaler interfaces. // // For example, you could allow the client to specify the use of the runtime.JSONPb marshaler -// with a "application/jsonpb" Content-Type and the use of the runtime.JSONBuiltin marshaler -// with a "application/json" Content-Type. +// with an "application/jsonpb" Content-Type and the use of the runtime.JSONBuiltin marshaler +// with an "application/json" Content-Type. // "*" can be used to match any Content-Type. // This can be attached to a ServerMux with the marshaler option. func makeMarshalerMIMERegistry() marshalerRegistry { diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go index d549407f2..f710036b3 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go @@ -40,7 +40,7 @@ func Float32P(val string) (*float32, error) { } // Int64P parses the given string representation of an integer -// and returns a pointer to a int64 whose value is same as the parsed integer. +// and returns a pointer to an int64 whose value is same as the parsed integer. func Int64P(val string) (*int64, error) { i, err := Int64(val) if err != nil { @@ -50,7 +50,7 @@ func Int64P(val string) (*int64, error) { } // Int32P parses the given string representation of an integer -// and returns a pointer to a int32 whose value is same as the parsed integer. +// and returns a pointer to an int32 whose value is same as the parsed integer. func Int32P(val string) (*int32, error) { i, err := Int32(val) if err != nil { diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go index dfe7de486..38ca39cc5 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go @@ -1,6 +1,6 @@ package utilities -// An OpCode is a opcode of compiled path patterns. +// OpCode is an opcode of compiled path patterns. type OpCode int // These constants are the valid values of OpCode. diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go index d224ab776..66aa5f2dc 100644 --- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go +++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go @@ -5,7 +5,7 @@ import ( "strings" ) -// flagInterface is an cut down interface to `flag` +// flagInterface is a cut down interface to `flag` type flagInterface interface { Var(value flag.Value, name string, usage string) } diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml index a22953805..4528059ca 100644 --- a/vendor/github.com/klauspost/compress/.goreleaser.yml +++ b/vendor/github.com/klauspost/compress/.goreleaser.yml @@ -1,5 +1,5 @@ -# This is an example goreleaser.yaml file with some sane defaults. -# Make sure to check the documentation at http://goreleaser.com +version: 2 + before: hooks: - ./gen.sh @@ -99,7 +99,7 @@ archives: checksum: name_template: 'checksums.txt' snapshot: - name_template: "{{ .Tag }}-next" + version_template: "{{ .Tag }}-next" changelog: sort: asc filters: diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 05c7359e4..de264c85a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -16,6 +16,27 @@ This package provides various compression algorithms. # changelog +* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10) + * gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978 + * gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002 + * s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982 + * zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007 + * flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996 + +* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9) + * s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949 + * flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963 + * Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971 + * zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951 + +* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8) + * zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885 + * zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938 + +* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7) + * s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927 + * s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930 + * Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6) * zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923 * s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925 @@ -81,7 +102,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795 * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779 * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780 - * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 + * gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799 * Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1) * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776 @@ -136,7 +157,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp * zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649 * Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651 * flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656 - * zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657 + * zstd: Improve "better" compression https://github.com/klauspost/compress/pull/657 * s2: Improve "best" compression https://github.com/klauspost/compress/pull/658 * s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635 * s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646 @@ -339,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when * s2: Fix binaries. * Feb 25, 2021 (v1.11.8) - * s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended. + * s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended. * s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315) * s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322) * zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314) @@ -518,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when * Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster. * Feb 19, 2016: Handle small payloads faster in level 1-3. * Feb 19, 2016: Added faster level 2 + 3 compression modes. -* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5. +* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5. * Feb 14, 2016: Snappy: Merge upstream changes. * Feb 14, 2016: Snappy: Fix aggressive skipping. * Feb 14, 2016: Snappy: Update benchmark. diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 66d1657d2..af53fb860 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -861,7 +861,7 @@ func (d *compressor) reset(w io.Writer) { } switch d.compressionLevel.chain { case 0: - // level was NoCompression or ConstantCompresssion. + // level was NoCompression or ConstantCompression. d.windowEnd = 0 default: s := d.state diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go index 2f410d64f..0d7b437f1 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate.go +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -298,6 +298,14 @@ const ( huffmanGenericReader ) +// flushMode tells decompressor when to return data +type flushMode uint8 + +const ( + syncFlush flushMode = iota // return data after sync flush block + partialFlush // return data after each block +) + // Decompress state. type decompressor struct { // Input source. @@ -332,6 +340,8 @@ type decompressor struct { nb uint final bool + + flushMode flushMode } func (f *decompressor) nextBlock() { @@ -618,7 +628,10 @@ func (f *decompressor) dataBlock() { } if n == 0 { - f.toRead = f.dict.readFlush() + if f.flushMode == syncFlush { + f.toRead = f.dict.readFlush() + } + f.finishBlock() return } @@ -657,8 +670,12 @@ func (f *decompressor) finishBlock() { if f.dict.availRead() > 0 { f.toRead = f.dict.readFlush() } + f.err = io.EOF + } else if f.flushMode == partialFlush && f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() } + f.step = nextBlock } @@ -789,15 +806,25 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error { return nil } -// NewReader returns a new ReadCloser that can be used -// to read the uncompressed version of r. -// If r does not also implement io.ByteReader, -// the decompressor may read more data than necessary from r. -// It is the caller's responsibility to call Close on the ReadCloser -// when finished reading. -// -// The ReadCloser returned by NewReader also implements Resetter. -func NewReader(r io.Reader) io.ReadCloser { +type ReaderOpt func(*decompressor) + +// WithPartialBlock tells decompressor to return after each block, +// so it can read data written with partial flush +func WithPartialBlock() ReaderOpt { + return func(f *decompressor) { + f.flushMode = partialFlush + } +} + +// WithDict initializes the reader with a preset dictionary +func WithDict(dict []byte) ReaderOpt { + return func(f *decompressor) { + f.dict.init(maxMatchOffset, dict) + } +} + +// NewReaderOpts returns new reader with provided options +func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser { fixedHuffmanDecoderInit() var f decompressor @@ -806,9 +833,26 @@ func NewReader(r io.Reader) io.ReadCloser { f.codebits = new([numCodes]int) f.step = nextBlock f.dict.init(maxMatchOffset, nil) + + for _, opt := range opts { + opt(&f) + } + return &f } +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + return NewReaderOpts(r) +} + // NewReaderDict is like NewReader but initializes the reader // with a preset dictionary. The returned Reader behaves as if // the uncompressed data stream started with the given dictionary, @@ -817,13 +861,5 @@ func NewReader(r io.Reader) io.ReadCloser { // // The ReadCloser returned by NewReader also implements Resetter. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { - fixedHuffmanDecoderInit() - - var f decompressor - f.r = makeReader(r) - f.bits = new([maxNumLit + maxNumDist]int) - f.codebits = new([numCodes]int) - f.step = nextBlock - f.dict.init(maxMatchOffset, dict) - return &f + return NewReaderOpts(r, WithDict(dict)) } diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go index cc05d0f7e..0c7dd4ffe 100644 --- a/vendor/github.com/klauspost/compress/fse/decompress.go +++ b/vendor/github.com/klauspost/compress/fse/decompress.go @@ -15,7 +15,7 @@ const ( // It is possible, but by no way guaranteed that corrupt data will // return an error. // It is up to the caller to verify integrity of the returned data. -// Use a predefined Scrach to set maximum acceptable output size. +// Use a predefined Scratch to set maximum acceptable output size. func Decompress(b []byte, s *Scratch) ([]byte, error) { s, err := s.prepare(b) if err != nil { diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go index 54bd08b25..0f56b02d7 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 0 { - fmt.Fprintf(w, "%d errros in base, stopping\n", errs) + fmt.Fprintf(w, "%d errors in base, stopping\n", errs) continue } // Ensure that all combinations are covered. @@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) { errs++ } if errs > 20 { - fmt.Fprintf(w, "%d errros, stopping\n", errs) + fmt.Fprintf(w, "%d errors, stopping\n", errs) break } } diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go index 0c9088adf..20b802270 100644 --- a/vendor/github.com/klauspost/compress/s2/encode.go +++ b/vendor/github.com/klauspost/compress/s2/encode.go @@ -9,6 +9,9 @@ import ( "encoding/binary" "math" "math/bits" + "sync" + + "github.com/klauspost/compress/internal/race" ) // Encode returns the encoded form of src. The returned slice may be a sub- @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte { return dst[:d] } +var estblockPool [2]sync.Pool + // EstimateBlockSize will perform a very fast compression // without outputting the result and return the compressed output size. // The function returns -1 if no improvement could be achieved. @@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) { return -1 } if len(src) <= 1024 { - d = calcBlockSizeSmall(src) + const sz, pool = 2048, 0 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSizeSmall(src, tmp) } else { - d = calcBlockSize(src) + const sz, pool = 32768, 1 + tmp, ok := estblockPool[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer estblockPool[pool].Put(tmp) + + d = calcBlockSize(src, tmp) } if d == 0 { diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go index 4f45206a4..7aadd255f 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go @@ -3,10 +3,16 @@ package s2 -import "github.com/klauspost/compress/internal/race" +import ( + "sync" + + "github.com/klauspost/compress/internal/race" +) const hasAmd64Asm = true +var encPools [4]sync.Pool + // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) { ) if len(src) >= 4<<20 { - return encodeBlockAsm(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBlockAsm4MB(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeBlockAsm8B(dst, src, tmp) } +var encBetterPools [5]sync.Pool + // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It // assumes that the varint-encoded length of the decompressed bytes has already // been written. @@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) { ) if len(src) > 4<<20 { - return encodeBetterBlockAsm(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeBetterBlockAsm4MB(dst, src) + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm4MB(dst, src, tmp) } if len(src) >= limit10B { - return encodeBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeBetterBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm(dst, src, tmp) } if len(src) >= limit12B { - return encodeSnappyBlockAsm64K(dst, src) + const sz, pool = 65536, 0 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBlockAsm12B(dst, src) + const sz, pool = 16384, 1 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBlockAsm10B(dst, src) + const sz, pool = 4096, 2 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBlockAsm8B(dst, src) + const sz, pool = 1024, 3 + tmp, ok := encPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encPools[pool].Put(tmp) + return encodeSnappyBlockAsm8B(dst, src, tmp) } // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It @@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) { // Use 8 bit table when less than... limit8B = 512 ) - if len(src) >= 64<<10 { - return encodeSnappyBetterBlockAsm(dst, src) + if len(src) > 65536 { + const sz, pool = 589824, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm(dst, src, tmp) } + if len(src) >= limit12B { - return encodeSnappyBetterBlockAsm64K(dst, src) + const sz, pool = 294912, 4 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm64K(dst, src, tmp) } if len(src) >= limit10B { - return encodeSnappyBetterBlockAsm12B(dst, src) + const sz, pool = 81920, 0 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + + return encodeSnappyBetterBlockAsm12B(dst, src, tmp) } if len(src) >= limit8B { - return encodeSnappyBetterBlockAsm10B(dst, src) + const sz, pool = 20480, 1 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm10B(dst, src, tmp) } if len(src) < minNonLiteralBlockSize { return 0 } - return encodeSnappyBetterBlockAsm8B(dst, src) + + const sz, pool = 5120, 2 + tmp, ok := encBetterPools[pool].Get().(*[sz]byte) + if !ok { + tmp = &[sz]byte{} + } + race.WriteSlice(tmp[:]) + defer encBetterPools[pool].Put(tmp) + return encodeSnappyBetterBlockAsm8B(dst, src, tmp) } diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go index 6b393c34d..dd1c973ca 100644 --- a/vendor/github.com/klauspost/compress/s2/encode_go.go +++ b/vendor/github.com/klauspost/compress/s2/encode_go.go @@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int { } // input must be > inputMargin -func calcBlockSize(src []byte) (d int) { +func calcBlockSize(src []byte, _ *[32768]byte) (d int) { // Initialize the hash table. const ( tableBits = 13 @@ -503,7 +503,7 @@ emitRemainder: } // length must be > inputMargin. -func calcBlockSizeSmall(src []byte) (d int) { +func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) { // Initialize the hash table. const ( tableBits = 9 diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go index 297e41501..f43aa8154 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go @@ -11,154 +11,154 @@ func _dummy_() // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm(dst []byte, src []byte) int +func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm4MB(dst []byte, src []byte) int +func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm12B(dst []byte, src []byte) int +func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm10B(dst []byte, src []byte) int +func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBlockAsm8B(dst []byte, src []byte) int +func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm(dst []byte, src []byte) int +func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4194304 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm12B(dst []byte, src []byte) int +func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm10B(dst []byte, src []byte) int +func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeBetterBlockAsm8B(dst []byte, src []byte) int +func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm(dst []byte, src []byte) int +func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 65535 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSize(src []byte) int +func calcBlockSize(src []byte, tmp *[32768]byte) int // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst. // Maximum input 1024 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape -func calcBlockSizeSmall(src []byte) int +func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // emitLiteral writes a literal chunk and returns the number of bytes written. // diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s index 2ff5b3340..df9be687b 100644 --- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s +++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s @@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0 #endif RET -// func encodeBlockAsm(dst []byte, src []byte) int +// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm repeat_extend_back_loop_encodeBlockAsm: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm repeat_extend_back_end_encodeBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeBlockAsm four_bytes_repeat_emit_encodeBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm three_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm two_bytes_repeat_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm JMP memmove_long_repeat_emit_encodeBlockAsm one_byte_repeat_emit_encodeBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm memmove_long_repeat_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm matchlen_bsf_16repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match8_repeat_extend_encodeBlockAsm: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm matchlen_bsf_8_repeat_extend_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm matchlen_match4_repeat_extend_encodeBlockAsm: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm JB repeat_extend_forward_end_encodeBlockAsm - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm matchlen_match1_repeat_extend_encodeBlockAsm: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_match_repeat_encodeBlockAsm: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm cant_repeat_two_offset_match_repeat_encodeBlockAsm: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_match_repeat_encodeBlockAsm - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_repeat_encodeBlockAsm repeat_five_match_repeat_encodeBlockAsm: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_match_repeat_encodeBlockAsm: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_match_repeat_encodeBlockAsm: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_match_repeat_encodeBlockAsm: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_match_repeat_encodeBlockAsm: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_as_copy_encodeBlockAsm: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm four_bytes_remain_repeat_as_copy_encodeBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_repeat_as_copy_encodeBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - MOVL SI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + MOVL DI, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm long_offset_short_repeat_as_copy_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short: - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short - CMPL BX, $0x0100ffff + CMPL SI, $0x0100ffff JB repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short - LEAL -16842747(BX), BX - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(SI), SI + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm two_byte_offset_short_repeat_as_copy_encodeBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm emit_copy_three_repeat_as_copy_encodeBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm no_repeat_found_encodeBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm candidate3_match_encodeBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm candidate2_match_encodeBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm match_extend_back_loop_encodeBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm JMP match_extend_back_loop_encodeBlockAsm match_extend_back_end_encodeBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBlockAsm four_bytes_match_emit_encodeBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm three_bytes_match_emit_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm two_bytes_match_emit_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm JMP memmove_long_match_emit_encodeBlockAsm one_byte_match_emit_encodeBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm memmove_long_match_emit_encodeBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm: match_nolit_loop_encodeBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm matchlen_bsf_16match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match8_match_nolit_encodeBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm matchlen_bsf_8_match_nolit_encodeBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm matchlen_match4_match_nolit_encodeBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm JB match_nolit_end_encodeBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm matchlen_match1_match_nolit_encodeBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy repeat_five_match_nolit_encodeBlockAsm_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm four_bytes_remain_match_nolit_encodeBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_match_nolit_encodeBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - MOVL BX, DI - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + MOVL SI, R8 + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm long_offset_short_match_nolit_encodeBlockAsm: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm_emit_copy_short - CMPL R9, $0x0100ffff + CMPL R10, $0x0100ffff JB repeat_five_match_nolit_encodeBlockAsm_emit_copy_short - LEAL -16842747(R9), R9 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R10), R10 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_four_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_three_match_nolit_encodeBlockAsm_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_match_nolit_encodeBlockAsm_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm two_byte_offset_short_match_nolit_encodeBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm emit_copy_three_match_nolit_encodeBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm - INCL CX + INCL DX JMP search_loop_encodeBlockAsm emit_remainder_encodeBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm @@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm: JB three_bytes_emit_remainder_encodeBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBlockAsm four_bytes_emit_remainder_encodeBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm three_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm two_bytes_emit_remainder_encodeBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm JMP memmove_long_emit_remainder_encodeBlockAsm one_byte_emit_remainder_encodeBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm memmove_long_emit_remainder_encodeBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm4MB(dst []byte, src []byte) int +// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm4MB(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm4MB - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm4MB repeat_extend_back_loop_encodeBlockAsm4MB: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm4MB - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm4MB repeat_extend_back_end_encodeBlockAsm4MB: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 4(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 4(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB three_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm4MB two_bytes_repeat_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm4MB JMP memmove_long_repeat_emit_encodeBlockAsm4MB one_byte_repeat_emit_encodeBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm4MB emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm4MB memmove_long_repeat_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm4MB: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm4MB - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB matchlen_bsf_16repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match8_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm4MB - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm4MB matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match4_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm4MB - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm4MB - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm4MB: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm4MB JB repeat_extend_forward_end_encodeBlockAsm4MB - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm4MB matchlen_match1_repeat_extend_encodeBlockAsm4MB: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm4MB: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm4MB - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm4MB cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm4MB - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_match_repeat_encodeBlockAsm4MB - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_match_repeat_encodeBlockAsm4MB: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_match_repeat_encodeBlockAsm4MB: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_match_repeat_encodeBlockAsm4MB: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_match_repeat_encodeBlockAsm4MB: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_as_copy_encodeBlockAsm4MB: // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeBlockAsm4MB - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_repeat_as_copy_encodeBlockAsm4MB: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm4MB - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB long_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00010100 + CMPL SI, $0x00010100 JB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(BX), BX - MOVL BX, SI - MOVW $0x001d, (AX) - MOVW BX, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(SI), SI + MOVL SI, DI + MOVW $0x001d, (CX) + MOVW SI, 2(CX) + SARL $0x10, DI + MOVB DI, 4(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm4MB - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm4MB emit_copy_three_repeat_as_copy_encodeBlockAsm4MB: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm4MB: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm4MB no_repeat_found_encodeBlockAsm4MB: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm4MB - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm4MB - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm4MB candidate3_match_encodeBlockAsm4MB: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm4MB candidate2_match_encodeBlockAsm4MB: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm4MB match_extend_back_loop_encodeBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm4MB JMP match_extend_back_loop_encodeBlockAsm4MB match_extend_back_end_encodeBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm4MB: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm4MB - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeBlockAsm4MB - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBlockAsm4MB three_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm4MB two_bytes_match_emit_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm4MB JMP memmove_long_match_emit_encodeBlockAsm4MB one_byte_match_emit_encodeBlockAsm4MB: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm4MB emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm4MB: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm4MB memmove_long_match_emit_encodeBlockAsm4MB: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm4MB: match_nolit_loop_encodeBlockAsm4MB: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm4MB - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm4MB matchlen_bsf_16match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match8_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm4MB - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm4MB - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm4MB matchlen_match4_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm4MB - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm4MB - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm4MB: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm4MB JB match_nolit_end_encodeBlockAsm4MB - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm4MB - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm4MB matchlen_match1_match_nolit_encodeBlockAsm4MB: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm4MB - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm4MB: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeBlockAsm4MB - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeBlockAsm4MB - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeBlockAsm4MB // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB four_bytes_remain_match_nolit_encodeBlockAsm4MB: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeBlockAsm4MB - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_match_nolit_encodeBlockAsm4MB: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm4MB - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 - + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 + // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB long_offset_short_match_nolit_encodeBlockAsm4MB: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short - CMPL R9, $0x00010100 + CMPL R10, $0x00010100 JB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short - LEAL -65536(R9), R9 - MOVL R9, BX - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BX - MOVB BL, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R10), R10 + MOVL R10, SI + MOVW $0x001d, (CX) + MOVW R10, 2(CX) + SARL $0x10, SI + MOVB SI, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB two_byte_offset_short_match_nolit_encodeBlockAsm4MB: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm4MB - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm4MB emit_copy_three_match_nolit_encodeBlockAsm4MB: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm4MB - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm4MB: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm4MB - INCL CX + INCL DX JMP search_loop_encodeBlockAsm4MB emit_remainder_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm4MB @@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB: JB three_bytes_emit_remainder_encodeBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB three_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm4MB two_bytes_emit_remainder_encodeBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm4MB JMP memmove_long_emit_remainder_encodeBlockAsm4MB one_byte_emit_remainder_encodeBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm4MB memmove_long_emit_remainder_encodeBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm12B(dst []byte, src []byte) int +// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm12B repeat_extend_back_loop_encodeBlockAsm12B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm12B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm12B repeat_extend_back_end_encodeBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm12B JB three_bytes_repeat_emit_encodeBlockAsm12B three_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm12B two_bytes_repeat_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm12B JMP memmove_long_repeat_emit_encodeBlockAsm12B one_byte_repeat_emit_encodeBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm12B emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm12B memmove_long_repeat_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm12B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm12B matchlen_bsf_16repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match8_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm12B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm12B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm12B matchlen_bsf_8_repeat_extend_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm12B matchlen_match4_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm12B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm12B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm12B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm12B JB repeat_extend_forward_end_encodeBlockAsm12B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm12B matchlen_match1_repeat_extend_encodeBlockAsm12B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm12B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm12B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm12B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_match_repeat_encodeBlockAsm12B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_match_repeat_encodeBlockAsm12B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_match_repeat_encodeBlockAsm12B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm12B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B long_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm12B emit_copy_three_repeat_as_copy_encodeBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm12B no_repeat_found_encodeBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm12B candidate3_match_encodeBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm12B candidate2_match_encodeBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm12B match_extend_back_loop_encodeBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm12B JMP match_extend_back_loop_encodeBlockAsm12B match_extend_back_end_encodeBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm12B JB three_bytes_match_emit_encodeBlockAsm12B three_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm12B two_bytes_match_emit_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm12B JMP memmove_long_match_emit_encodeBlockAsm12B one_byte_match_emit_encodeBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm12B emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm12B memmove_long_match_emit_encodeBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm12B: match_nolit_loop_encodeBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm12B matchlen_bsf_16match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match8_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm12B matchlen_bsf_8_match_nolit_encodeBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm12B matchlen_match4_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm12B JB match_nolit_end_encodeBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm12B matchlen_match1_match_nolit_encodeBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm12B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B long_offset_short_match_nolit_encodeBlockAsm12B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBlockAsm12B + two_byte_offset_short_match_nolit_encodeBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm12B emit_copy_three_match_nolit_encodeBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm12B emit_remainder_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm12B @@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B: JB three_bytes_emit_remainder_encodeBlockAsm12B three_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm12B two_bytes_emit_remainder_encodeBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm12B JMP memmove_long_emit_remainder_encodeBlockAsm12B one_byte_emit_remainder_encodeBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm12B emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm12B memmove_long_emit_remainder_encodeBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm10B(dst []byte, src []byte) int +// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm10B repeat_extend_back_loop_encodeBlockAsm10B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm10B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm10B repeat_extend_back_end_encodeBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm10B JB three_bytes_repeat_emit_encodeBlockAsm10B three_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm10B two_bytes_repeat_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm10B JMP memmove_long_repeat_emit_encodeBlockAsm10B one_byte_repeat_emit_encodeBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm10B emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm10B memmove_long_repeat_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm10B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm10B matchlen_bsf_16repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match8_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm10B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm10B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm10B matchlen_bsf_8_repeat_extend_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm10B matchlen_match4_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm10B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm10B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm10B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm10B JB repeat_extend_forward_end_encodeBlockAsm10B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm10B matchlen_match1_repeat_extend_encodeBlockAsm10B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm10B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm10B // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm10B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_match_repeat_encodeBlockAsm10B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_match_repeat_encodeBlockAsm10B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_match_repeat_encodeBlockAsm10B: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm10B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B long_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, DI - LEAL -4(BX), BX - CMPL DI, $0x08 + MOVL SI, R8 + LEAL -4(SI), SI + CMPL R8, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL DI, $0x0c + CMPL R8, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JB repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm10B emit_copy_three_repeat_as_copy_encodeBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm10B no_repeat_found_encodeBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm10B candidate3_match_encodeBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm10B candidate2_match_encodeBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm10B match_extend_back_loop_encodeBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm10B JMP match_extend_back_loop_encodeBlockAsm10B match_extend_back_end_encodeBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm10B JB three_bytes_match_emit_encodeBlockAsm10B three_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm10B two_bytes_match_emit_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm10B JMP memmove_long_match_emit_encodeBlockAsm10B one_byte_match_emit_encodeBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm10B emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm10B memmove_long_match_emit_encodeBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm10B: match_nolit_loop_encodeBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm10B matchlen_bsf_16match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match8_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm10B matchlen_bsf_8_match_nolit_encodeBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm10B matchlen_match4_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm10B JB match_nolit_end_encodeBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm10B matchlen_match1_match_nolit_encodeBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm10B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B long_offset_short_match_nolit_encodeBlockAsm10B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 + MOVL R10, DI + LEAL -4(R10), R10 + CMPL DI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JB repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_short_match_nolit_encodeBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm10B emit_copy_three_match_nolit_encodeBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm10B emit_remainder_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm10B @@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B: JB three_bytes_emit_remainder_encodeBlockAsm10B three_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm10B two_bytes_emit_remainder_encodeBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm10B JMP memmove_long_emit_remainder_encodeBlockAsm10B one_byte_emit_remainder_encodeBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm10B emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm10B memmove_long_emit_remainder_encodeBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBlockAsm8B(dst []byte, src []byte) int +// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 - JNE no_repeat_found_encodeBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), DI - MOVL SI, BX - SUBL 16(SP), BX + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 + JNE no_repeat_found_encodeBlockAsm8B + LEAL 1(DX), DI + MOVL 12(SP), R8 + MOVL DI, SI + SUBL 16(SP), SI JZ repeat_extend_back_end_encodeBlockAsm8B repeat_extend_back_loop_encodeBlockAsm8B: - CMPL SI, DI + CMPL DI, R8 JBE repeat_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(SI*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeBlockAsm8B - LEAL -1(SI), SI - DECL BX + LEAL -1(DI), DI + DECL SI JNZ repeat_extend_back_loop_encodeBlockAsm8B repeat_extend_back_end_encodeBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeBlockAsm8B JB three_bytes_repeat_emit_encodeBlockAsm8B three_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeBlockAsm8B two_bytes_repeat_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeBlockAsm8B JMP memmove_long_repeat_emit_encodeBlockAsm8B one_byte_repeat_emit_encodeBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_repeat_emit_encodeBlockAsm8B emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_repeat_emit_encodeBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeBlockAsm8B memmove_long_repeat_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R11 - SHRQ $0x05, R11 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R12 - SUBQ R10, R12 - DECQ R11 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R12 + SHRQ $0x05, R12 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R13 + SUBQ R11, R13 + DECQ R12 JA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R12*1), R10 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R10)(R13*1), R11 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R11 ADDQ $0x20, R13 - ADDQ $0x20, R10 - ADDQ $0x20, R12 - DECQ R11 + DECQ R12 JNA emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R12*1), X4 - MOVOU -16(R9)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ R8, R12 + MOVOU -32(R10)(R13*1), X4 + MOVOU -16(R10)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R9, R13 JAE emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), R8 - SUBL CX, R8 - LEAQ (DX)(CX*1), R9 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R9 + SUBL DX, R9 + LEAQ (BX)(DX*1), R10 + LEAQ (BX)(SI*1), SI // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x10 + CMPL R9, $0x10 JB matchlen_match8_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - MOVQ 8(R9)(R11*1), R12 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + MOVQ 8(R10)(R12*1), R13 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - XORQ 8(BX)(R11*1), R12 + XORQ 8(SI)(R12*1), R13 JNZ matchlen_bsf_16repeat_extend_encodeBlockAsm8B - LEAL -16(R8), R8 - LEAL 16(R11), R11 + LEAL -16(R9), R9 + LEAL 16(R12), R12 JMP matchlen_loopback_16_repeat_extend_encodeBlockAsm8B matchlen_bsf_16repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match8_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x08 + CMPL R9, $0x08 JB matchlen_match4_repeat_extend_encodeBlockAsm8B - MOVQ (R9)(R11*1), R10 - XORQ (BX)(R11*1), R10 + MOVQ (R10)(R12*1), R11 + XORQ (SI)(R12*1), R11 JNZ matchlen_bsf_8_repeat_extend_encodeBlockAsm8B - LEAL -8(R8), R8 - LEAL 8(R11), R11 + LEAL -8(R9), R9 + LEAL 8(R12), R12 JMP matchlen_match4_repeat_extend_encodeBlockAsm8B matchlen_bsf_8_repeat_extend_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP repeat_extend_forward_end_encodeBlockAsm8B matchlen_match4_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x04 + CMPL R9, $0x04 JB matchlen_match2_repeat_extend_encodeBlockAsm8B - MOVL (R9)(R11*1), R10 - CMPL (BX)(R11*1), R10 + MOVL (R10)(R12*1), R11 + CMPL (SI)(R12*1), R11 JNE matchlen_match2_repeat_extend_encodeBlockAsm8B - LEAL -4(R8), R8 - LEAL 4(R11), R11 + LEAL -4(R9), R9 + LEAL 4(R12), R12 matchlen_match2_repeat_extend_encodeBlockAsm8B: - CMPL R8, $0x01 + CMPL R9, $0x01 JE matchlen_match1_repeat_extend_encodeBlockAsm8B JB repeat_extend_forward_end_encodeBlockAsm8B - MOVW (R9)(R11*1), R10 - CMPW (BX)(R11*1), R10 + MOVW (R10)(R12*1), R11 + CMPW (SI)(R12*1), R11 JNE matchlen_match1_repeat_extend_encodeBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, R8 + LEAL 2(R12), R12 + SUBL $0x02, R9 JZ repeat_extend_forward_end_encodeBlockAsm8B matchlen_match1_repeat_extend_encodeBlockAsm8B: - MOVB (R9)(R11*1), R10 - CMPB (BX)(R11*1), R10 + MOVB (R10)(R12*1), R11 + CMPB (SI)(R12*1), R11 JNE repeat_extend_forward_end_encodeBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 repeat_extend_forward_end_encodeBlockAsm8B: - ADDL R11, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI - TESTL DI, DI + ADDL R12, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI + TESTL R8, R8 JZ repeat_as_copy_encodeBlockAsm8B // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_match_repeat_encodeBlockAsm8B - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_match_repeat_encodeBlockAsm8B - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_match_repeat_encodeBlockAsm8B: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_match_repeat_encodeBlockAsm8B: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: // emitCopy - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE long_offset_short_repeat_as_copy_encodeBlockAsm8B - MOVL $0x00000001, DI - LEAL 16(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX - SUBL $0x08, BX + MOVL $0x00000001, R8 + LEAL 16(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX + SUBL $0x08, SI // emitRepeat - LEAL -4(BX), BX + LEAL -4(SI), SI JMP cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B long_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX // emitRepeat - MOVL BX, SI - LEAL -4(BX), BX - CMPL SI, $0x08 + MOVL SI, DI + LEAL -4(SI), SI + CMPL DI, $0x08 JBE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c + CMPL DI, $0x0c JAE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - CMPL BX, $0x00000104 + CMPL SI, $0x00000104 JB repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -256(BX), BX - MOVW $0x0019, (AX) - MOVW BX, 2(AX) - ADDQ $0x04, AX + LEAL -256(SI), SI + MOVW $0x0019, (CX) + MOVW SI, 2(CX) + ADDQ $0x04, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -4(BX), BX - MOVW $0x0015, (AX) - MOVB BL, 2(AX) - ADDQ $0x03, AX + LEAL -4(SI), SI + MOVW $0x0015, (CX) + MOVB SI, 2(CX) + ADDQ $0x03, CX JMP repeat_end_emit_encodeBlockAsm8B repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, BX - ORL $0x01, BX - MOVW BX, (AX) - ADDQ $0x02, AX + SHLL $0x02, SI + ORL $0x01, SI + MOVW SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B - XORQ DI, DI - LEAL 1(DI)(BX*4), BX - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + XORQ R8, R8 + LEAL 1(R8)(SI*4), SI + MOVB DI, 1(CX) + SARL $0x08, DI + SHLL $0x05, DI + ORL DI, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeBlockAsm8B emit_copy_three_repeat_as_copy_encodeBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeBlockAsm8B no_repeat_found_encodeBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBlockAsm8B candidate3_match_encodeBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeBlockAsm8B candidate2_match_encodeBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBlockAsm8B match_extend_back_loop_encodeBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBlockAsm8B JMP match_extend_back_loop_encodeBlockAsm8B match_extend_back_end_encodeBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeBlockAsm8B JB three_bytes_match_emit_encodeBlockAsm8B three_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBlockAsm8B two_bytes_match_emit_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeBlockAsm8B JMP memmove_long_match_emit_encodeBlockAsm8B one_byte_match_emit_encodeBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBlockAsm8B emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeBlockAsm8B memmove_long_match_emit_encodeBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) - ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 + ADDQ $0x20, R12 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeBlockAsm8B: match_nolit_loop_encodeBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeBlockAsm8B matchlen_bsf_16match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match8_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeBlockAsm8B matchlen_bsf_8_match_nolit_encodeBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeBlockAsm8B matchlen_match4_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeBlockAsm8B JB match_nolit_end_encodeBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeBlockAsm8B matchlen_match1_match_nolit_encodeBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeBlockAsm8B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE long_offset_short_match_nolit_encodeBlockAsm8B - MOVL $0x00000001, SI - LEAL 16(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX - SUBL $0x08, R9 + MOVL $0x00000001, DI + LEAL 16(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R10 // emitRepeat - LEAL -4(R9), R9 + LEAL -4(R10), R10 JMP cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B long_offset_short_match_nolit_encodeBlockAsm8B: - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX // emitRepeat - MOVL R9, BX - LEAL -4(R9), R9 - CMPL BX, $0x08 + MOVL R10, SI + LEAL -4(R10), R10 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 + CMPL R10, $0x00000104 JB repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX + LEAL -256(R10), R10 + MOVW $0x0019, (CX) + MOVW R10, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX + LEAL -4(R10), R10 + MOVW $0x0015, (CX) + MOVB R10, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX + SHLL $0x02, R10 + ORL $0x01, R10 + MOVW R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BL, 1(AX) - SARL $0x08, BX - SHLL $0x05, BX - ORL BX, R9 - MOVB R9, (AX) - ADDQ $0x02, AX + XORQ DI, DI + LEAL 1(DI)(R10*4), R10 + MOVB SI, 1(CX) + SARL $0x08, SI + SHLL $0x05, SI + ORL SI, R10 + MOVB R10, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_short_match_nolit_encodeBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBlockAsm8B emit_copy_three_match_nolit_encodeBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeBlockAsm8B emit_remainder_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBlockAsm8B @@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B: JB three_bytes_emit_remainder_encodeBlockAsm8B three_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBlockAsm8B two_bytes_emit_remainder_encodeBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBlockAsm8B JMP memmove_long_emit_remainder_encodeBlockAsm8B one_byte_emit_remainder_encodeBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBlockAsm8B emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBlockAsm8B memmove_long_emit_remainder_encodeBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm(dst []byte, src []byte) int +// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm check_maxskip_ok_encodeBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm no_short_found_encodeBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm candidateS_match_encodeBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm match_extend_back_loop_encodeBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm JMP match_extend_back_loop_encodeBetterBlockAsm match_extend_back_end_encodeBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm matchlen_bsf_16match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match8_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm matchlen_bsf_8_match_nolit_encodeBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm matchlen_match4_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm JB match_nolit_end_encodeBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm matchlen_match1_match_nolit_encodeBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm match_length_ok_encodeBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeBetterBlockAsm four_bytes_match_emit_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm three_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm two_bytes_match_emit_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm JMP memmove_long_match_emit_encodeBetterBlockAsm one_byte_match_emit_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm memmove_long_match_emit_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm four_bytes_remain_match_nolit_encodeBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_match_nolit_encodeBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - MOVL DI, R8 - SHRL $0x08, R8 - SHLL $0x05, R8 - ORL R8, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + MOVL R8, R9 + SHRL $0x08, R9 + SHLL $0x05, R9 + ORL R9, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm long_offset_short_match_nolit_encodeBetterBlockAsm: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm two_byte_offset_short_match_nolit_encodeBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm emit_copy_three_match_nolit_encodeBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm match_is_repeat_encodeBetterBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_repeat_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm four_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm three_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm two_bytes_match_emit_repeat_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm one_byte_match_emit_repeat_encodeBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm memmove_long_match_emit_repeat_encodeBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm: - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm - CMPL R11, $0x0100ffff + CMPL R12, $0x0100ffff JB repeat_five_match_nolit_repeat_encodeBetterBlockAsm - LEAL -16842747(R11), R11 - MOVL $0xfffb001d, (AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX + LEAL -16842747(R12), R12 + MOVL $0xfffb001d, (CX) + MOVB $0xff, 4(CX) + ADDQ $0x05, CX JMP emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm repeat_five_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_four_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_three_match_nolit_repeat_encodeBetterBlockAsm: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_match_nolit_repeat_encodeBetterBlockAsm: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm - CMPQ AX, (SP) - JB match_nolit_dst_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) - RET - -match_nolit_dst_ok_encodeBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + CMPQ CX, (SP) + JB match_nolit_dst_ok_encodeBetterBlockAsm + MOVQ $0x00000000, ret+56(FP) + RET + +match_nolit_dst_ok_encodeBetterBlockAsm: + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm emit_remainder_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm @@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm: JB three_bytes_emit_remainder_encodeBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm four_bytes_emit_remainder_encodeBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm three_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm two_bytes_emit_remainder_encodeBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm JMP memmove_long_emit_remainder_encodeBetterBlockAsm one_byte_emit_remainder_encodeBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm memmove_long_emit_remainder_encodeBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int +// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm4MB(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm4MB: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm4MB MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm4MB: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeBetterBlockAsm4MB - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeBetterBlockAsm4MB check_maxskip_ok_encodeBetterBlockAsm4MB: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeBetterBlockAsm4MB: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm4MB - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm4MB no_short_found_encodeBetterBlockAsm4MB: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm4MB - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm4MB - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm4MB candidateS_match_encodeBetterBlockAsm4MB: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm4MB - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm4MB: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm4MB match_extend_back_loop_encodeBetterBlockAsm4MB: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm4MB - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm4MB - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm4MB JMP match_extend_back_loop_encodeBetterBlockAsm4MB match_extend_back_end_encodeBetterBlockAsm4MB: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 4(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 4(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm4MB: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match8_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm4MB matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm4MB matchlen_match4_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm4MB - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB JB match_nolit_end_encodeBetterBlockAsm4MB - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm4MB - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm4MB matchlen_match1_match_nolit_encodeBetterBlockAsm4MB: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm4MB - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm4MB: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeBetterBlockAsm4MB - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeBetterBlockAsm4MB - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeBetterBlockAsm4MB match_length_ok_encodeBetterBlockAsm4MB: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB three_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm4MB two_bytes_match_emit_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_encodeBetterBlockAsm4MB one_byte_match_emit_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm4MB memmove_long_match_emit_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeBetterBlockAsm4MB - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_match_nolit_encodeBetterBlockAsm4MB: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm4MB - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB long_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm4MB - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB emit_copy_three_match_nolit_encodeBetterBlockAsm4MB: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB match_is_repeat_encodeBetterBlockAsm4MB: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm4MB JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB one_byte_match_emit_repeat_encodeBetterBlockAsm4MB: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB - CMPL R11, $0x00010100 + CMPL R12, $0x00010100 JB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB - LEAL -65536(R11), R11 - MOVL R11, DI - MOVW $0x001d, (AX) - MOVW R11, 2(AX) - SARL $0x10, DI - MOVB DI, 4(AX) - ADDQ $0x05, AX + LEAL -65536(R12), R12 + MOVL R12, R8 + MOVW $0x001d, (CX) + MOVW R12, 2(CX) + SARL $0x10, R8 + MOVB R8, 4(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm4MB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm4MB: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm4MB - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm4MB: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm4MB: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm4MB - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm4MB emit_remainder_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 4(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 4(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm4MB - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm4MB: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm4MB @@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB: JB three_bytes_emit_remainder_encodeBetterBlockAsm4MB MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB three_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB two_bytes_emit_remainder_encodeBetterBlockAsm4MB: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm4MB JMP memmove_long_emit_remainder_encodeBetterBlockAsm4MB one_byte_emit_remainder_encodeBetterBlockAsm4MB: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB memmove_long_emit_remainder_encodeBetterBlockAsm4MB: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: MOVOU (SI), X4 @@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm12B no_short_found_encodeBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm12B candidateS_match_encodeBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm12B match_extend_back_loop_encodeBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm12B JMP match_extend_back_loop_encodeBetterBlockAsm12B match_extend_back_end_encodeBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match8_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm12B matchlen_match4_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm12B JB match_nolit_end_encodeBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm12B matchlen_match1_match_nolit_encodeBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm12B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm12B JB three_bytes_match_emit_encodeBetterBlockAsm12B three_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm12B two_bytes_match_emit_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm12B JMP memmove_long_match_emit_encodeBetterBlockAsm12B one_byte_match_emit_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm12B memmove_long_match_emit_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm12B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B long_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B emit_copy_three_match_nolit_encodeBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B match_is_repeat_encodeBetterBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm12B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm12B three_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B two_bytes_match_emit_repeat_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm12B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm12B one_byte_match_emit_repeat_encodeBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B memmove_long_match_emit_repeat_encodeBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm12B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm12B emit_remainder_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm12B @@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeBetterBlockAsm12B three_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B two_bytes_emit_remainder_encodeBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeBetterBlockAsm12B one_byte_emit_remainder_encodeBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm12B memmove_long_emit_remainder_encodeBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 - JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm10B no_short_found_encodeBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm10B candidateS_match_encodeBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm10B match_extend_back_loop_encodeBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm10B JMP match_extend_back_loop_encodeBetterBlockAsm10B match_extend_back_end_encodeBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match8_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm10B matchlen_match4_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm10B JB match_nolit_end_encodeBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm10B matchlen_match1_match_nolit_encodeBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm10B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm10B JB three_bytes_match_emit_encodeBetterBlockAsm10B three_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm10B two_bytes_match_emit_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm10B JMP memmove_long_match_emit_encodeBetterBlockAsm10B one_byte_match_emit_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm10B memmove_long_match_emit_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm10B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B long_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B emit_copy_three_match_nolit_encodeBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B match_is_repeat_encodeBetterBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm10B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm10B three_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B two_bytes_match_emit_repeat_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm10B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm10B one_byte_match_emit_repeat_encodeBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B memmove_long_match_emit_repeat_encodeBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JB repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm10B repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B: - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm10B emit_remainder_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm10B @@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeBetterBlockAsm10B three_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B two_bytes_emit_remainder_encodeBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeBetterBlockAsm10B one_byte_emit_remainder_encodeBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm10B memmove_long_emit_remainder_encodeBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -6(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -6(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeBetterBlockAsm8B no_short_found_encodeBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeBetterBlockAsm8B candidateS_match_encodeBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeBetterBlockAsm8B match_extend_back_loop_encodeBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeBetterBlockAsm8B JMP match_extend_back_loop_encodeBetterBlockAsm8B match_extend_back_end_encodeBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match8_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeBetterBlockAsm8B matchlen_match4_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeBetterBlockAsm8B JB match_nolit_end_encodeBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeBetterBlockAsm8B matchlen_match1_match_nolit_encodeBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL 16(SP), DI + CMPL 16(SP), R8 JEQ match_is_repeat_encodeBetterBlockAsm8B - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeBetterBlockAsm8B JB three_bytes_match_emit_encodeBetterBlockAsm8B three_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeBetterBlockAsm8B two_bytes_match_emit_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeBetterBlockAsm8B JMP memmove_long_match_emit_encodeBetterBlockAsm8B one_byte_match_emit_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x04 + CMPQ R9, $0x04 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ R8, $0x08 + CMPQ R9, $0x08 JB emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R9), R10 - MOVL R10, (AX) + MOVL (R10), R11 + MOVL R11, (CX) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R9), R10 - MOVL -4(R9)(R8*1), R9 - MOVL R10, (AX) - MOVL R9, -4(AX)(R8*1) + MOVL (R10), R11 + MOVL -4(R10)(R9*1), R10 + MOVL R11, (CX) + MOVL R10, -4(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeBetterBlockAsm8B memmove_long_match_emit_encodeBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE long_offset_short_match_nolit_encodeBetterBlockAsm8B - MOVL $0x00000001, BX - LEAL 16(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX - SUBL $0x08, R11 + MOVL $0x00000001, SI + LEAL 16(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX + SUBL $0x08, R12 // emitRepeat - LEAL -4(R11), R11 + LEAL -4(R12), R12 JMP cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B long_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B -repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX +repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short: + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX + JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B emit_copy_three_match_nolit_encodeBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B match_is_repeat_encodeBetterBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_match_emit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_repeat_encodeBetterBlockAsm8B JB three_bytes_match_emit_repeat_encodeBetterBlockAsm8B three_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B two_bytes_match_emit_repeat_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_repeat_encodeBetterBlockAsm8B JMP memmove_long_match_emit_repeat_encodeBetterBlockAsm8B one_byte_match_emit_repeat_encodeBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x04 + CMPQ R8, $0x04 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4 - CMPQ DI, $0x08 + CMPQ R8, $0x08 JB emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4: - MOVL (R8), R9 - MOVL R9, (AX) + MOVL (R9), R10 + MOVL R10, (CX) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (R8), R9 - MOVL -4(R8)(DI*1), R8 - MOVL R9, (AX) - MOVL R8, -4(AX)(DI*1) + MOVL (R9), R10 + MOVL -4(R9)(R8*1), R9 + MOVL R10, (CX) + MOVL R9, -4(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B memmove_long_match_emit_repeat_encodeBetterBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R12 - SUBQ R9, R12 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R13 + SUBQ R10, R13 + DECQ R11 JA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R12*1), R9 - LEAQ -32(AX)(R12*1), R13 + LEAQ -32(R9)(R13*1), R10 + LEAQ -32(CX)(R13*1), R14 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R13) - MOVOA X5, 16(R13) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R14) + MOVOA X5, 16(R14) + ADDQ $0x20, R14 + ADDQ $0x20, R10 ADDQ $0x20, R13 - ADDQ $0x20, R9 - ADDQ $0x20, R12 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R12*1), X4 - MOVOU -16(R8)(R12*1), X5 - MOVOA X4, -32(AX)(R12*1) - MOVOA X5, -16(AX)(R12*1) - ADDQ $0x20, R12 - CMPQ DI, R12 + MOVOU -32(R9)(R13*1), X4 + MOVOU -16(R9)(R13*1), X5 + MOVOA X4, -32(CX)(R13*1) + MOVOA X5, -16(CX)(R13*1) + ADDQ $0x20, R13 + CMPQ R8, R13 JAE emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitRepeat - MOVL R11, BX - LEAL -4(R11), R11 - CMPL BX, $0x08 + MOVL R12, SI + LEAL -4(R12), R12 + CMPL SI, $0x08 JBE repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B - CMPL BX, $0x0c + CMPL SI, $0x0c JAE cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B: - CMPL R11, $0x00000104 + CMPL R12, $0x00000104 JB repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B - LEAL -256(R11), R11 - MOVW $0x0019, (AX) - MOVW R11, 2(AX) - ADDQ $0x04, AX + LEAL -256(R12), R12 + MOVW $0x0019, (CX) + MOVW R12, 2(CX) + ADDQ $0x04, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B: - LEAL -4(R11), R11 - MOVW $0x0015, (AX) - MOVB R11, 2(AX) - ADDQ $0x03, AX + LEAL -4(R12), R12 + MOVW $0x0015, (CX) + MOVB R12, 2(CX) + ADDQ $0x03, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B: - SHLL $0x02, R11 - ORL $0x01, R11 - MOVW R11, (AX) - ADDQ $0x02, AX + SHLL $0x02, R12 + ORL $0x01, R12 + MOVW R12, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeBetterBlockAsm8B - XORQ BX, BX - LEAL 1(BX)(R11*4), R11 - MOVB DI, 1(AX) - SARL $0x08, DI - SHLL $0x05, DI - ORL DI, R11 - MOVB R11, (AX) - ADDQ $0x02, AX + XORQ SI, SI + LEAL 1(SI)(R12*4), R12 + MOVB R8, 1(CX) + SARL $0x08, R8 + SHLL $0x05, R8 + ORL R8, R12 + MOVB R12, (CX) + ADDQ $0x02, CX match_nolit_emitcopy_end_encodeBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeBetterBlockAsm8B emit_remainder_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeBetterBlockAsm8B @@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeBetterBlockAsm8B three_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B two_bytes_emit_remainder_encodeBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeBetterBlockAsm8B one_byte_emit_remainder_encodeBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeBetterBlockAsm8B memmove_long_emit_remainder_encodeBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm repeat_extend_back_loop_encodeSnappyBlockAsm: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm repeat_extend_back_end_encodeSnappyBlockAsm: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_encodeSnappyBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm four_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVL BX, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL SI, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm three_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm two_bytes_repeat_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm JMP memmove_long_repeat_emit_encodeSnappyBlockAsm one_byte_repeat_emit_encodeSnappyBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm memmove_long_repeat_emit_encodeSnappyBlockAsm: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match8_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match4_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm JB repeat_extend_forward_end_encodeSnappyBlockAsm - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm matchlen_match1_repeat_extend_encodeSnappyBlockAsm: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + MOVB $0xff, (CX) + MOVL DI, 1(CX) + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_encodeSnappyBlockAsm - XORL DI, DI - LEAL -1(DI)(BX*4), BX - MOVB BL, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX + XORL R8, R8 + LEAL -1(R8)(SI*4), SI + MOVB SI, (CX) + MOVL DI, 1(CX) + ADDQ $0x05, CX JMP repeat_end_emit_encodeSnappyBlockAsm two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm no_repeat_found_encodeSnappyBlockAsm: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm candidate3_match_encodeSnappyBlockAsm: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm candidate2_match_encodeSnappyBlockAsm: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm match_extend_back_loop_encodeSnappyBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm JMP match_extend_back_loop_encodeSnappyBlockAsm match_extend_back_end_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB three_bytes_match_emit_encodeSnappyBlockAsm - CMPL DI, $0x01000000 + CMPL R8, $0x01000000 JB four_bytes_match_emit_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm four_bytes_match_emit_encodeSnappyBlockAsm: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX + MOVL R8, R10 + SHRL $0x10, R10 + MOVB $0xf8, (CX) + MOVW R8, 1(CX) + MOVB R10, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm three_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm two_bytes_match_emit_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm JMP memmove_long_match_emit_encodeSnappyBlockAsm one_byte_match_emit_encodeSnappyBlockAsm: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm memmove_long_match_emit_encodeSnappyBlockAsm: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm: match_nolit_loop_encodeSnappyBlockAsm: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match8_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm matchlen_match4_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm JB match_nolit_end_encodeSnappyBlockAsm - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm matchlen_match1_match_nolit_encodeSnappyBlockAsm: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBlockAsm - MOVB $0xff, (AX) - MOVL BX, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + MOVB $0xff, (CX) + MOVL SI, 1(CX) + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm four_bytes_remain_match_nolit_encodeSnappyBlockAsm: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm - XORL SI, SI - LEAL -1(SI)(R9*4), R9 - MOVB R9, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + XORL DI, DI + LEAL -1(DI)(R10*4), R10 + MOVB R10, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm two_byte_offset_match_nolit_encodeSnappyBlockAsm: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBlockAsm: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm emit_copy_three_match_nolit_encodeSnappyBlockAsm: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm emit_remainder_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm @@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm four_bytes_emit_remainder_encodeSnappyBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm three_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm two_bytes_emit_remainder_encodeSnappyBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBlockAsm one_byte_emit_remainder_encodeSnappyBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm memmove_long_emit_remainder_encodeSnappyBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm64K - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm64K repeat_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm64K repeat_extend_back_end_encodeSnappyBlockAsm64K: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm64K: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm64K JB three_bytes_repeat_emit_encodeSnappyBlockAsm64K three_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K two_bytes_repeat_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm64K JMP memmove_long_repeat_emit_encodeSnappyBlockAsm64K one_byte_repeat_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K memmove_long_repeat_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K JB repeat_extend_forward_end_encodeSnappyBlockAsm64K - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm64K matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm64K - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm64K: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm64K emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm64K: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm64K no_repeat_found_encodeSnappyBlockAsm64K: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm64K - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm64K - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm64K candidate3_match_encodeSnappyBlockAsm64K: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm64K candidate2_match_encodeSnappyBlockAsm64K: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm64K match_extend_back_loop_encodeSnappyBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm64K JMP match_extend_back_loop_encodeSnappyBlockAsm64K match_extend_back_end_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm64K: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm64K - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm64K JB three_bytes_match_emit_encodeSnappyBlockAsm64K three_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm64K two_bytes_match_emit_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBlockAsm64K one_byte_match_emit_encodeSnappyBlockAsm64K: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm64K: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm64K memmove_long_match_emit_encodeSnappyBlockAsm64K: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm64K: match_nolit_loop_encodeSnappyBlockAsm64K: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm64K - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm64K - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K JB match_nolit_end_encodeSnappyBlockAsm64K - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm64K - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBlockAsm64K: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm64K - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm64K: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm64K: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm64K - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBlockAsm64K: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm64K - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm64K: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x32, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x32, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x32, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x32, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm64K - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm64K emit_remainder_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm64K @@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBlockAsm64K three_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K two_bytes_emit_remainder_encodeSnappyBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBlockAsm64K one_byte_emit_remainder_encodeSnappyBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K -emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) +emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7: + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K memmove_long_emit_remainder_encodeSnappyBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000080, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000080, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x18, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x18, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x18, R11 + IMULQ R9, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x18, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm12B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm12B repeat_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm12B repeat_extend_back_end_encodeSnappyBlockAsm12B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm12B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm12B JB three_bytes_repeat_emit_encodeSnappyBlockAsm12B three_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B two_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm12B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B one_byte_repeat_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B memmove_long_repeat_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B JB repeat_extend_forward_end_encodeSnappyBlockAsm12B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm12B matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm12B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm12B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm12B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm12B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm12B no_repeat_found_encodeSnappyBlockAsm12B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm12B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm12B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm12B candidate3_match_encodeSnappyBlockAsm12B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm12B candidate2_match_encodeSnappyBlockAsm12B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm12B match_extend_back_loop_encodeSnappyBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm12B JMP match_extend_back_loop_encodeSnappyBlockAsm12B match_extend_back_end_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm12B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm12B JB three_bytes_match_emit_encodeSnappyBlockAsm12B three_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm12B two_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBlockAsm12B one_byte_match_emit_encodeSnappyBlockAsm12B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm12B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm12B memmove_long_match_emit_encodeSnappyBlockAsm12B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm12B: match_nolit_loop_encodeSnappyBlockAsm12B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm12B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm12B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B JB match_nolit_end_encodeSnappyBlockAsm12B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm12B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBlockAsm12B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm12B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm12B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm12B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBlockAsm12B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm12B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm12B: - MOVQ $0x000000cf1bbcdcbb, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x18, DI - IMULQ R8, DI - SHRQ $0x34, DI - SHLQ $0x18, BX - IMULQ R8, BX - SHRQ $0x34, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x000000cf1bbcdcbb, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x18, R8 + IMULQ R9, R8 + SHRQ $0x34, R8 + SHLQ $0x18, SI + IMULQ R9, SI + SHRQ $0x34, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm12B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm12B emit_remainder_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm12B @@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm12B three_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B two_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B one_byte_emit_remainder_encodeSnappyBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B memmove_long_emit_remainder_encodeSnappyBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000020, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000020, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm10B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm10B repeat_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm10B repeat_extend_back_end_encodeSnappyBlockAsm10B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm10B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm10B JB three_bytes_repeat_emit_encodeSnappyBlockAsm10B three_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B two_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm10B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B one_byte_repeat_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B memmove_long_repeat_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B JB repeat_extend_forward_end_encodeSnappyBlockAsm10B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm10B matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm10B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm10B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm10B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm10B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm10B no_repeat_found_encodeSnappyBlockAsm10B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm10B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm10B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm10B candidate3_match_encodeSnappyBlockAsm10B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm10B candidate2_match_encodeSnappyBlockAsm10B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm10B match_extend_back_loop_encodeSnappyBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm10B JMP match_extend_back_loop_encodeSnappyBlockAsm10B match_extend_back_end_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm10B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm10B JB three_bytes_match_emit_encodeSnappyBlockAsm10B three_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm10B two_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBlockAsm10B one_byte_match_emit_encodeSnappyBlockAsm10B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm10B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm10B memmove_long_match_emit_encodeSnappyBlockAsm10B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm10B: match_nolit_loop_encodeSnappyBlockAsm10B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm10B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm10B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B JB match_nolit_end_encodeSnappyBlockAsm10B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm10B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBlockAsm10B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm10B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm10B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm10B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBlockAsm10B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm10B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm10B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x36, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x36, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x36, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x36, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm10B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm10B emit_remainder_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm10B @@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm10B three_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B two_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B one_byte_emit_remainder_encodeSnappyBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B memmove_long_emit_remainder_encodeSnappyBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000008, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000008, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x38, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x38, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_encodeSnappyBlockAsm8B - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_encodeSnappyBlockAsm8B repeat_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_encodeSnappyBlockAsm8B repeat_extend_back_end_encodeSnappyBlockAsm8B: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET repeat_dst_size_check_encodeSnappyBlockAsm8B: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_encodeSnappyBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_encodeSnappyBlockAsm8B JB three_bytes_repeat_emit_encodeSnappyBlockAsm8B three_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B two_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_encodeSnappyBlockAsm8B JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B one_byte_repeat_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveShort - CMPQ DI, $0x08 + CMPQ R8, $0x08 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ DI, $0x10 + CMPQ R8, $0x10 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ DI, $0x20 + CMPQ R8, $0x20 JBE emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (R8), R9 - MOVQ R9, (AX) + MOVQ (R9), R10 + MOVQ R10, (CX) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (R8), R9 - MOVQ -8(R8)(DI*1), R8 - MOVQ R9, (AX) - MOVQ R8, -8(AX)(DI*1) + MOVQ (R9), R10 + MOVQ -8(R9)(R8*1), R9 + MOVQ R10, (CX) + MOVQ R9, -8(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (R8), X0 - MOVOU -16(R8)(DI*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU -16(R9)(R8*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R8*1) JMP memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B memmove_long_repeat_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(DI*1), BX + LEAQ (CX)(R8*1), SI // genMemMoveLong - MOVOU (R8), X0 - MOVOU 16(R8), X1 - MOVOU -32(R8)(DI*1), X2 - MOVOU -16(R8)(DI*1), X3 - MOVQ DI, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (R9), X0 + MOVOU 16(R9), X1 + MOVOU -32(R9)(R8*1), X2 + MOVOU -16(R9)(R8*1), X3 + MOVQ R8, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R8)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(R9)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R8)(R11*1), X4 - MOVOU -16(R8)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ DI, R11 + MOVOU -32(R9)(R12*1), X4 + MOVOU -16(R9)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R8, R12 JAE emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(DI*1) - MOVOU X3, -16(AX)(DI*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R8*1) + MOVOU X3, -16(CX)(R8*1) + MOVQ SI, CX emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B JB repeat_extend_forward_end_encodeSnappyBlockAsm8B - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_encodeSnappyBlockAsm8B matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_encodeSnappyBlockAsm8B - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_encodeSnappyBlockAsm8B: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW SI, 1(AX) - LEAL -60(BX), BX - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW DI, 1(CX) + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - LEAL -15(DI), DI - MOVB SI, 1(AX) - SHRL $0x08, SI - SHLL $0x05, SI - ORL SI, DI - MOVB DI, (AX) - ADDQ $0x02, AX + LEAL -15(R8), R8 + MOVB DI, 1(CX) + SHRL $0x08, DI + SHLL $0x05, DI + ORL DI, R8 + MOVB R8, (CX) + ADDQ $0x02, CX JMP repeat_end_emit_encodeSnappyBlockAsm8B emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B: - LEAL -2(DI), DI - MOVB DI, (AX) - MOVW SI, 1(AX) - ADDQ $0x03, AX + LEAL -2(R8), R8 + MOVB R8, (CX) + MOVW DI, 1(CX) + ADDQ $0x03, CX repeat_end_emit_encodeSnappyBlockAsm8B: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_encodeSnappyBlockAsm8B no_repeat_found_encodeSnappyBlockAsm8B: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBlockAsm8B - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_encodeSnappyBlockAsm8B - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_encodeSnappyBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBlockAsm8B candidate3_match_encodeSnappyBlockAsm8B: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_encodeSnappyBlockAsm8B candidate2_match_encodeSnappyBlockAsm8B: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_encodeSnappyBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBlockAsm8B match_extend_back_loop_encodeSnappyBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBlockAsm8B JMP match_extend_back_loop_encodeSnappyBlockAsm8B match_extend_back_end_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBlockAsm8B: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_encodeSnappyBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), DI - CMPL DI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), R8 + CMPL R8, $0x3c JB one_byte_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00000100 + CMPL R8, $0x00000100 JB two_bytes_match_emit_encodeSnappyBlockAsm8B JB three_bytes_match_emit_encodeSnappyBlockAsm8B three_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBlockAsm8B two_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DI, 1(AX) - ADDQ $0x02, AX - CMPL DI, $0x40 + MOVB $0xf0, (CX) + MOVB R8, 1(CX) + ADDQ $0x02, CX + CMPL R8, $0x40 JB memmove_match_emit_encodeSnappyBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBlockAsm8B one_byte_match_emit_encodeSnappyBlockAsm8B: - SHLB $0x02, DI - MOVB DI, (AX) - ADDQ $0x01, AX + SHLB $0x02, R8 + MOVB R8, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8: - MOVQ (SI), R9 - MOVQ R9, (AX) + MOVQ (DI), R10 + MOVQ R10, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (SI), R9 - MOVQ -8(SI)(R8*1), SI - MOVQ R9, (AX) - MOVQ SI, -8(AX)(R8*1) + MOVQ (DI), R10 + MOVQ -8(DI)(R9*1), DI + MOVQ R10, (CX) + MOVQ DI, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (SI), X0 - MOVOU -16(SI)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU -16(DI)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBlockAsm8B: - MOVQ DI, AX + MOVQ R8, CX JMP emit_literal_done_match_emit_encodeSnappyBlockAsm8B memmove_long_match_emit_encodeSnappyBlockAsm8B: - LEAQ (AX)(R8*1), DI + LEAQ (CX)(R9*1), R8 // genMemMoveLong - MOVOU (SI), X0 - MOVOU 16(SI), X1 - MOVOU -32(SI)(R8*1), X2 - MOVOU -16(SI)(R8*1), X3 - MOVQ R8, R10 - SHRQ $0x05, R10 - MOVQ AX, R9 - ANDL $0x0000001f, R9 - MOVQ $0x00000040, R11 - SUBQ R9, R11 - DECQ R10 + MOVOU (DI), X0 + MOVOU 16(DI), X1 + MOVOU -32(DI)(R9*1), X2 + MOVOU -16(DI)(R9*1), X3 + MOVQ R9, R11 + SHRQ $0x05, R11 + MOVQ CX, R10 + ANDL $0x0000001f, R10 + MOVQ $0x00000040, R12 + SUBQ R10, R12 + DECQ R11 JA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(SI)(R11*1), R9 - LEAQ -32(AX)(R11*1), R12 + LEAQ -32(DI)(R12*1), R10 + LEAQ -32(CX)(R12*1), R13 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back: - MOVOU (R9), X4 - MOVOU 16(R9), X5 - MOVOA X4, (R12) - MOVOA X5, 16(R12) + MOVOU (R10), X4 + MOVOU 16(R10), X5 + MOVOA X4, (R13) + MOVOA X5, 16(R13) + ADDQ $0x20, R13 + ADDQ $0x20, R10 ADDQ $0x20, R12 - ADDQ $0x20, R9 - ADDQ $0x20, R11 - DECQ R10 + DECQ R11 JNA emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(SI)(R11*1), X4 - MOVOU -16(SI)(R11*1), X5 - MOVOA X4, -32(AX)(R11*1) - MOVOA X5, -16(AX)(R11*1) - ADDQ $0x20, R11 - CMPQ R8, R11 + MOVOU -32(DI)(R12*1), X4 + MOVOU -16(DI)(R12*1), X5 + MOVOA X4, -32(CX)(R12*1) + MOVOA X5, -16(CX)(R12*1) + ADDQ $0x20, R12 + CMPQ R9, R12 JAE emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ DI, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ R8, CX emit_literal_done_match_emit_encodeSnappyBlockAsm8B: match_nolit_loop_encodeSnappyBlockAsm8B: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBlockAsm8B - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_encodeSnappyBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_encodeSnappyBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_encodeSnappyBlockAsm8B - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B JB match_nolit_end_encodeSnappyBlockAsm8B - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_encodeSnappyBlockAsm8B - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_encodeSnappyBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBlockAsm8B: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_encodeSnappyBlockAsm8B - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_encodeSnappyBlockAsm8B: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xee, (AX) - MOVW BX, 1(AX) - LEAL -60(R9), R9 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW SI, 1(CX) + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - LEAL -15(SI), SI - MOVB BL, 1(AX) - SHRL $0x08, BX - SHLL $0x05, BX - ORL BX, SI - MOVB SI, (AX) - ADDQ $0x02, AX + LEAL -15(DI), DI + MOVB SI, 1(CX) + SHRL $0x08, SI + SHLL $0x05, SI + ORL SI, DI + MOVB DI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBlockAsm8B: - LEAL -2(SI), SI - MOVB SI, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + LEAL -2(DI), DI + MOVB DI, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBlockAsm8B - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBlockAsm8B: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x38, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x38, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x38, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x38, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_encodeSnappyBlockAsm8B - INCL CX + INCL DX JMP search_loop_encodeSnappyBlockAsm8B emit_remainder_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBlockAsm8B @@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBlockAsm8B three_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B two_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B one_byte_emit_remainder_encodeSnappyBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B memmove_long_emit_remainder_encodeSnappyBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back: JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00001200, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00001200, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - CMPL BX, $0x63 + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + CMPL SI, $0x63 JBE check_maxskip_ok_encodeSnappyBetterBlockAsm - LEAL 100(CX), BX + LEAL 100(DX), SI JMP check_maxskip_cont_encodeSnappyBetterBlockAsm check_maxskip_ok_encodeSnappyBetterBlockAsm: - LEAL 1(CX)(BX*1), BX + LEAL 1(DX)(SI*1), SI check_maxskip_cont_encodeSnappyBetterBlockAsm: - CMPL BX, 8(SP) + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 524312(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 524312(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL (AX)(R10*4), SI + MOVL 524288(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 524288(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm no_short_found_encodeSnappyBetterBlockAsm: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm candidateS_match_encodeSnappyBetterBlockAsm: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x2f, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x2f, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm match_extend_back_loop_encodeSnappyBetterBlockAsm: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm JMP match_extend_back_loop_encodeSnappyBetterBlockAsm match_extend_back_end_encodeSnappyBetterBlockAsm: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm JB match_nolit_end_encodeSnappyBetterBlockAsm - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - CMPL R11, $0x01 + CMPL R12, $0x01 JA match_length_ok_encodeSnappyBetterBlockAsm - CMPL DI, $0x0000ffff + CMPL R8, $0x0000ffff JBE match_length_ok_encodeSnappyBetterBlockAsm - MOVL 20(SP), CX - INCL CX + MOVL 20(SP), DX + INCL DX JMP search_loop_encodeSnappyBetterBlockAsm match_length_ok_encodeSnappyBetterBlockAsm: - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_match_emit_encodeSnappyBetterBlockAsm - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_match_emit_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL BX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL SI, 1(CX) + ADDQ $0x05, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm four_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVL BX, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BX, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX + MOVL SI, R11 + SHRL $0x10, R11 + MOVB $0xf8, (CX) + MOVW SI, 1(CX) + MOVB R11, 3(CX) + ADDQ $0x04, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm three_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm two_bytes_match_emit_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm one_byte_match_emit_encodeSnappyBetterBlockAsm: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm memmove_long_match_emit_encodeSnappyBetterBlockAsm: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy - CMPL DI, $0x00010000 + CMPL R8, $0x00010000 JB two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xff, (AX) - MOVL DI, 1(AX) - LEAL -64(R11), R11 - ADDQ $0x05, AX - CMPL R11, $0x04 + MOVB $0xff, (CX) + MOVL R8, 1(CX) + LEAL -64(R12), R12 + ADDQ $0x05, CX + CMPL R12, $0x04 JB four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm JMP four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm: - TESTL R11, R11 + TESTL R12, R12 JZ match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm - XORL BX, BX - LEAL -1(BX)(R11*4), R11 - MOVB R11, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX + XORL SI, SI + LEAL -1(SI)(R12*4), R12 + MOVB R12, (CX) + MOVL R8, 1(CX) + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x2f, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 524312(SP)(R10*4) - MOVL R13, 524312(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x2f, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x32, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x2f, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x32, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 524288(AX)(R11*4) + MOVL R14, 524288(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x2f, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x2f, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x2f, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm @@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm CMPL DX, $0x01000000 JB four_bytes_emit_remainder_encodeSnappyBetterBlockAsm - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX + MOVB $0xfc, (CX) + MOVL DX, 1(CX) + ADDQ $0x05, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm four_bytes_emit_remainder_encodeSnappyBetterBlockAsm: MOVL DX, BX SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX + MOVB $0xf8, (CX) + MOVW DX, 1(CX) + MOVB BL, 3(CX) + ADDQ $0x04, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm three_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm two_bytes_emit_remainder_encodeSnappyBetterBlockAsm: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm one_byte_emit_remainder_encodeSnappyBetterBlockAsm: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm memmove_long_emit_remainder_encodeSnappyBetterBlockAsm: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back: MOVOU (SI), X4 @@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000a00, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000900, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm64K: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm64K MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm64K: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x07, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x07, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x00cf1bbcdcbfa563, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x32, R10 - MOVL 24(SP)(R9*4), BX - MOVL 262168(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 262168(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x00cf1bbcdcbfa563, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL 262144(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 262144(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm64K - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm64K no_short_found_encodeSnappyBetterBlockAsm64K: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm64K - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm64K candidateS_match_encodeSnappyBetterBlockAsm64K: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x08, R9 - IMULQ R8, R9 - SHRQ $0x30, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x08, R10 + IMULQ R9, R10 + SHRQ $0x30, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm64K - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm64K: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K match_extend_back_loop_encodeSnappyBetterBlockAsm64K: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm64K - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm64K - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm64K JMP match_extend_back_loop_encodeSnappyBetterBlockAsm64K match_extend_back_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm64K: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K JB match_nolit_end_encodeSnappyBetterBlockAsm64K - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm64K matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm64K - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm64K: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm64K - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm64K JB three_bytes_match_emit_encodeSnappyBetterBlockAsm64K three_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K two_bytes_match_emit_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm64K JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm64K one_byte_match_emit_encodeSnappyBetterBlockAsm64K: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K memmove_long_match_emit_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back - -emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + +emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm64K - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K: - MOVQ $0x00cf1bbcdcbfa563, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x32, R10 - SHLQ $0x08, R11 - IMULQ BX, R11 - SHRQ $0x30, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x32, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 262168(SP)(R10*4) - MOVL R13, 262168(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x00cf1bbcdcbfa563, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x08, R10 + IMULQ SI, R10 + SHRQ $0x30, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x33, R11 + SHLQ $0x08, R12 + IMULQ SI, R12 + SHRQ $0x30, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x33, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 262144(AX)(R11*4) + MOVL R14, 262144(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm64K: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm64K - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x08, R9 - IMULQ BX, R9 - SHRQ $0x30, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x08, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x30, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x08, R11 + IMULQ SI, R11 + SHRQ $0x30, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm64K emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm64K - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm64K: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K @@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm64K JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back: MOVOU (SI), X4 @@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000280, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000280, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm12B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm12B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm12B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x06, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x06, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x34, R10 - MOVL 24(SP)(R9*4), BX - MOVL 65560(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 65560(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL (AX)(R10*4), SI + MOVL 65536(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 65536(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm12B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm12B no_short_found_encodeSnappyBetterBlockAsm12B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm12B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm12B candidateS_match_encodeSnappyBetterBlockAsm12B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x32, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x32, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm12B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm12B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B match_extend_back_loop_encodeSnappyBetterBlockAsm12B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm12B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm12B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm12B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm12B match_extend_back_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm12B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B JB match_nolit_end_encodeSnappyBetterBlockAsm12B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm12B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm12B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm12B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm12B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm12B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm12B three_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B two_bytes_match_emit_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm12B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm12B one_byte_match_emit_encodeSnappyBetterBlockAsm12B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B memmove_long_match_emit_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm12B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x34, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x32, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x34, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 65560(SP)(R10*4) - MOVL R13, 65560(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x32, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x34, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x32, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x34, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 65536(AX)(R11*4) + MOVL R14, 65536(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm12B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm12B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x32, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x32, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x32, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm12B emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm12B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm12B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B @@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm12B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) - JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B - -emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) + JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B + +emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64: + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back: MOVOU (SI), X4 @@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x000000a0, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x000000a0, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm10B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm10B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm10B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x36, R10 - MOVL 24(SP)(R9*4), BX - MOVL 16408(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 16408(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL (AX)(R10*4), SI + MOVL 16384(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 16384(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm10B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm10B no_short_found_encodeSnappyBetterBlockAsm10B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm10B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm10B candidateS_match_encodeSnappyBetterBlockAsm10B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x34, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x34, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm10B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm10B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B match_extend_back_loop_encodeSnappyBetterBlockAsm10B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm10B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm10B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm10B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm10B match_extend_back_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm10B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -4(DI), DI - LEAL 4(R11), R11 + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B JB match_nolit_end_encodeSnappyBetterBlockAsm10B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm10B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm10B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm10B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm10B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm10B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm10B three_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B two_bytes_match_emit_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm10B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm10B one_byte_match_emit_encodeSnappyBetterBlockAsm10B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B memmove_long_match_emit_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - CMPL DI, $0x00000800 + CMPL R8, $0x00000800 JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm10B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x36, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x34, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x36, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 16408(SP)(R10*4) - MOVL R13, 16408(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x34, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x36, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x34, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x36, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 16384(AX)(R11*4) + MOVL R14, 16384(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm10B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm10B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x34, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x34, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x34, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm10B emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm10B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm10B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B @@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm10B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back: MOVOU (SI), X4 @@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int +// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int // Requires: BMI, SSE2 -TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56 - MOVQ dst_base+0(FP), AX - MOVQ $0x00000028, CX - LEAQ 24(SP), DX +TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64 + MOVQ tmp+48(FP), AX + MOVQ dst_base+0(FP), CX + MOVQ $0x00000028, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_encodeSnappyBetterBlockAsm8B: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_encodeSnappyBetterBlockAsm8B MOVL $0x00000000, 12(SP) - MOVQ src_len+32(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX + MOVQ src_len+32(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX MOVL $0x00000000, 16(SP) - MOVQ src_base+24(FP), DX + MOVQ src_base+24(FP), BX search_loop_encodeSnappyBetterBlockAsm8B: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 1(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 1(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ $0x9e3779b1, BX - MOVQ SI, R9 - MOVQ SI, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ BX, R10 - SHRQ $0x38, R10 - MOVL 24(SP)(R9*4), BX - MOVL 4120(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - MOVL CX, 4120(SP)(R10*4) - MOVQ (DX)(BX*1), R9 - MOVQ (DX)(DI*1), R10 - CMPQ R9, SI + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ $0x9e3779b1, SI + MOVQ DI, R10 + MOVQ DI, R11 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ SI, R11 + SHRQ $0x38, R11 + MOVL (AX)(R10*4), SI + MOVL 4096(AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + MOVL DX, 4096(AX)(R11*4) + MOVQ (BX)(SI*1), R10 + MOVQ (BX)(R8*1), R11 + CMPQ R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPQ R10, SI + CMPQ R11, DI JNE no_short_found_encodeSnappyBetterBlockAsm8B - MOVL DI, BX + MOVL R8, SI JMP candidate_match_encodeSnappyBetterBlockAsm8B no_short_found_encodeSnappyBetterBlockAsm8B: - CMPL R9, SI + CMPL R10, DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - CMPL R10, SI + CMPL R11, DI JEQ candidateS_match_encodeSnappyBetterBlockAsm8B - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_encodeSnappyBetterBlockAsm8B candidateS_match_encodeSnappyBetterBlockAsm8B: - SHRQ $0x08, SI - MOVQ SI, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x36, R9 - MOVL 24(SP)(R9*4), BX - INCL CX - MOVL CX, 24(SP)(R9*4) - CMPL (DX)(BX*1), SI + SHRQ $0x08, DI + MOVQ DI, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x36, R10 + MOVL (AX)(R10*4), SI + INCL DX + MOVL DX, (AX)(R10*4) + CMPL (BX)(SI*1), DI JEQ candidate_match_encodeSnappyBetterBlockAsm8B - DECL CX - MOVL DI, BX + DECL DX + MOVL R8, SI candidate_match_encodeSnappyBetterBlockAsm8B: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B match_extend_back_loop_encodeSnappyBetterBlockAsm8B: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_encodeSnappyBetterBlockAsm8B - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_encodeSnappyBetterBlockAsm8B - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_encodeSnappyBetterBlockAsm8B JMP match_extend_back_loop_encodeSnappyBetterBlockAsm8B match_extend_back_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_dst_size_check_encodeSnappyBetterBlockAsm8B: - MOVL CX, SI - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+32(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), R9 + MOVL DX, DI + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+32(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), R10 // matchLen - XORL R11, R11 + XORL R12, R12 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - MOVQ 8(R8)(R11*1), R12 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + MOVQ 8(R9)(R12*1), R13 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - XORQ 8(R9)(R11*1), R12 + XORQ 8(R10)(R12*1), R13 JNZ matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -16(DI), DI - LEAL 16(R11), R11 + LEAL -16(R8), R8 + LEAL 16(R12), R12 JMP matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R12, R12 + TZCNTQ R13, R13 #else - BSFQ R12, R12 + BSFQ R13, R13 #endif - SARQ $0x03, R12 - LEAL 8(R11)(R12*1), R11 + SARQ $0x03, R13 + LEAL 8(R12)(R13*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B - MOVQ (R8)(R11*1), R10 - XORQ (R9)(R11*1), R10 + MOVQ (R9)(R12*1), R11 + XORQ (R10)(R12*1), R11 JNZ matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -8(DI), DI - LEAL 8(R11), R11 + LEAL -8(R8), R8 + LEAL 8(R12), R12 JMP matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL (R11)(R10*1), R11 + SARQ $0x03, R11 + LEAL (R12)(R11*1), R12 JMP match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - MOVL (R8)(R11*1), R10 - CMPL (R9)(R11*1), R10 - JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -4(DI), DI - LEAL 4(R11), R11 + MOVL (R9)(R12*1), R11 + CMPL (R10)(R12*1), R11 + JNE matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B + LEAL -4(R8), R8 + LEAL 4(R12), R12 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B JB match_nolit_end_encodeSnappyBetterBlockAsm8B - MOVW (R8)(R11*1), R10 - CMPW (R9)(R11*1), R10 + MOVW (R9)(R12*1), R11 + CMPW (R10)(R12*1), R11 JNE matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL 2(R11), R11 - SUBL $0x02, DI + LEAL 2(R12), R12 + SUBL $0x02, R8 JZ match_nolit_end_encodeSnappyBetterBlockAsm8B matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVB (R8)(R11*1), R10 - CMPB (R9)(R11*1), R10 + MOVB (R9)(R12*1), R11 + CMPB (R10)(R12*1), R11 JNE match_nolit_end_encodeSnappyBetterBlockAsm8B - LEAL 1(R11), R11 + LEAL 1(R12), R12 match_nolit_end_encodeSnappyBetterBlockAsm8B: - MOVL CX, DI - SUBL BX, DI + MOVL DX, R8 + SUBL SI, R8 // Check if repeat - MOVL DI, 16(SP) - MOVL 12(SP), BX - CMPL BX, SI + MOVL R8, 16(SP) + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R9 - SUBL BX, R8 - LEAL -1(R8), BX - CMPL BX, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R10 + SUBL SI, R9 + LEAL -1(R9), SI + CMPL SI, $0x3c JB one_byte_match_emit_encodeSnappyBetterBlockAsm8B - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_match_emit_encodeSnappyBetterBlockAsm8B JB three_bytes_match_emit_encodeSnappyBetterBlockAsm8B three_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW BX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW SI, 1(CX) + ADDQ $0x03, CX JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B two_bytes_match_emit_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB BL, 1(AX) - ADDQ $0x02, AX - CMPL BX, $0x40 + MOVB $0xf0, (CX) + MOVB SI, 1(CX) + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_match_emit_encodeSnappyBetterBlockAsm8B JMP memmove_long_match_emit_encodeSnappyBetterBlockAsm8B one_byte_match_emit_encodeSnappyBetterBlockAsm8B: - SHLB $0x02, BL - MOVB BL, (AX) - ADDQ $0x01, AX + SHLB $0x02, SI + MOVB SI, (CX) + ADDQ $0x01, CX memmove_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveShort - CMPQ R8, $0x08 + CMPQ R9, $0x08 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8 - CMPQ R8, $0x10 + CMPQ R9, $0x10 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16 - CMPQ R8, $0x20 + CMPQ R9, $0x20 JBE emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32 JMP emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8: - MOVQ (R9), R10 - MOVQ R10, (AX) + MOVQ (R10), R11 + MOVQ R11, (CX) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (R9), R10 - MOVQ -8(R9)(R8*1), R9 - MOVQ R10, (AX) - MOVQ R9, -8(AX)(R8*1) + MOVQ (R10), R11 + MOVQ -8(R10)(R9*1), R10 + MOVQ R11, (CX) + MOVQ R10, -8(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (R9), X0 - MOVOU -16(R9)(R8*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU -16(R10)(R9*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(R9*1) JMP memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B: - MOVQ BX, AX + MOVQ SI, CX JMP emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B memmove_long_match_emit_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(R8*1), BX + LEAQ (CX)(R9*1), SI // genMemMoveLong - MOVOU (R9), X0 - MOVOU 16(R9), X1 - MOVOU -32(R9)(R8*1), X2 - MOVOU -16(R9)(R8*1), X3 - MOVQ R8, R12 - SHRQ $0x05, R12 - MOVQ AX, R10 - ANDL $0x0000001f, R10 - MOVQ $0x00000040, R13 - SUBQ R10, R13 - DECQ R12 + MOVOU (R10), X0 + MOVOU 16(R10), X1 + MOVOU -32(R10)(R9*1), X2 + MOVOU -16(R10)(R9*1), X3 + MOVQ R9, R13 + SHRQ $0x05, R13 + MOVQ CX, R11 + ANDL $0x0000001f, R11 + MOVQ $0x00000040, R14 + SUBQ R11, R14 + DECQ R13 JA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(R9)(R13*1), R10 - LEAQ -32(AX)(R13*1), R14 + LEAQ -32(R10)(R14*1), R11 + LEAQ -32(CX)(R14*1), R15 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: - MOVOU (R10), X4 - MOVOU 16(R10), X5 - MOVOA X4, (R14) - MOVOA X5, 16(R14) + MOVOU (R11), X4 + MOVOU 16(R11), X5 + MOVOA X4, (R15) + MOVOA X5, 16(R15) + ADDQ $0x20, R15 + ADDQ $0x20, R11 ADDQ $0x20, R14 - ADDQ $0x20, R10 - ADDQ $0x20, R13 - DECQ R12 + DECQ R13 JNA emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(R9)(R13*1), X4 - MOVOU -16(R9)(R13*1), X5 - MOVOA X4, -32(AX)(R13*1) - MOVOA X5, -16(AX)(R13*1) - ADDQ $0x20, R13 - CMPQ R8, R13 + MOVOU -32(R10)(R14*1), X4 + MOVOU -16(R10)(R14*1), X5 + MOVOA X4, -32(CX)(R14*1) + MOVOA X5, -16(CX)(R14*1) + ADDQ $0x20, R14 + CMPQ R9, R14 JAE emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(R8*1) - MOVOU X3, -16(AX)(R8*1) - MOVQ BX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(R9*1) + MOVOU X3, -16(CX)(R9*1) + MOVQ SI, CX emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B: - ADDL R11, CX - ADDL $0x04, R11 - MOVL CX, 12(SP) + ADDL R12, DX + ADDL $0x04, R12 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B: - CMPL R11, $0x40 + CMPL R12, $0x40 JBE two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B - MOVB $0xee, (AX) - MOVW DI, 1(AX) - LEAL -60(R11), R11 - ADDQ $0x03, AX + MOVB $0xee, (CX) + MOVW R8, 1(CX) + LEAL -60(R12), R12 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B: - MOVL R11, BX - SHLL $0x02, BX - CMPL R11, $0x0c + MOVL R12, SI + SHLL $0x02, SI + CMPL R12, $0x0c JAE emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B - LEAL -15(BX), BX - MOVB DI, 1(AX) - SHRL $0x08, DI - SHLL $0x05, DI - ORL DI, BX - MOVB BL, (AX) - ADDQ $0x02, AX + LEAL -15(SI), SI + MOVB R8, 1(CX) + SHRL $0x08, R8 + SHLL $0x05, R8 + ORL R8, SI + MOVB SI, (CX) + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B: - LEAL -2(BX), BX - MOVB BL, (AX) - MOVW DI, 1(AX) - ADDQ $0x03, AX + LEAL -2(SI), SI + MOVB SI, (CX) + MOVW R8, 1(CX) + ADDQ $0x03, CX match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_encodeSnappyBetterBlockAsm8B - CMPQ AX, (SP) + CMPQ CX, (SP) JB match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B: - MOVQ $0x0000cf1bbcdcbf9b, BX - MOVQ $0x9e3779b1, DI - LEAQ 1(SI), SI - LEAQ -2(CX), R8 - MOVQ (DX)(SI*1), R9 - MOVQ 1(DX)(SI*1), R10 - MOVQ (DX)(R8*1), R11 - MOVQ 1(DX)(R8*1), R12 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 - SHLQ $0x20, R10 - IMULQ DI, R10 - SHRQ $0x38, R10 - SHLQ $0x10, R11 - IMULQ BX, R11 - SHRQ $0x36, R11 - SHLQ $0x20, R12 - IMULQ DI, R12 - SHRQ $0x38, R12 - LEAQ 1(SI), DI - LEAQ 1(R8), R13 - MOVL SI, 24(SP)(R9*4) - MOVL R8, 24(SP)(R11*4) - MOVL DI, 4120(SP)(R10*4) - MOVL R13, 4120(SP)(R12*4) - LEAQ 1(R8)(SI*1), DI - SHRQ $0x01, DI - ADDQ $0x01, SI - SUBQ $0x01, R8 + MOVQ $0x0000cf1bbcdcbf9b, SI + MOVQ $0x9e3779b1, R8 + LEAQ 1(DI), DI + LEAQ -2(DX), R9 + MOVQ (BX)(DI*1), R10 + MOVQ 1(BX)(DI*1), R11 + MOVQ (BX)(R9*1), R12 + MOVQ 1(BX)(R9*1), R13 + SHLQ $0x10, R10 + IMULQ SI, R10 + SHRQ $0x36, R10 + SHLQ $0x20, R11 + IMULQ R8, R11 + SHRQ $0x38, R11 + SHLQ $0x10, R12 + IMULQ SI, R12 + SHRQ $0x36, R12 + SHLQ $0x20, R13 + IMULQ R8, R13 + SHRQ $0x38, R13 + LEAQ 1(DI), R8 + LEAQ 1(R9), R14 + MOVL DI, (AX)(R10*4) + MOVL R9, (AX)(R12*4) + MOVL R8, 4096(AX)(R11*4) + MOVL R14, 4096(AX)(R13*4) + LEAQ 1(R9)(DI*1), R8 + SHRQ $0x01, R8 + ADDQ $0x01, DI + SUBQ $0x01, R9 index_loop_encodeSnappyBetterBlockAsm8B: - CMPQ DI, R8 + CMPQ R8, R9 JAE search_loop_encodeSnappyBetterBlockAsm8B - MOVQ (DX)(SI*1), R9 - MOVQ (DX)(DI*1), R10 - SHLQ $0x10, R9 - IMULQ BX, R9 - SHRQ $0x36, R9 + MOVQ (BX)(DI*1), R10 + MOVQ (BX)(R8*1), R11 SHLQ $0x10, R10 - IMULQ BX, R10 + IMULQ SI, R10 SHRQ $0x36, R10 - MOVL SI, 24(SP)(R9*4) - MOVL DI, 24(SP)(R10*4) - ADDQ $0x02, SI + SHLQ $0x10, R11 + IMULQ SI, R11 + SHRQ $0x36, R11 + MOVL DI, (AX)(R10*4) + MOVL R8, (AX)(R11*4) ADDQ $0x02, DI + ADDQ $0x02, R8 JMP index_loop_encodeSnappyBetterBlockAsm8B emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+32(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_encodeSnappyBetterBlockAsm8B - MOVQ $0x00000000, ret+48(FP) + MOVQ $0x00000000, ret+56(FP) RET emit_remainder_ok_encodeSnappyBetterBlockAsm8B: - MOVQ src_len+32(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+32(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI LEAL -1(SI), DX CMPL DX, $0x3c JB one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B @@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B: JB three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf4, (AX) - MOVW DX, 1(AX) - ADDQ $0x03, AX + MOVB $0xf4, (CX) + MOVW DX, 1(CX) + ADDQ $0x03, CX JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVB $0xf0, (AX) - MOVB DL, 1(AX) - ADDQ $0x02, AX + MOVB $0xf0, (CX) + MOVB DL, 1(CX) + ADDQ $0x02, CX CMPL DX, $0x40 JB memmove_emit_remainder_encodeSnappyBetterBlockAsm8B JMP memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B: SHLB $0x02, DL - MOVB DL, (AX) - ADDQ $0x01, AX + MOVB DL, (CX) + ADDQ $0x01, CX memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveShort @@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B: JMP emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2: - MOVB (CX), SI - MOVB -1(CX)(BX*1), CL - MOVB SI, (AX) - MOVB CL, -1(AX)(BX*1) + MOVB (AX), SI + MOVB -1(AX)(BX*1), AL + MOVB SI, (CX) + MOVB AL, -1(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3: - MOVW (CX), SI - MOVB 2(CX), CL - MOVW SI, (AX) - MOVB CL, 2(AX) + MOVW (AX), SI + MOVB 2(AX), AL + MOVW SI, (CX) + MOVB AL, 2(CX) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7: - MOVL (CX), SI - MOVL -4(CX)(BX*1), CX - MOVL SI, (AX) - MOVL CX, -4(AX)(BX*1) + MOVL (AX), SI + MOVL -4(AX)(BX*1), AX + MOVL SI, (CX) + MOVL AX, -4(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16: - MOVQ (CX), SI - MOVQ -8(CX)(BX*1), CX - MOVQ SI, (AX) - MOVQ CX, -8(AX)(BX*1) + MOVQ (AX), SI + MOVQ -8(AX)(BX*1), AX + MOVQ SI, (CX) + MOVQ AX, -8(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32: - MOVOU (CX), X0 - MOVOU -16(CX)(BX*1), X1 - MOVOU X0, (AX) - MOVOU X1, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU -16(AX)(BX*1), X1 + MOVOU X0, (CX) + MOVOU X1, -16(CX)(BX*1) JMP memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64: - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ DX, AX + MOVQ DX, CX JMP emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B: - LEAQ (AX)(SI*1), DX + LEAQ (CX)(SI*1), DX MOVL SI, BX // genMemMoveLong - MOVOU (CX), X0 - MOVOU 16(CX), X1 - MOVOU -32(CX)(BX*1), X2 - MOVOU -16(CX)(BX*1), X3 + MOVOU (AX), X0 + MOVOU 16(AX), X1 + MOVOU -32(AX)(BX*1), X2 + MOVOU -16(AX)(BX*1), X3 MOVQ BX, DI SHRQ $0x05, DI - MOVQ AX, SI + MOVQ CX, SI ANDL $0x0000001f, SI MOVQ $0x00000040, R8 SUBQ SI, R8 DECQ DI JA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - LEAQ -32(CX)(R8*1), SI - LEAQ -32(AX)(R8*1), R9 + LEAQ -32(AX)(R8*1), SI + LEAQ -32(CX)(R8*1), R9 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back: MOVOU (SI), X4 @@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_ JNA emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32: - MOVOU -32(CX)(R8*1), X4 - MOVOU -16(CX)(R8*1), X5 - MOVOA X4, -32(AX)(R8*1) - MOVOA X5, -16(AX)(R8*1) + MOVOU -32(AX)(R8*1), X4 + MOVOU -16(AX)(R8*1), X5 + MOVOA X4, -32(CX)(R8*1) + MOVOA X5, -16(CX)(R8*1) ADDQ $0x20, R8 CMPQ BX, R8 JAE emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32 - MOVOU X0, (AX) - MOVOU X1, 16(AX) - MOVOU X2, -32(AX)(BX*1) - MOVOU X3, -16(AX)(BX*1) - MOVQ DX, AX + MOVOU X0, (CX) + MOVOU X1, 16(CX) + MOVOU X2, -32(CX)(BX*1) + MOVOU X3, -16(CX)(BX*1) + MOVQ DX, CX emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B: - MOVQ dst_base+0(FP), CX - SUBQ CX, AX - MOVQ AX, ret+48(FP) + MOVQ dst_base+0(FP), AX + SUBQ AX, CX + MOVQ CX, ret+56(FP) RET -// func calcBlockSize(src []byte) int +// func calcBlockSize(src []byte, tmp *[32768]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSize(SB), $32792-32 - XORQ AX, AX - MOVQ $0x00000100, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSize(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000100, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSize: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSize MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSize: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x05, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x05, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x10, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x33, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x10, R9 - IMULQ R8, R9 - SHRQ $0x33, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x10, R11 + IMULQ R9, R11 + SHRQ $0x33, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x10, R10 + IMULQ R9, R10 + SHRQ $0x33, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSize - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSize repeat_extend_back_loop_calcBlockSize: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSize - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSize - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSize repeat_extend_back_end_calcBlockSize: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 5(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 5(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSize: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSize - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSize - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB three_bytes_repeat_emit_calcBlockSize - CMPL BX, $0x01000000 + CMPL SI, $0x01000000 JB four_bytes_repeat_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_repeat_emit_calcBlockSize four_bytes_repeat_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_repeat_emit_calcBlockSize three_bytes_repeat_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSize two_bytes_repeat_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSize JMP memmove_long_repeat_emit_calcBlockSize one_byte_repeat_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSize memmove_long_repeat_emit_calcBlockSize: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSize: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSize: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSize - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSize matchlen_bsf_16repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match8_repeat_extend_calcBlockSize: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSize - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSize - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSize matchlen_bsf_8_repeat_extend_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSize matchlen_match4_repeat_extend_calcBlockSize: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSize - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSize - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSize: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSize JB repeat_extend_forward_end_calcBlockSize - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSize - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSize matchlen_match1_repeat_extend_calcBlockSize: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSize - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSize: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB two_byte_offset_repeat_as_copy_calcBlockSize four_bytes_loop_back_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE four_bytes_remain_repeat_as_copy_calcBlockSize - LEAL -64(BX), BX - ADDQ $0x05, AX - CMPL BX, $0x04 + LEAL -64(SI), SI + ADDQ $0x05, CX + CMPL SI, $0x04 JB four_bytes_remain_repeat_as_copy_calcBlockSize JMP four_bytes_loop_back_repeat_as_copy_calcBlockSize four_bytes_remain_repeat_as_copy_calcBlockSize: - TESTL BX, BX + TESTL SI, SI JZ repeat_end_emit_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP repeat_end_emit_calcBlockSize two_byte_offset_repeat_as_copy_calcBlockSize: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSize - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSize two_byte_offset_short_repeat_as_copy_calcBlockSize: - MOVL BX, DI - SHLL $0x02, DI - CMPL BX, $0x0c + MOVL SI, R8 + SHLL $0x02, R8 + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSize - CMPL SI, $0x00000800 + CMPL DI, $0x00000800 JAE emit_copy_three_repeat_as_copy_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSize emit_copy_three_repeat_as_copy_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSize: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSize no_repeat_found_calcBlockSize: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSize - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSize - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSize - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSize candidate3_match_calcBlockSize: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSize candidate2_match_calcBlockSize: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSize: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSize match_extend_back_loop_calcBlockSize: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSize - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSize - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSize JMP match_extend_back_loop_calcBlockSize match_extend_back_end_calcBlockSize: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 5(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 5(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSize: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSize - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSize - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSize - CMPL SI, $0x00010000 + CMPL DI, $0x00010000 JB three_bytes_match_emit_calcBlockSize - CMPL SI, $0x01000000 + CMPL DI, $0x01000000 JB four_bytes_match_emit_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_match_emit_calcBlockSize four_bytes_match_emit_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_match_emit_calcBlockSize three_bytes_match_emit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSize two_bytes_match_emit_calcBlockSize: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSize JMP memmove_long_match_emit_calcBlockSize one_byte_match_emit_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSize memmove_long_match_emit_calcBlockSize: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSize: match_nolit_loop_calcBlockSize: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSize: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSize - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSize matchlen_bsf_16match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match8_match_nolit_calcBlockSize: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSize - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSize - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSize matchlen_bsf_8_match_nolit_calcBlockSize: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSize matchlen_match4_match_nolit_calcBlockSize: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSize - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSize - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSize: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSize JB match_nolit_end_calcBlockSize - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSize - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSize matchlen_match1_match_nolit_calcBlockSize: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSize - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSize: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy - CMPL BX, $0x00010000 + CMPL SI, $0x00010000 JB two_byte_offset_match_nolit_calcBlockSize four_bytes_loop_back_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE four_bytes_remain_match_nolit_calcBlockSize - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 + LEAL -64(R10), R10 + ADDQ $0x05, CX + CMPL R10, $0x04 JB four_bytes_remain_match_nolit_calcBlockSize JMP four_bytes_loop_back_match_nolit_calcBlockSize four_bytes_remain_match_nolit_calcBlockSize: - TESTL R9, R9 + TESTL R10, R10 JZ match_nolit_emitcopy_end_calcBlockSize - XORL BX, BX - ADDQ $0x05, AX + XORL SI, SI + ADDQ $0x05, CX JMP match_nolit_emitcopy_end_calcBlockSize two_byte_offset_match_nolit_calcBlockSize: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSize - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSize two_byte_offset_short_match_nolit_calcBlockSize: - MOVL R9, SI - SHLL $0x02, SI - CMPL R9, $0x0c + MOVL R10, DI + SHLL $0x02, DI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSize - CMPL BX, $0x00000800 + CMPL SI, $0x00000800 JAE emit_copy_three_match_nolit_calcBlockSize - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSize emit_copy_three_match_nolit_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSize: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSize - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSize: - MOVQ $0x0000cf1bbcdcbf9b, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x10, DI - IMULQ R8, DI - SHRQ $0x33, DI - SHLQ $0x10, BX - IMULQ R8, BX - SHRQ $0x33, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x0000cf1bbcdcbf9b, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x10, R8 + IMULQ R9, R8 + SHRQ $0x33, R8 + SHLQ $0x10, SI + IMULQ R9, SI + SHRQ $0x33, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSize - INCL CX + INCL DX JMP search_loop_calcBlockSize emit_remainder_calcBlockSize: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 5(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 5(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSize - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSize: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSize - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSize - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x00010000 + CMPL AX, $0x00010000 JB three_bytes_emit_remainder_calcBlockSize - CMPL CX, $0x01000000 + CMPL AX, $0x01000000 JB four_bytes_emit_remainder_calcBlockSize - ADDQ $0x05, AX + ADDQ $0x05, CX JMP memmove_long_emit_remainder_calcBlockSize four_bytes_emit_remainder_calcBlockSize: - ADDQ $0x04, AX + ADDQ $0x04, CX JMP memmove_long_emit_remainder_calcBlockSize three_bytes_emit_remainder_calcBlockSize: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSize two_bytes_emit_remainder_calcBlockSize: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSize JMP memmove_long_emit_remainder_calcBlockSize one_byte_emit_remainder_calcBlockSize: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSize memmove_long_emit_remainder_calcBlockSize: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSize: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET -// func calcBlockSizeSmall(src []byte) int +// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int // Requires: BMI, SSE2 -TEXT ·calcBlockSizeSmall(SB), $2072-32 - XORQ AX, AX - MOVQ $0x00000010, CX - LEAQ 24(SP), DX +TEXT ·calcBlockSizeSmall(SB), $24-40 + MOVQ tmp+24(FP), AX + XORQ CX, CX + MOVQ $0x00000010, DX + MOVQ AX, BX PXOR X0, X0 zero_loop_calcBlockSizeSmall: - MOVOU X0, (DX) - MOVOU X0, 16(DX) - MOVOU X0, 32(DX) - MOVOU X0, 48(DX) - MOVOU X0, 64(DX) - MOVOU X0, 80(DX) - MOVOU X0, 96(DX) - MOVOU X0, 112(DX) - ADDQ $0x80, DX - DECQ CX + MOVOU X0, (BX) + MOVOU X0, 16(BX) + MOVOU X0, 32(BX) + MOVOU X0, 48(BX) + MOVOU X0, 64(BX) + MOVOU X0, 80(BX) + MOVOU X0, 96(BX) + MOVOU X0, 112(BX) + ADDQ $0x80, BX + DECQ DX JNZ zero_loop_calcBlockSizeSmall MOVL $0x00000000, 12(SP) - MOVQ src_len+8(FP), CX - LEAQ -9(CX), DX - LEAQ -8(CX), BX - MOVL BX, 8(SP) - SHRQ $0x05, CX - SUBL CX, DX - LEAQ (AX)(DX*1), DX - MOVQ DX, (SP) - MOVL $0x00000001, CX - MOVL CX, 16(SP) - MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), DX + LEAQ -9(DX), BX + LEAQ -8(DX), SI + MOVL SI, 8(SP) + SHRQ $0x05, DX + SUBL DX, BX + LEAQ (CX)(BX*1), BX + MOVQ BX, (SP) + MOVL $0x00000001, DX + MOVL DX, 16(SP) + MOVQ src_base+0(FP), BX search_loop_calcBlockSizeSmall: - MOVL CX, BX - SUBL 12(SP), BX - SHRL $0x04, BX - LEAL 4(CX)(BX*1), BX - CMPL BX, 8(SP) + MOVL DX, SI + SUBL 12(SP), SI + SHRL $0x04, SI + LEAL 4(DX)(SI*1), SI + CMPL SI, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ (DX)(CX*1), SI - MOVL BX, 20(SP) - MOVQ $0x9e3779b1, R8 - MOVQ SI, R9 - MOVQ SI, R10 - SHRQ $0x08, R10 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 + MOVQ (BX)(DX*1), DI + MOVL SI, 20(SP) + MOVQ $0x9e3779b1, R9 + MOVQ DI, R10 + MOVQ DI, R11 + SHRQ $0x08, R11 SHLQ $0x20, R10 - IMULQ R8, R10 + IMULQ R9, R10 SHRQ $0x37, R10 - MOVL 24(SP)(R9*4), BX - MOVL 24(SP)(R10*4), DI - MOVL CX, 24(SP)(R9*4) - LEAL 1(CX), R9 - MOVL R9, 24(SP)(R10*4) - MOVQ SI, R9 - SHRQ $0x10, R9 - SHLQ $0x20, R9 - IMULQ R8, R9 - SHRQ $0x37, R9 - MOVL CX, R8 - SUBL 16(SP), R8 - MOVL 1(DX)(R8*1), R10 - MOVQ SI, R8 - SHRQ $0x08, R8 - CMPL R8, R10 + SHLQ $0x20, R11 + IMULQ R9, R11 + SHRQ $0x37, R11 + MOVL (AX)(R10*4), SI + MOVL (AX)(R11*4), R8 + MOVL DX, (AX)(R10*4) + LEAL 1(DX), R10 + MOVL R10, (AX)(R11*4) + MOVQ DI, R10 + SHRQ $0x10, R10 + SHLQ $0x20, R10 + IMULQ R9, R10 + SHRQ $0x37, R10 + MOVL DX, R9 + SUBL 16(SP), R9 + MOVL 1(BX)(R9*1), R11 + MOVQ DI, R9 + SHRQ $0x08, R9 + CMPL R9, R11 JNE no_repeat_found_calcBlockSizeSmall - LEAL 1(CX), SI - MOVL 12(SP), BX - MOVL SI, DI - SUBL 16(SP), DI + LEAL 1(DX), DI + MOVL 12(SP), SI + MOVL DI, R8 + SUBL 16(SP), R8 JZ repeat_extend_back_end_calcBlockSizeSmall repeat_extend_back_loop_calcBlockSizeSmall: - CMPL SI, BX + CMPL DI, SI JBE repeat_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(DI*1), R8 - MOVB -1(DX)(SI*1), R9 - CMPB R8, R9 + MOVB -1(BX)(R8*1), R9 + MOVB -1(BX)(DI*1), R10 + CMPB R9, R10 JNE repeat_extend_back_end_calcBlockSizeSmall - LEAL -1(SI), SI - DECL DI + LEAL -1(DI), DI + DECL R8 JNZ repeat_extend_back_loop_calcBlockSizeSmall repeat_extend_back_end_calcBlockSizeSmall: - MOVL SI, BX - SUBL 12(SP), BX - LEAQ 3(AX)(BX*1), BX - CMPQ BX, (SP) + MOVL DI, SI + SUBL 12(SP), SI + LEAQ 3(CX)(SI*1), SI + CMPQ SI, (SP) JB repeat_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET repeat_dst_size_check_calcBlockSizeSmall: - MOVL 12(SP), BX - CMPL BX, SI + MOVL 12(SP), SI + CMPL SI, DI JEQ emit_literal_done_repeat_emit_calcBlockSizeSmall - MOVL SI, DI - MOVL SI, 12(SP) - LEAQ (DX)(BX*1), R8 - SUBL BX, DI - LEAL -1(DI), BX - CMPL BX, $0x3c + MOVL DI, R8 + MOVL DI, 12(SP) + LEAQ (BX)(SI*1), R9 + SUBL SI, R8 + LEAL -1(R8), SI + CMPL SI, $0x3c JB one_byte_repeat_emit_calcBlockSizeSmall - CMPL BX, $0x00000100 + CMPL SI, $0x00000100 JB two_bytes_repeat_emit_calcBlockSizeSmall JB three_bytes_repeat_emit_calcBlockSizeSmall three_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_repeat_emit_calcBlockSizeSmall two_bytes_repeat_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL BX, $0x40 + ADDQ $0x02, CX + CMPL SI, $0x40 JB memmove_repeat_emit_calcBlockSizeSmall JMP memmove_long_repeat_emit_calcBlockSizeSmall one_byte_repeat_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX JMP emit_literal_done_repeat_emit_calcBlockSizeSmall memmove_long_repeat_emit_calcBlockSizeSmall: - LEAQ (AX)(DI*1), AX + LEAQ (CX)(R8*1), CX emit_literal_done_repeat_emit_calcBlockSizeSmall: - ADDL $0x05, CX - MOVL CX, BX - SUBL 16(SP), BX - MOVQ src_len+8(FP), DI - SUBL CX, DI - LEAQ (DX)(CX*1), R8 - LEAQ (DX)(BX*1), BX + ADDL $0x05, DX + MOVL DX, SI + SUBL 16(SP), SI + MOVQ src_len+8(FP), R8 + SUBL DX, R8 + LEAQ (BX)(DX*1), R9 + LEAQ (BX)(SI*1), SI // matchLen - XORL R10, R10 + XORL R11, R11 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x10 + CMPL R8, $0x10 JB matchlen_match8_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - MOVQ 8(R8)(R10*1), R11 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + MOVQ 8(R9)(R11*1), R12 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - XORQ 8(BX)(R10*1), R11 + XORQ 8(SI)(R11*1), R12 JNZ matchlen_bsf_16repeat_extend_calcBlockSizeSmall - LEAL -16(DI), DI - LEAL 16(R10), R10 + LEAL -16(R8), R8 + LEAL 16(R11), R11 JMP matchlen_loopback_16_repeat_extend_calcBlockSizeSmall matchlen_bsf_16repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R11, R11 + TZCNTQ R12, R12 #else - BSFQ R11, R11 + BSFQ R12, R12 #endif - SARQ $0x03, R11 - LEAL 8(R10)(R11*1), R10 + SARQ $0x03, R12 + LEAL 8(R11)(R12*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match8_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x08 + CMPL R8, $0x08 JB matchlen_match4_repeat_extend_calcBlockSizeSmall - MOVQ (R8)(R10*1), R9 - XORQ (BX)(R10*1), R9 + MOVQ (R9)(R11*1), R10 + XORQ (SI)(R11*1), R10 JNZ matchlen_bsf_8_repeat_extend_calcBlockSizeSmall - LEAL -8(DI), DI - LEAL 8(R10), R10 + LEAL -8(R8), R8 + LEAL 8(R11), R11 JMP matchlen_match4_repeat_extend_calcBlockSizeSmall matchlen_bsf_8_repeat_extend_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R9, R9 + TZCNTQ R10, R10 #else - BSFQ R9, R9 + BSFQ R10, R10 #endif - SARQ $0x03, R9 - LEAL (R10)(R9*1), R10 + SARQ $0x03, R10 + LEAL (R11)(R10*1), R11 JMP repeat_extend_forward_end_calcBlockSizeSmall matchlen_match4_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x04 + CMPL R8, $0x04 JB matchlen_match2_repeat_extend_calcBlockSizeSmall - MOVL (R8)(R10*1), R9 - CMPL (BX)(R10*1), R9 + MOVL (R9)(R11*1), R10 + CMPL (SI)(R11*1), R10 JNE matchlen_match2_repeat_extend_calcBlockSizeSmall - LEAL -4(DI), DI - LEAL 4(R10), R10 + LEAL -4(R8), R8 + LEAL 4(R11), R11 matchlen_match2_repeat_extend_calcBlockSizeSmall: - CMPL DI, $0x01 + CMPL R8, $0x01 JE matchlen_match1_repeat_extend_calcBlockSizeSmall JB repeat_extend_forward_end_calcBlockSizeSmall - MOVW (R8)(R10*1), R9 - CMPW (BX)(R10*1), R9 + MOVW (R9)(R11*1), R10 + CMPW (SI)(R11*1), R10 JNE matchlen_match1_repeat_extend_calcBlockSizeSmall - LEAL 2(R10), R10 - SUBL $0x02, DI + LEAL 2(R11), R11 + SUBL $0x02, R8 JZ repeat_extend_forward_end_calcBlockSizeSmall matchlen_match1_repeat_extend_calcBlockSizeSmall: - MOVB (R8)(R10*1), R9 - CMPB (BX)(R10*1), R9 + MOVB (R9)(R11*1), R10 + CMPB (SI)(R11*1), R10 JNE repeat_extend_forward_end_calcBlockSizeSmall - LEAL 1(R10), R10 + LEAL 1(R11), R11 repeat_extend_forward_end_calcBlockSizeSmall: - ADDL R10, CX - MOVL CX, BX - SUBL SI, BX - MOVL 16(SP), SI + ADDL R11, DX + MOVL DX, SI + SUBL DI, SI + MOVL 16(SP), DI // emitCopy two_byte_offset_repeat_as_copy_calcBlockSizeSmall: - CMPL BX, $0x40 + CMPL SI, $0x40 JBE two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall - LEAL -60(BX), BX - ADDQ $0x03, AX + LEAL -60(SI), SI + ADDQ $0x03, CX JMP two_byte_offset_repeat_as_copy_calcBlockSizeSmall two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall: - MOVL BX, SI - SHLL $0x02, SI - CMPL BX, $0x0c + MOVL SI, DI + SHLL $0x02, DI + CMPL SI, $0x0c JAE emit_copy_three_repeat_as_copy_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP repeat_end_emit_calcBlockSizeSmall emit_copy_three_repeat_as_copy_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX repeat_end_emit_calcBlockSizeSmall: - MOVL CX, 12(SP) + MOVL DX, 12(SP) JMP search_loop_calcBlockSizeSmall no_repeat_found_calcBlockSizeSmall: - CMPL (DX)(BX*1), SI + CMPL (BX)(SI*1), DI JEQ candidate_match_calcBlockSizeSmall - SHRQ $0x08, SI - MOVL 24(SP)(R9*4), BX - LEAL 2(CX), R8 - CMPL (DX)(DI*1), SI + SHRQ $0x08, DI + MOVL (AX)(R10*4), SI + LEAL 2(DX), R9 + CMPL (BX)(R8*1), DI JEQ candidate2_match_calcBlockSizeSmall - MOVL R8, 24(SP)(R9*4) - SHRQ $0x08, SI - CMPL (DX)(BX*1), SI + MOVL R9, (AX)(R10*4) + SHRQ $0x08, DI + CMPL (BX)(SI*1), DI JEQ candidate3_match_calcBlockSizeSmall - MOVL 20(SP), CX + MOVL 20(SP), DX JMP search_loop_calcBlockSizeSmall candidate3_match_calcBlockSizeSmall: - ADDL $0x02, CX + ADDL $0x02, DX JMP candidate_match_calcBlockSizeSmall candidate2_match_calcBlockSizeSmall: - MOVL R8, 24(SP)(R9*4) - INCL CX - MOVL DI, BX + MOVL R9, (AX)(R10*4) + INCL DX + MOVL R8, SI candidate_match_calcBlockSizeSmall: - MOVL 12(SP), SI - TESTL BX, BX + MOVL 12(SP), DI + TESTL SI, SI JZ match_extend_back_end_calcBlockSizeSmall match_extend_back_loop_calcBlockSizeSmall: - CMPL CX, SI + CMPL DX, DI JBE match_extend_back_end_calcBlockSizeSmall - MOVB -1(DX)(BX*1), DI - MOVB -1(DX)(CX*1), R8 - CMPB DI, R8 + MOVB -1(BX)(SI*1), R8 + MOVB -1(BX)(DX*1), R9 + CMPB R8, R9 JNE match_extend_back_end_calcBlockSizeSmall - LEAL -1(CX), CX - DECL BX + LEAL -1(DX), DX + DECL SI JZ match_extend_back_end_calcBlockSizeSmall JMP match_extend_back_loop_calcBlockSizeSmall match_extend_back_end_calcBlockSizeSmall: - MOVL CX, SI - SUBL 12(SP), SI - LEAQ 3(AX)(SI*1), SI - CMPQ SI, (SP) + MOVL DX, DI + SUBL 12(SP), DI + LEAQ 3(CX)(DI*1), DI + CMPQ DI, (SP) JB match_dst_size_check_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_dst_size_check_calcBlockSizeSmall: - MOVL CX, SI - MOVL 12(SP), DI - CMPL DI, SI + MOVL DX, DI + MOVL 12(SP), R8 + CMPL R8, DI JEQ emit_literal_done_match_emit_calcBlockSizeSmall - MOVL SI, R8 - MOVL SI, 12(SP) - LEAQ (DX)(DI*1), SI - SUBL DI, R8 - LEAL -1(R8), SI - CMPL SI, $0x3c + MOVL DI, R9 + MOVL DI, 12(SP) + LEAQ (BX)(R8*1), DI + SUBL R8, R9 + LEAL -1(R9), DI + CMPL DI, $0x3c JB one_byte_match_emit_calcBlockSizeSmall - CMPL SI, $0x00000100 + CMPL DI, $0x00000100 JB two_bytes_match_emit_calcBlockSizeSmall JB three_bytes_match_emit_calcBlockSizeSmall three_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_match_emit_calcBlockSizeSmall two_bytes_match_emit_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL SI, $0x40 + ADDQ $0x02, CX + CMPL DI, $0x40 JB memmove_match_emit_calcBlockSizeSmall JMP memmove_long_match_emit_calcBlockSizeSmall one_byte_match_emit_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX JMP emit_literal_done_match_emit_calcBlockSizeSmall memmove_long_match_emit_calcBlockSizeSmall: - LEAQ (AX)(R8*1), AX + LEAQ (CX)(R9*1), CX emit_literal_done_match_emit_calcBlockSizeSmall: match_nolit_loop_calcBlockSizeSmall: - MOVL CX, SI - SUBL BX, SI - MOVL SI, 16(SP) - ADDL $0x04, CX - ADDL $0x04, BX - MOVQ src_len+8(FP), SI - SUBL CX, SI - LEAQ (DX)(CX*1), DI - LEAQ (DX)(BX*1), BX + MOVL DX, DI + SUBL SI, DI + MOVL DI, 16(SP) + ADDL $0x04, DX + ADDL $0x04, SI + MOVQ src_len+8(FP), DI + SUBL DX, DI + LEAQ (BX)(DX*1), R8 + LEAQ (BX)(SI*1), SI // matchLen - XORL R9, R9 + XORL R10, R10 matchlen_loopback_16_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x10 + CMPL DI, $0x10 JB matchlen_match8_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - MOVQ 8(DI)(R9*1), R10 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + MOVQ 8(R8)(R10*1), R11 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - XORQ 8(BX)(R9*1), R10 + XORQ 8(SI)(R10*1), R11 JNZ matchlen_bsf_16match_nolit_calcBlockSizeSmall - LEAL -16(SI), SI - LEAL 16(R9), R9 + LEAL -16(DI), DI + LEAL 16(R10), R10 JMP matchlen_loopback_16_match_nolit_calcBlockSizeSmall matchlen_bsf_16match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R10, R10 + TZCNTQ R11, R11 #else - BSFQ R10, R10 + BSFQ R11, R11 #endif - SARQ $0x03, R10 - LEAL 8(R9)(R10*1), R9 + SARQ $0x03, R11 + LEAL 8(R10)(R11*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match8_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x08 + CMPL DI, $0x08 JB matchlen_match4_match_nolit_calcBlockSizeSmall - MOVQ (DI)(R9*1), R8 - XORQ (BX)(R9*1), R8 + MOVQ (R8)(R10*1), R9 + XORQ (SI)(R10*1), R9 JNZ matchlen_bsf_8_match_nolit_calcBlockSizeSmall - LEAL -8(SI), SI - LEAL 8(R9), R9 + LEAL -8(DI), DI + LEAL 8(R10), R10 JMP matchlen_match4_match_nolit_calcBlockSizeSmall matchlen_bsf_8_match_nolit_calcBlockSizeSmall: #ifdef GOAMD64_v3 - TZCNTQ R8, R8 + TZCNTQ R9, R9 #else - BSFQ R8, R8 + BSFQ R9, R9 #endif - SARQ $0x03, R8 - LEAL (R9)(R8*1), R9 + SARQ $0x03, R9 + LEAL (R10)(R9*1), R10 JMP match_nolit_end_calcBlockSizeSmall matchlen_match4_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x04 + CMPL DI, $0x04 JB matchlen_match2_match_nolit_calcBlockSizeSmall - MOVL (DI)(R9*1), R8 - CMPL (BX)(R9*1), R8 + MOVL (R8)(R10*1), R9 + CMPL (SI)(R10*1), R9 JNE matchlen_match2_match_nolit_calcBlockSizeSmall - LEAL -4(SI), SI - LEAL 4(R9), R9 + LEAL -4(DI), DI + LEAL 4(R10), R10 matchlen_match2_match_nolit_calcBlockSizeSmall: - CMPL SI, $0x01 + CMPL DI, $0x01 JE matchlen_match1_match_nolit_calcBlockSizeSmall JB match_nolit_end_calcBlockSizeSmall - MOVW (DI)(R9*1), R8 - CMPW (BX)(R9*1), R8 + MOVW (R8)(R10*1), R9 + CMPW (SI)(R10*1), R9 JNE matchlen_match1_match_nolit_calcBlockSizeSmall - LEAL 2(R9), R9 - SUBL $0x02, SI + LEAL 2(R10), R10 + SUBL $0x02, DI JZ match_nolit_end_calcBlockSizeSmall matchlen_match1_match_nolit_calcBlockSizeSmall: - MOVB (DI)(R9*1), R8 - CMPB (BX)(R9*1), R8 + MOVB (R8)(R10*1), R9 + CMPB (SI)(R10*1), R9 JNE match_nolit_end_calcBlockSizeSmall - LEAL 1(R9), R9 + LEAL 1(R10), R10 match_nolit_end_calcBlockSizeSmall: - ADDL R9, CX - MOVL 16(SP), BX - ADDL $0x04, R9 - MOVL CX, 12(SP) + ADDL R10, DX + MOVL 16(SP), SI + ADDL $0x04, R10 + MOVL DX, 12(SP) // emitCopy two_byte_offset_match_nolit_calcBlockSizeSmall: - CMPL R9, $0x40 + CMPL R10, $0x40 JBE two_byte_offset_short_match_nolit_calcBlockSizeSmall - LEAL -60(R9), R9 - ADDQ $0x03, AX + LEAL -60(R10), R10 + ADDQ $0x03, CX JMP two_byte_offset_match_nolit_calcBlockSizeSmall two_byte_offset_short_match_nolit_calcBlockSizeSmall: - MOVL R9, BX - SHLL $0x02, BX - CMPL R9, $0x0c + MOVL R10, SI + SHLL $0x02, SI + CMPL R10, $0x0c JAE emit_copy_three_match_nolit_calcBlockSizeSmall - ADDQ $0x02, AX + ADDQ $0x02, CX JMP match_nolit_emitcopy_end_calcBlockSizeSmall emit_copy_three_match_nolit_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX match_nolit_emitcopy_end_calcBlockSizeSmall: - CMPL CX, 8(SP) + CMPL DX, 8(SP) JAE emit_remainder_calcBlockSizeSmall - MOVQ -2(DX)(CX*1), SI - CMPQ AX, (SP) + MOVQ -2(BX)(DX*1), DI + CMPQ CX, (SP) JB match_nolit_dst_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET match_nolit_dst_ok_calcBlockSizeSmall: - MOVQ $0x9e3779b1, R8 - MOVQ SI, DI - SHRQ $0x10, SI - MOVQ SI, BX - SHLQ $0x20, DI - IMULQ R8, DI - SHRQ $0x37, DI - SHLQ $0x20, BX - IMULQ R8, BX - SHRQ $0x37, BX - LEAL -2(CX), R8 - LEAQ 24(SP)(BX*4), R9 - MOVL (R9), BX - MOVL R8, 24(SP)(DI*4) - MOVL CX, (R9) - CMPL (DX)(BX*1), SI + MOVQ $0x9e3779b1, R9 + MOVQ DI, R8 + SHRQ $0x10, DI + MOVQ DI, SI + SHLQ $0x20, R8 + IMULQ R9, R8 + SHRQ $0x37, R8 + SHLQ $0x20, SI + IMULQ R9, SI + SHRQ $0x37, SI + LEAL -2(DX), R9 + LEAQ (AX)(SI*4), R10 + MOVL (R10), SI + MOVL R9, (AX)(R8*4) + MOVL DX, (R10) + CMPL (BX)(SI*1), DI JEQ match_nolit_loop_calcBlockSizeSmall - INCL CX + INCL DX JMP search_loop_calcBlockSizeSmall emit_remainder_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - SUBL 12(SP), CX - LEAQ 3(AX)(CX*1), CX - CMPQ CX, (SP) + MOVQ src_len+8(FP), AX + SUBL 12(SP), AX + LEAQ 3(CX)(AX*1), AX + CMPQ AX, (SP) JB emit_remainder_ok_calcBlockSizeSmall - MOVQ $0x00000000, ret+24(FP) + MOVQ $0x00000000, ret+32(FP) RET emit_remainder_ok_calcBlockSizeSmall: - MOVQ src_len+8(FP), CX - MOVL 12(SP), BX - CMPL BX, CX + MOVQ src_len+8(FP), AX + MOVL 12(SP), DX + CMPL DX, AX JEQ emit_literal_done_emit_remainder_calcBlockSizeSmall - MOVL CX, SI - MOVL CX, 12(SP) - LEAQ (DX)(BX*1), CX - SUBL BX, SI - LEAL -1(SI), CX - CMPL CX, $0x3c + MOVL AX, SI + MOVL AX, 12(SP) + LEAQ (BX)(DX*1), AX + SUBL DX, SI + LEAL -1(SI), AX + CMPL AX, $0x3c JB one_byte_emit_remainder_calcBlockSizeSmall - CMPL CX, $0x00000100 + CMPL AX, $0x00000100 JB two_bytes_emit_remainder_calcBlockSizeSmall JB three_bytes_emit_remainder_calcBlockSizeSmall three_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x03, AX + ADDQ $0x03, CX JMP memmove_long_emit_remainder_calcBlockSizeSmall two_bytes_emit_remainder_calcBlockSizeSmall: - ADDQ $0x02, AX - CMPL CX, $0x40 + ADDQ $0x02, CX + CMPL AX, $0x40 JB memmove_emit_remainder_calcBlockSizeSmall JMP memmove_long_emit_remainder_calcBlockSizeSmall one_byte_emit_remainder_calcBlockSizeSmall: - ADDQ $0x01, AX + ADDQ $0x01, CX memmove_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX JMP emit_literal_done_emit_remainder_calcBlockSizeSmall memmove_long_emit_remainder_calcBlockSizeSmall: - LEAQ (AX)(SI*1), AX + LEAQ (CX)(SI*1), AX + MOVQ AX, CX emit_literal_done_emit_remainder_calcBlockSizeSmall: - MOVQ AX, ret+24(FP) + MOVQ CX, ret+32(FP) RET // func emitLiteral(dst []byte, lit []byte) int @@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4_s2_loop: @@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX XORQ DI, DI lz4s_s2_loop: @@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4_snappy_loop: CMPQ DX, BX @@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64 MOVQ src_base+24(FP), DX MOVQ src_len+32(FP), BX LEAQ (DX)(BX*1), BX - LEAQ -10(AX)(CX*1), CX + LEAQ -8(AX)(CX*1), CX lz4s_snappy_loop: CMPQ DX, BX diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go index 0a46f2b98..fd15078f7 100644 --- a/vendor/github.com/klauspost/compress/s2/writer.go +++ b/vendor/github.com/klauspost/compress/s2/writer.go @@ -83,11 +83,14 @@ type Writer struct { snappy bool flushOnWrite bool appendIndex bool + bufferCB func([]byte) level uint8 } type result struct { b []byte + // return when writing + ret []byte // Uncompressed start offset startOffset int64 } @@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) { for write := range toWrite { // Wait for the data to be available. input := <-write + if input.ret != nil && w.bufferCB != nil { + w.bufferCB(input.ret) + input.ret = nil + } in := input.b if len(in) > 0 { if w.err(nil) == nil { @@ -341,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) { // but the input buffer cannot be written to by the caller // until Flush or Close has been called when concurrency != 1. // -// If you cannot control that, use the regular Write function. +// Use the WriterBufferDone to receive a callback when the buffer is done +// Processing. // // Note that input is not buffered. // This means that each write will result in discrete blocks being created. @@ -364,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { } if w.concurrency == 1 { _, err := w.writeSync(buf) + if w.bufferCB != nil { + w.bufferCB(buf) + } return err } @@ -378,7 +389,7 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes} } } - + orgBuf := buf for len(buf) > 0 { // Cut input. uncompressed := buf @@ -397,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) { startOffset: w.uncompWritten, } w.uncompWritten += int64(len(uncompressed)) + if len(buf) == 0 && w.bufferCB != nil { + res.ret = orgBuf + } go func() { race.ReadSlice(uncompressed) @@ -922,7 +936,7 @@ func WriterBetterCompression() WriterOption { } // WriterBestCompression will enable better compression. -// EncodeBetter compresses better than Encode but typically with a +// EncodeBest compresses better than Encode but typically with a // big speed decrease on compression. func WriterBestCompression() WriterOption { return func(w *Writer) error { @@ -941,6 +955,17 @@ func WriterUncompressed() WriterOption { } } +// WriterBufferDone will perform a callback when EncodeBuffer has finished +// writing a buffer to the output and the buffer can safely be reused. +// If the buffer was split into several blocks, it will be sent after the last block. +// Callbacks will not be done concurrently. +func WriterBufferDone(fn func(b []byte)) WriterOption { + return func(w *Writer) error { + w.bufferCB = fn + return nil + } +} + // WriterBlockSize allows to override the default block size. // Blocks will be this size or smaller. // Minimum size is 4KB and maximum size is 4MB. diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 03744fbc7..9c28840c3 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -598,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { printf("RLE set to 0x%x, code: %v", symb, v) } case compModeFSE: - println("Reading table for", tableIndex(i)) + if debugDecoder { + println("Reading table for", tableIndex(i)) + } if seq.fse == nil || seq.fse.preDefined { seq.fse = fseDecoderPool.Get().(*fseDecoder) } diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go index a4f5bf91f..84a79fde7 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_better.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -179,9 +179,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -210,12 +210,12 @@ encodeLoop: // Index match start+1 (long) -> s - 1 index0 := s + repOff - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -241,9 +241,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -270,11 +270,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -708,9 +708,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -738,12 +738,12 @@ encodeLoop: blk.sequences = append(blk.sequences, seq) // Index match start+1 (long) -> s - 1 - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -772,9 +772,9 @@ encodeLoop: if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { // Consider history as well. var seq seq - lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -801,11 +801,11 @@ encodeLoop: } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff2 + s += length + repOff2 nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go index a154c18f7..d36be7bd8 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -138,9 +138,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -166,11 +166,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop @@ -798,9 +798,9 @@ encodeLoop: if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { // Consider history as well. var seq seq - lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) - seq.matchLen = uint32(lenght - zstdMinMatch) + seq.matchLen = uint32(length - zstdMinMatch) // We might be able to match backwards. // Extend as long as we can. @@ -826,11 +826,11 @@ encodeLoop: println("repeat sequence", seq, "next s:", s) } blk.sequences = append(blk.sequences, seq) - s += lenght + repOff + s += length + repOff nextEmit = s if s >= sLimit { if debugEncoder { - println("repeat ended", s, lenght) + println("repeat ended", s, length) } break encodeLoop diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 72af7ef0f..8f8223cd3 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -6,6 +6,7 @@ package zstd import ( "crypto/rand" + "errors" "fmt" "io" "math" @@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) { // and write CRC if requested. func (e *Encoder) Write(p []byte) (n int, err error) { s := &e.state + if s.eofWritten { + return 0, ErrEncoderClosed + } for len(p) > 0 { if len(p)+len(s.filling) < e.o.blockSize { if e.o.crc { @@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error { return nil } if final && len(s.filling) > 0 { - s.current = e.EncodeAll(s.filling, s.current[:0]) + s.current = e.encodeAll(s.encoder, s.filling, s.current[:0]) var n2 int n2, s.err = s.w.Write(s.current) if s.err != nil { @@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error { s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current s.nInput += int64(len(s.current)) s.wg.Add(1) + if final { + s.eofWritten = true + } go func(src []byte) { if debugEncoder { println("Adding block,", len(src), "bytes, final:", final) @@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error { blk := enc.Block() enc.Encode(blk, src) blk.last = final - if final { - s.eofWritten = true - } // Wait for pending writes. s.wWg.Wait() if s.writeErr != nil { @@ -401,12 +405,20 @@ func (e *Encoder) Flush() error { if len(s.filling) > 0 { err := e.nextBlock(false) if err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } } s.wg.Wait() s.wWg.Wait() if s.err != nil { + // Ignore Flush after Close. + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return s.err } return s.writeErr @@ -422,6 +434,9 @@ func (e *Encoder) Close() error { } err := e.nextBlock(true) if err != nil { + if errors.Is(s.err, ErrEncoderClosed) { + return nil + } return err } if s.frameContentSize > 0 { @@ -459,6 +474,11 @@ func (e *Encoder) Close() error { } _, s.err = s.w.Write(frame) } + if s.err == nil { + s.err = ErrEncoderClosed + return nil + } + return s.err } @@ -469,6 +489,15 @@ func (e *Encoder) Close() error { // Data compressed with EncodeAll can be decoded with the Decoder, // using either a stream or DecodeAll. func (e *Encoder) EncodeAll(src, dst []byte) []byte { + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + e.encoders <- enc + }() + return e.encodeAll(enc, src, dst) +} + +func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte { if len(src) == 0 { if e.o.fullZero { // Add frame header. @@ -491,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte { } return dst } - e.init.Do(e.initialize) - enc := <-e.encoders - defer func() { - // Release encoder reference to last block. - // If a non-single block is needed the encoder will reset again. - e.encoders <- enc - }() + // Use single segments when above minimum window and below window size. single := len(src) <= e.o.windowSize && len(src) > MinWindowSize if e.o.single != nil { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 53e160f7e..e47af66e7 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error { } return err } - printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + if debugDecoder { + printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + } windowLog := 10 + (wd >> 3) windowBase := uint64(1) << windowLog windowAdd := (windowBase / 8) * uint64(wd&0x7) diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go index 8adabd828..c59f17e07 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) default: - return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode) } s.seqSize += ctx.litRemain @@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error { return io.ErrUnexpectedEOF } - return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode) } if ctx.litRemain < 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 5b06174b8..f5591fa1e 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -1814,7 +1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -2376,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition @@ -2896,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 MOVQ 40(SP), AX ADDQ AX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R10, 32(SP) // outBase += outPosition @@ -3560,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 MOVQ 40(SP), CX ADDQ CX, 48(SP) - // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + // Calculate pointer to s.out[cap(s.out)] (a past-end pointer) ADDQ R9, 32(SP) // outBase += outPosition diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 4be7cc736..066bef2a4 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -88,6 +88,10 @@ var ( // Close has been called. ErrDecoderClosed = errors.New("decoder used after Close") + // ErrEncoderClosed will be returned if the Encoder was used after + // Close has been called. + ErrEncoderClosed = errors.New("encoder used after Close") + // ErrDecoderNilInput is returned when a nil Reader was provided // and an operation other than Reset/DecodeAll/Close was attempted. ErrDecoderNilInput = errors.New("nil input provided as reader") diff --git a/vendor/github.com/minio/minio-go/v7/Makefile b/vendor/github.com/minio/minio-go/v7/Makefile index 68444aa68..9e4ddc4c8 100644 --- a/vendor/github.com/minio/minio-go/v7/Makefile +++ b/vendor/github.com/minio/minio-go/v7/Makefile @@ -32,6 +32,10 @@ functional-test: @GO111MODULE=on go build -race functional_tests.go @SERVER_ENDPOINT=localhost:9000 ACCESS_KEY=minioadmin SECRET_KEY=minioadmin ENABLE_HTTPS=1 MINT_MODE=full ./functional_tests +functional-test-notls: + @GO111MODULE=on go build -race functional_tests.go + @SERVER_ENDPOINT=localhost:9000 ACCESS_KEY=minioadmin SECRET_KEY=minioadmin ENABLE_HTTPS=0 MINT_MODE=full ./functional_tests + clean: @echo "Cleaning up all the generated files" @find . -name '*.test' | xargs rm -fv diff --git a/vendor/github.com/minio/minio-go/v7/README.md b/vendor/github.com/minio/minio-go/v7/README.md index 82f70a131..be7963c52 100644 --- a/vendor/github.com/minio/minio-go/v7/README.md +++ b/vendor/github.com/minio/minio-go/v7/README.md @@ -253,7 +253,7 @@ The full API Reference is available here. * [setbucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/setbucketencryption.go) * [getbucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/getbucketencryption.go) -* [deletebucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/deletebucketencryption.go) +* [removebucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/removebucketencryption.go) ### Full Examples : Bucket replication Operations diff --git a/vendor/github.com/minio/minio-go/v7/api-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-datatypes.go index 97a6f80b2..8a8fd8898 100644 --- a/vendor/github.com/minio/minio-go/v7/api-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-datatypes.go @@ -143,10 +143,11 @@ type UploadInfo struct { // Verified checksum values, if any. // Values are base64 (standard) encoded. // For multipart objects this is a checksum of the checksum of each part. - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string } // RestoreInfo contains information of the restore operation of an archived object @@ -215,10 +216,11 @@ type ObjectInfo struct { Restore *RestoreInfo // Checksum values - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string Internal *struct { K int // Data blocks diff --git a/vendor/github.com/minio/minio-go/v7/api-get-object.go b/vendor/github.com/minio/minio-go/v7/api-get-object.go index d7fd27835..5cc85f61c 100644 --- a/vendor/github.com/minio/minio-go/v7/api-get-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-get-object.go @@ -318,7 +318,7 @@ func (o *Object) doGetRequest(request getRequest) (getResponse, error) { response := <-o.resCh // Return any error to the top level. - if response.Error != nil { + if response.Error != nil && response.Error != io.EOF { return response, response.Error } @@ -340,7 +340,7 @@ func (o *Object) doGetRequest(request getRequest) (getResponse, error) { // Data are ready on the wire, no need to reinitiate connection in lower level o.seekData = false - return response, nil + return response, response.Error } // setOffset - handles the setting of offsets for diff --git a/vendor/github.com/minio/minio-go/v7/api-prompt-object.go b/vendor/github.com/minio/minio-go/v7/api-prompt-object.go new file mode 100644 index 000000000..dac062a75 --- /dev/null +++ b/vendor/github.com/minio/minio-go/v7/api-prompt-object.go @@ -0,0 +1,78 @@ +/* + * MinIO Go Library for Amazon S3 Compatible Cloud Storage + * Copyright 2015-2024 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package minio + +import ( + "bytes" + "context" + "io" + "net/http" + + "github.com/goccy/go-json" + "github.com/minio/minio-go/v7/pkg/s3utils" +) + +// PromptObject performs language model inference with the prompt and referenced object as context. +// Inference is performed using a Lambda handler that can process the prompt and object. +// Currently, this functionality is limited to certain MinIO servers. +func (c *Client) PromptObject(ctx context.Context, bucketName, objectName, prompt string, opts PromptObjectOptions) (io.ReadCloser, error) { + // Input validation. + if err := s3utils.CheckValidBucketName(bucketName); err != nil { + return nil, ErrorResponse{ + StatusCode: http.StatusBadRequest, + Code: "InvalidBucketName", + Message: err.Error(), + } + } + if err := s3utils.CheckValidObjectName(objectName); err != nil { + return nil, ErrorResponse{ + StatusCode: http.StatusBadRequest, + Code: "XMinioInvalidObjectName", + Message: err.Error(), + } + } + + opts.AddLambdaArnToReqParams(opts.LambdaArn) + opts.SetHeader("Content-Type", "application/json") + opts.AddPromptArg("prompt", prompt) + promptReqBytes, err := json.Marshal(opts.PromptArgs) + if err != nil { + return nil, err + } + + // Execute POST on bucket/object. + resp, err := c.executeMethod(ctx, http.MethodPost, requestMetadata{ + bucketName: bucketName, + objectName: objectName, + queryValues: opts.toQueryValues(), + customHeader: opts.Header(), + contentSHA256Hex: sum256Hex(promptReqBytes), + contentBody: bytes.NewReader(promptReqBytes), + contentLength: int64(len(promptReqBytes)), + }) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusOK { + defer closeResponse(resp) + return nil, httpRespToErrorResponse(resp, bucketName, objectName) + } + + return resp.Body, nil +} diff --git a/vendor/github.com/minio/minio-go/v7/api-prompt-options.go b/vendor/github.com/minio/minio-go/v7/api-prompt-options.go new file mode 100644 index 000000000..4493a75d4 --- /dev/null +++ b/vendor/github.com/minio/minio-go/v7/api-prompt-options.go @@ -0,0 +1,84 @@ +/* + * MinIO Go Library for Amazon S3 Compatible Cloud Storage + * Copyright 2015-2024 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package minio + +import ( + "net/http" + "net/url" +) + +// PromptObjectOptions provides options to PromptObject call. +// LambdaArn is the ARN of the Prompt Lambda to be invoked. +// PromptArgs is a map of key-value pairs to be passed to the inference action on the Prompt Lambda. +// "prompt" is a reserved key and should not be used as a key in PromptArgs. +type PromptObjectOptions struct { + LambdaArn string + PromptArgs map[string]any + headers map[string]string + reqParams url.Values +} + +// Header returns the http.Header representation of the POST options. +func (o PromptObjectOptions) Header() http.Header { + headers := make(http.Header, len(o.headers)) + for k, v := range o.headers { + headers.Set(k, v) + } + return headers +} + +// AddPromptArg Add a key value pair to the prompt arguments where the key is a string and +// the value is a JSON serializable. +func (o *PromptObjectOptions) AddPromptArg(key string, value any) { + if o.PromptArgs == nil { + o.PromptArgs = make(map[string]any) + } + o.PromptArgs[key] = value +} + +// AddLambdaArnToReqParams adds the lambdaArn to the request query string parameters. +func (o *PromptObjectOptions) AddLambdaArnToReqParams(lambdaArn string) { + if o.reqParams == nil { + o.reqParams = make(url.Values) + } + o.reqParams.Add("lambdaArn", lambdaArn) +} + +// SetHeader adds a key value pair to the options. The +// key-value pair will be part of the HTTP POST request +// headers. +func (o *PromptObjectOptions) SetHeader(key, value string) { + if o.headers == nil { + o.headers = make(map[string]string) + } + o.headers[http.CanonicalHeaderKey(key)] = value +} + +// toQueryValues - Convert the reqParams in Options to query string parameters. +func (o *PromptObjectOptions) toQueryValues() url.Values { + urlValues := make(url.Values) + if o.reqParams != nil { + for key, values := range o.reqParams { + for _, value := range values { + urlValues.Add(key, value) + } + } + } + + return urlValues +} diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go b/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go index 0ae9142e1..3023b949c 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go @@ -85,7 +85,10 @@ func (c *Client) PutObjectFanOut(ctx context.Context, bucket string, fanOutData policy.SetEncryption(fanOutReq.SSE) // Set checksum headers if any. - policy.SetChecksum(fanOutReq.Checksum) + err := policy.SetChecksum(fanOutReq.Checksum) + if err != nil { + return nil, err + } url, formData, err := c.PresignedPostPolicy(ctx, policy) if err != nil { diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go index a70cbea9e..03bd34f76 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go @@ -83,10 +83,7 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // HTTPS connection. hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256) if len(hashSums) == 0 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. @@ -113,7 +110,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() for partNumber <= totalPartsCount { @@ -154,7 +150,6 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } p := uploadPartParams{bucketName: bucketName, objectName: objectName, uploadID: uploadID, reader: rd, partNumber: partNumber, md5Base64: md5Base64, sha256Hex: sha256Hex, size: int64(length), sse: opts.ServerSideEncryption, streamSha256: !opts.DisableContentSha256, customHeader: customHeader} @@ -182,18 +177,21 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -203,12 +201,8 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -354,10 +348,11 @@ func (c *Client) uploadPart(ctx context.Context, p uploadPartParams) (ObjectPart // Once successfully uploaded, return completed part. h := resp.Header objPart := ObjectPart{ - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), } objPart.Size = p.size objPart.PartNumber = p.partNumber @@ -457,9 +452,10 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object Expiration: expTime, ExpirationRuleID: ruleID, - ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256, - ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1, - ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32, - ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C, + ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256, + ChecksumSHA1: completeMultipartUploadResult.ChecksumSHA1, + ChecksumCRC32: completeMultipartUploadResult.ChecksumCRC32, + ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C, + ChecksumCRC64NVME: completeMultipartUploadResult.ChecksumCRC64NVME, }, nil } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go index eef976c8c..3ff3b69ef 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go @@ -52,7 +52,7 @@ func (c *Client) putObjectMultipartStream(ctx context.Context, bucketName, objec } else { info, err = c.putObjectMultipartStreamOptionalChecksum(ctx, bucketName, objectName, reader, size, opts) } - if err != nil { + if err != nil && s3utils.IsGoogleEndpoint(*c.endpointURL) { errResp := ToErrorResponse(err) // Verify if multipart functionality is not available, if not // fall back to single PutObject operation. @@ -113,10 +113,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN } withChecksum := c.trailingHeaderSupport if withChecksum { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts) @@ -240,6 +237,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN // Gather the responses as they occur and update any // progress bar. + allParts := make([]ObjectPart, 0, totalPartsCount) for u := 1; u <= totalPartsCount; u++ { select { case <-ctx.Done(): @@ -248,16 +246,17 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN if uploadRes.Error != nil { return UploadInfo{}, uploadRes.Error } - + allParts = append(allParts, uploadRes.Part) // Update the totalUploadedSize. totalUploadedSize += uploadRes.Size complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: uploadRes.Part.ETag, - PartNumber: uploadRes.Part.PartNumber, - ChecksumCRC32: uploadRes.Part.ChecksumCRC32, - ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C, - ChecksumSHA1: uploadRes.Part.ChecksumSHA1, - ChecksumSHA256: uploadRes.Part.ChecksumSHA256, + ETag: uploadRes.Part.ETag, + PartNumber: uploadRes.Part.PartNumber, + ChecksumCRC32: uploadRes.Part.ChecksumCRC32, + ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C, + ChecksumSHA1: uploadRes.Part.ChecksumSHA1, + ChecksumSHA256: uploadRes.Part.ChecksumSHA256, + ChecksumCRC64NVME: uploadRes.Part.ChecksumCRC64NVME, }) } } @@ -275,15 +274,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN AutoChecksum: opts.AutoChecksum, } if withChecksum { - // Add hash of hashes. - crc := opts.AutoChecksum.Hasher() - for _, part := range complMultipartUpload.Parts { - cs, err := base64.StdEncoding.DecodeString(part.Checksum(opts.AutoChecksum)) - if err == nil { - crc.Write(cs) - } - } - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} + applyAutoChecksum(&opts, allParts) } uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) @@ -312,10 +303,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Calculate the optimal parts info for a given size. @@ -342,7 +330,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() md5Hash := c.md5Hasher() @@ -389,7 +376,6 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.KeyCapitalized(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } // Update progress reader appropriately to the latest offset @@ -420,18 +406,21 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -442,12 +431,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -475,10 +459,7 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam opts.AutoChecksum = opts.Checksum } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Cancel all when an error occurs. @@ -510,7 +491,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte crc := opts.AutoChecksum.Hasher() // Total data read and written to server. should be equal to 'size' at the end of the call. @@ -570,7 +550,6 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } wg.Add(1) @@ -630,18 +609,21 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -652,12 +634,8 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err @@ -823,9 +801,10 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string, ExpirationRuleID: ruleID, // Checksum values - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), }, nil } diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go index d769648a7..098175784 100644 --- a/vendor/github.com/minio/minio-go/v7/api-put-object.go +++ b/vendor/github.com/minio/minio-go/v7/api-put-object.go @@ -45,6 +45,8 @@ const ( ReplicationStatusFailed ReplicationStatus = "FAILED" // ReplicationStatusReplica indicates object is a replica of a source ReplicationStatusReplica ReplicationStatus = "REPLICA" + // ReplicationStatusReplicaEdge indicates object is a replica of a edge source + ReplicationStatusReplicaEdge ReplicationStatus = "REPLICA-EDGE" ) // Empty returns true if no replication status set. @@ -385,10 +387,7 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam opts.AutoChecksum = opts.Checksum } if !opts.SendContentMd5 { - if opts.UserMetadata == nil { - opts.UserMetadata = make(map[string]string, 1) - } - opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + addAutoChecksumHeaders(&opts) } // Initiate a new multipart upload. @@ -415,7 +414,6 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Create checksums // CRC32C is ~50% faster on AMD64 @ 30GB/s - var crcBytes []byte customHeader := make(http.Header) crc := opts.AutoChecksum.Hasher() @@ -441,7 +439,6 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam crc.Write(buf[:length]) cSum := crc.Sum(nil) customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum)) - crcBytes = append(crcBytes, cSum...) } // Update progress reader appropriately to the latest offset @@ -473,18 +470,21 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam // Loop over total uploaded parts to save them in // Parts array before completing the multipart request. + allParts := make([]ObjectPart, 0, len(partsInfo)) for i := 1; i < partNumber; i++ { part, ok := partsInfo[i] if !ok { return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i)) } + allParts = append(allParts, part) complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{ - ETag: part.ETag, - PartNumber: part.PartNumber, - ChecksumCRC32: part.ChecksumCRC32, - ChecksumCRC32C: part.ChecksumCRC32C, - ChecksumSHA1: part.ChecksumSHA1, - ChecksumSHA256: part.ChecksumSHA256, + ETag: part.ETag, + PartNumber: part.PartNumber, + ChecksumCRC32: part.ChecksumCRC32, + ChecksumCRC32C: part.ChecksumCRC32C, + ChecksumSHA1: part.ChecksumSHA1, + ChecksumSHA256: part.ChecksumSHA256, + ChecksumCRC64NVME: part.ChecksumCRC64NVME, }) } @@ -495,12 +495,8 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam ServerSideEncryption: opts.ServerSideEncryption, AutoChecksum: opts.AutoChecksum, } - if len(crcBytes) > 0 { - // Add hash of hashes. - crc.Reset() - crc.Write(crcBytes) - opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): base64.StdEncoding.EncodeToString(crc.Sum(nil))} - } + applyAutoChecksum(&opts, allParts) + uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts) if err != nil { return UploadInfo{}, err diff --git a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go index 790606c50..5e015fb82 100644 --- a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go +++ b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go @@ -18,6 +18,7 @@ package minio import ( + "encoding/base64" "encoding/xml" "errors" "io" @@ -276,10 +277,45 @@ type ObjectPart struct { Size int64 // Checksum values of each part. - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string +} + +// Checksum will return the checksum for the given type. +// Will return the empty string if not set. +func (c ObjectPart) Checksum(t ChecksumType) string { + switch { + case t.Is(ChecksumCRC32C): + return c.ChecksumCRC32C + case t.Is(ChecksumCRC32): + return c.ChecksumCRC32 + case t.Is(ChecksumSHA1): + return c.ChecksumSHA1 + case t.Is(ChecksumSHA256): + return c.ChecksumSHA256 + case t.Is(ChecksumCRC64NVME): + return c.ChecksumCRC64NVME + } + return "" +} + +// ChecksumRaw returns the decoded checksum from the part. +func (c ObjectPart) ChecksumRaw(t ChecksumType) ([]byte, error) { + b64 := c.Checksum(t) + if b64 == "" { + return nil, errors.New("no checksum set") + } + decoded, err := base64.StdEncoding.DecodeString(b64) + if err != nil { + return nil, err + } + if len(decoded) != t.RawByteLen() { + return nil, errors.New("checksum length mismatch") + } + return decoded, nil } // ListObjectPartsResult container for ListObjectParts response. @@ -296,6 +332,12 @@ type ListObjectPartsResult struct { NextPartNumberMarker int MaxParts int + // ChecksumAlgorithm will be CRC32, CRC32C, etc. + ChecksumAlgorithm string + + // ChecksumType is FULL_OBJECT or COMPOSITE (assume COMPOSITE when unset) + ChecksumType string + // Indicates whether the returned list of parts is truncated. IsTruncated bool ObjectParts []ObjectPart `xml:"Part"` @@ -320,10 +362,11 @@ type completeMultipartUploadResult struct { ETag string // Checksum values, hash of hashes of parts. - ChecksumCRC32 string - ChecksumCRC32C string - ChecksumSHA1 string - ChecksumSHA256 string + ChecksumCRC32 string + ChecksumCRC32C string + ChecksumSHA1 string + ChecksumSHA256 string + ChecksumCRC64NVME string } // CompletePart sub container lists individual part numbers and their @@ -334,10 +377,11 @@ type CompletePart struct { ETag string // Checksum values - ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"` - ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"` - ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"` - ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"` + ChecksumCRC32 string `xml:"ChecksumCRC32,omitempty"` + ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"` + ChecksumSHA1 string `xml:"ChecksumSHA1,omitempty"` + ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"` + ChecksumCRC64NVME string `xml:",omitempty"` } // Checksum will return the checksum for the given type. @@ -352,6 +396,8 @@ func (c CompletePart) Checksum(t ChecksumType) string { return c.ChecksumSHA1 case t.Is(ChecksumSHA256): return c.ChecksumSHA256 + case t.Is(ChecksumCRC64NVME): + return c.ChecksumCRC64NVME } return "" } diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go index 1d6b66502..83c003e49 100644 --- a/vendor/github.com/minio/minio-go/v7/api.go +++ b/vendor/github.com/minio/minio-go/v7/api.go @@ -99,6 +99,7 @@ type Client struct { healthStatus int32 trailingHeaderSupport bool + maxRetries int } // Options for New method @@ -123,12 +124,16 @@ type Options struct { // Custom hash routines. Leave nil to use standard. CustomMD5 func() md5simd.Hasher CustomSHA256 func() md5simd.Hasher + + // Number of times a request is retried. Defaults to 10 retries if this option is not configured. + // Set to 1 to disable retries. + MaxRetries int } // Global constants. const ( libraryName = "minio-go" - libraryVersion = "v7.0.77" + libraryVersion = "v7.0.82" ) // User Agent should always following the below style. @@ -278,6 +283,11 @@ func privateNew(endpoint string, opts *Options) (*Client, error) { // healthcheck is not initialized clnt.healthStatus = unknown + clnt.maxRetries = MaxRetry + if opts.MaxRetries > 0 { + clnt.maxRetries = opts.MaxRetries + } + // Return. return clnt, nil } @@ -590,9 +600,9 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ return nil, errors.New(c.endpointURL.String() + " is offline.") } - var retryable bool // Indicates if request can be retried. - var bodySeeker io.Seeker // Extracted seeker from io.Reader. - reqRetry := MaxRetry // Indicates how many times we can retry the request + var retryable bool // Indicates if request can be retried. + var bodySeeker io.Seeker // Extracted seeker from io.Reader. + var reqRetry = c.maxRetries // Indicates how many times we can retry the request if metadata.contentBody != nil { // Check if body is seekable then it is retryable. diff --git a/vendor/github.com/minio/minio-go/v7/checksum.go b/vendor/github.com/minio/minio-go/v7/checksum.go index 7eb1bf25a..8e4c27ce4 100644 --- a/vendor/github.com/minio/minio-go/v7/checksum.go +++ b/vendor/github.com/minio/minio-go/v7/checksum.go @@ -21,11 +21,15 @@ import ( "crypto/sha1" "crypto/sha256" "encoding/base64" + "encoding/binary" + "errors" "hash" "hash/crc32" + "hash/crc64" "io" "math/bits" "net/http" + "sort" ) // ChecksumType contains information about the checksum type. @@ -41,23 +45,41 @@ const ( ChecksumCRC32 // ChecksumCRC32C indicates a CRC32 checksum with Castagnoli table. ChecksumCRC32C + // ChecksumCRC64NVME indicates CRC64 with 0xad93d23594c93659 polynomial. + ChecksumCRC64NVME // Keep after all valid checksums checksumLast + // ChecksumFullObject is a modifier that can be used on CRC32 and CRC32C + // to indicate full object checksums. + ChecksumFullObject + // checksumMask is a mask for valid checksum types. checksumMask = checksumLast - 1 // ChecksumNone indicates no checksum. ChecksumNone ChecksumType = 0 - amzChecksumAlgo = "x-amz-checksum-algorithm" - amzChecksumCRC32 = "x-amz-checksum-crc32" - amzChecksumCRC32C = "x-amz-checksum-crc32c" - amzChecksumSHA1 = "x-amz-checksum-sha1" - amzChecksumSHA256 = "x-amz-checksum-sha256" + // ChecksumFullObjectCRC32 indicates full object CRC32 + ChecksumFullObjectCRC32 = ChecksumCRC32 | ChecksumFullObject + + // ChecksumFullObjectCRC32C indicates full object CRC32C + ChecksumFullObjectCRC32C = ChecksumCRC32C | ChecksumFullObject + + amzChecksumAlgo = "x-amz-checksum-algorithm" + amzChecksumCRC32 = "x-amz-checksum-crc32" + amzChecksumCRC32C = "x-amz-checksum-crc32c" + amzChecksumSHA1 = "x-amz-checksum-sha1" + amzChecksumSHA256 = "x-amz-checksum-sha256" + amzChecksumCRC64NVME = "x-amz-checksum-crc64nvme" ) +// Base returns the base type, without modifiers. +func (c ChecksumType) Base() ChecksumType { + return c & checksumMask +} + // Is returns if c is all of t. func (c ChecksumType) Is(t ChecksumType) bool { return c&t == t @@ -75,10 +97,39 @@ func (c ChecksumType) Key() string { return amzChecksumSHA1 case ChecksumSHA256: return amzChecksumSHA256 + case ChecksumCRC64NVME: + return amzChecksumCRC64NVME } return "" } +// CanComposite will return if the checksum type can be used for composite multipart upload on AWS. +func (c ChecksumType) CanComposite() bool { + switch c & checksumMask { + case ChecksumSHA256, ChecksumSHA1, ChecksumCRC32, ChecksumCRC32C: + return true + } + return false +} + +// CanMergeCRC will return if the checksum type can be used for multipart upload on AWS. +func (c ChecksumType) CanMergeCRC() bool { + switch c & checksumMask { + case ChecksumCRC32, ChecksumCRC32C, ChecksumCRC64NVME: + return true + } + return false +} + +// FullObjectRequested will return if the checksum type indicates full object checksum was requested. +func (c ChecksumType) FullObjectRequested() bool { + switch c & (ChecksumFullObject | checksumMask) { + case ChecksumFullObjectCRC32C, ChecksumFullObjectCRC32, ChecksumCRC64NVME: + return true + } + return false +} + // KeyCapitalized returns the capitalized key as used in HTTP headers. func (c ChecksumType) KeyCapitalized() string { return http.CanonicalHeaderKey(c.Key()) @@ -93,10 +144,17 @@ func (c ChecksumType) RawByteLen() int { return sha1.Size case ChecksumSHA256: return sha256.Size + case ChecksumCRC64NVME: + return crc64.Size } return 0 } +const crc64NVMEPolynomial = 0xad93d23594c93659 + +// crc64 uses reversed polynomials. +var crc64Table = crc64.MakeTable(bits.Reverse64(crc64NVMEPolynomial)) + // Hasher returns a hasher corresponding to the checksum type. // Returns nil if no checksum. func (c ChecksumType) Hasher() hash.Hash { @@ -109,13 +167,15 @@ func (c ChecksumType) Hasher() hash.Hash { return sha1.New() case ChecksumSHA256: return sha256.New() + case ChecksumCRC64NVME: + return crc64.New(crc64Table) } return nil } // IsSet returns whether the type is valid and known. func (c ChecksumType) IsSet() bool { - return bits.OnesCount32(uint32(c)) == 1 + return bits.OnesCount32(uint32(c&checksumMask)) == 1 } // SetDefault will set the checksum if not already set. @@ -125,6 +185,16 @@ func (c *ChecksumType) SetDefault(t ChecksumType) { } } +// EncodeToString the encoded hash value of the content provided in b. +func (c ChecksumType) EncodeToString(b []byte) string { + if !c.IsSet() { + return "" + } + h := c.Hasher() + h.Write(b) + return base64.StdEncoding.EncodeToString(h.Sum(nil)) +} + // String returns the type as a string. // CRC32, CRC32C, SHA1, and SHA256 for valid values. // Empty string for unset and "" if not valid. @@ -140,6 +210,8 @@ func (c ChecksumType) String() string { return "SHA256" case ChecksumNone: return "" + case ChecksumCRC64NVME: + return "CRC64NVME" } return "" } @@ -221,3 +293,129 @@ func (c Checksum) Raw() []byte { } return c.r } + +// CompositeChecksum returns the composite checksum of all provided parts. +func (c ChecksumType) CompositeChecksum(p []ObjectPart) (*Checksum, error) { + if !c.CanComposite() { + return nil, errors.New("cannot do composite checksum") + } + sort.Slice(p, func(i, j int) bool { + return p[i].PartNumber < p[j].PartNumber + }) + c = c.Base() + crcBytes := make([]byte, 0, len(p)*c.RawByteLen()) + for _, part := range p { + pCrc, err := part.ChecksumRaw(c) + if err != nil { + return nil, err + } + crcBytes = append(crcBytes, pCrc...) + } + h := c.Hasher() + h.Write(crcBytes) + return &Checksum{Type: c, r: h.Sum(nil)}, nil +} + +// FullObjectChecksum will return the full object checksum from provided parts. +func (c ChecksumType) FullObjectChecksum(p []ObjectPart) (*Checksum, error) { + if !c.CanMergeCRC() { + return nil, errors.New("cannot merge this checksum type") + } + c = c.Base() + sort.Slice(p, func(i, j int) bool { + return p[i].PartNumber < p[j].PartNumber + }) + + switch len(p) { + case 0: + return nil, errors.New("no parts given") + case 1: + check, err := p[0].ChecksumRaw(c) + if err != nil { + return nil, err + } + return &Checksum{ + Type: c, + r: check, + }, nil + } + var merged uint32 + var merged64 uint64 + first, err := p[0].ChecksumRaw(c) + if err != nil { + return nil, err + } + sz := p[0].Size + switch c { + case ChecksumCRC32, ChecksumCRC32C: + merged = binary.BigEndian.Uint32(first) + case ChecksumCRC64NVME: + merged64 = binary.BigEndian.Uint64(first) + } + + poly32 := uint32(crc32.IEEE) + if c.Is(ChecksumCRC32C) { + poly32 = crc32.Castagnoli + } + for _, part := range p[1:] { + if part.Size == 0 { + continue + } + sz += part.Size + pCrc, err := part.ChecksumRaw(c) + if err != nil { + return nil, err + } + switch c { + case ChecksumCRC32, ChecksumCRC32C: + merged = crc32Combine(poly32, merged, binary.BigEndian.Uint32(pCrc), part.Size) + case ChecksumCRC64NVME: + merged64 = crc64Combine(bits.Reverse64(crc64NVMEPolynomial), merged64, binary.BigEndian.Uint64(pCrc), part.Size) + } + } + var tmp [8]byte + switch c { + case ChecksumCRC32, ChecksumCRC32C: + binary.BigEndian.PutUint32(tmp[:], merged) + return &Checksum{ + Type: c, + r: tmp[:4], + }, nil + case ChecksumCRC64NVME: + binary.BigEndian.PutUint64(tmp[:], merged64) + return &Checksum{ + Type: c, + r: tmp[:8], + }, nil + default: + return nil, errors.New("unknown checksum type") + } +} + +func addAutoChecksumHeaders(opts *PutObjectOptions) { + if opts.UserMetadata == nil { + opts.UserMetadata = make(map[string]string, 1) + } + opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String() + if opts.AutoChecksum.FullObjectRequested() { + opts.UserMetadata["X-Amz-Checksum-Type"] = "FULL_OBJECT" + } +} + +func applyAutoChecksum(opts *PutObjectOptions, allParts []ObjectPart) { + if !opts.AutoChecksum.IsSet() { + return + } + if opts.AutoChecksum.CanComposite() && !opts.AutoChecksum.Is(ChecksumFullObject) { + // Add composite hash of hashes. + crc, err := opts.AutoChecksum.CompositeChecksum(allParts) + if err == nil { + opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): crc.Encoded()} + } + } else if opts.AutoChecksum.CanMergeCRC() { + crc, err := opts.AutoChecksum.FullObjectChecksum(allParts) + if err == nil { + opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): crc.Encoded(), "X-Amz-Checksum-Type": "FULL_OBJECT"} + } + } +} diff --git a/vendor/github.com/minio/minio-go/v7/functional_tests.go b/vendor/github.com/minio/minio-go/v7/functional_tests.go index 780dc8997..33e87e6e1 100644 --- a/vendor/github.com/minio/minio-go/v7/functional_tests.go +++ b/vendor/github.com/minio/minio-go/v7/functional_tests.go @@ -160,7 +160,7 @@ func logError(testName, function string, args map[string]interface{}, startTime } else { logFailure(testName, function, args, startTime, alert, message, err) if !isRunOnFail() { - panic(err) + panic(fmt.Sprintf("Test failed with message: %s, err: %v", message, err)) } } } @@ -393,6 +393,42 @@ func getFuncNameLoc(caller int) string { return strings.TrimPrefix(runtime.FuncForPC(pc).Name(), "main.") } +type ClientConfig struct { + // MinIO client configuration + TraceOn bool // Turn on tracing of HTTP requests and responses to stderr + CredsV2 bool // Use V2 credentials if true, otherwise use v4 + TrailingHeaders bool // Send trailing headers in requests +} + +func NewClient(config ClientConfig) (*minio.Client, error) { + // Instantiate new MinIO client + var creds *credentials.Credentials + if config.CredsV2 { + creds = credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), "") + } else { + creds = credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), "") + } + opts := &minio.Options{ + Creds: creds, + Transport: createHTTPTransport(), + Secure: mustParseBool(os.Getenv(enableHTTPS)), + TrailingHeaders: config.TrailingHeaders, + } + client, err := minio.New(os.Getenv(serverEndpoint), opts) + if err != nil { + return nil, err + } + + if config.TraceOn { + client.TraceOn(os.Stderr) + } + + // Set user agent. + client.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + + return client, nil +} + // Tests bucket re-create errors. func testMakeBucketError() { region := "eu-central-1" @@ -407,27 +443,12 @@ func testMakeBucketError() { "region": region, } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -462,20 +483,12 @@ func testMetadataSizeLimit() { "objectName": "", "opts.UserMetadata": "", } - rand.Seed(startTime.Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -531,27 +544,12 @@ func testMakeBucketRegions() { "region": region, } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -598,27 +596,12 @@ func testPutObjectReadAt() { "opts": "objectContentType", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -697,27 +680,12 @@ func testListObjectVersions() { "recursive": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -817,27 +785,12 @@ func testStatObjectWithVersioning() { function := "StatObject" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -935,27 +888,12 @@ func testGetObjectWithVersioning() { function := "GetObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1075,27 +1013,12 @@ func testPutObjectWithVersioning() { function := "GetObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1223,28 +1146,12 @@ func testListMultipartUpload() { function := "GetObject()" args := map[string]interface{}{} - // Instantiate new minio client object. - opts := &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - } - c, err := minio.New(os.Getenv(serverEndpoint), opts) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - core, err := minio.NewCore(os.Getenv(serverEndpoint), opts) - if err != nil { - logError(testName, function, args, startTime, "", "MinIO core client object creation failed", err) - return - } - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + core := minio.Core{Client: c} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -1347,27 +1254,12 @@ func testCopyObjectWithVersioning() { function := "CopyObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1485,27 +1377,12 @@ func testConcurrentCopyObjectWithVersioning() { function := "CopyObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1646,27 +1523,12 @@ func testComposeObjectWithVersioning() { function := "ComposeObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1787,27 +1649,12 @@ func testRemoveObjectWithVersioning() { function := "DeleteObject()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1900,27 +1747,12 @@ func testRemoveObjectsWithVersioning() { function := "DeleteObjects()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -1996,27 +1828,12 @@ func testObjectTaggingWithVersioning() { function := "{Get,Set,Remove}ObjectTagging()" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2164,27 +1981,12 @@ func testPutObjectWithChecksums() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2204,9 +2006,13 @@ func testPutObjectWithChecksums() { {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, {cs: minio.ChecksumSHA256}, + {cs: minio.ChecksumCRC64NVME}, } for _, test := range tests { + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } bufSize := dataFileMap["datafile-10-kB"] // Save the data @@ -2230,7 +2036,7 @@ func testPutObjectWithChecksums() { h := test.cs.Hasher() h.Reset() - // Test with Wrong CRC. + // Test with a bad CRC - we haven't called h.Write(b), so this is a checksum of empty data meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil)) args["metadata"] = meta args["range"] = "false" @@ -2263,6 +2069,7 @@ func testPutObjectWithChecksums() { cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) // Read the data back gopts := minio.GetObjectOptions{Checksum: true} @@ -2282,6 +2089,7 @@ func testPutObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(st.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) if st.Size != int64(bufSize) { logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) @@ -2325,12 +2133,12 @@ func testPutObjectWithChecksums() { cmpChecksum(st.ChecksumSHA1, "") cmpChecksum(st.ChecksumCRC32, "") cmpChecksum(st.ChecksumCRC32C, "") + cmpChecksum(st.ChecksumCRC64NVME, "") delete(args, "range") delete(args, "metadata") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with custom checksums. @@ -2350,28 +2158,12 @@ func testPutObjectWithTrailingChecksums() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - TrailingHeaders: true, - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2387,13 +2179,16 @@ func testPutObjectWithTrailingChecksums() { tests := []struct { cs minio.ChecksumType }{ + {cs: minio.ChecksumCRC64NVME}, {cs: minio.ChecksumCRC32C}, {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, {cs: minio.ChecksumSHA256}, } - for _, test := range tests { + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } function := "PutObject(bucketName, objectName, reader,size, opts)" bufSize := dataFileMap["datafile-10-kB"] @@ -2441,6 +2236,7 @@ func testPutObjectWithTrailingChecksums() { cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) // Read the data back gopts := minio.GetObjectOptions{Checksum: true} @@ -2461,6 +2257,7 @@ func testPutObjectWithTrailingChecksums() { cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"]) cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"]) cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"]) + cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"]) if st.Size != int64(bufSize) { logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err) @@ -2505,6 +2302,7 @@ func testPutObjectWithTrailingChecksums() { cmpChecksum(st.ChecksumSHA1, "") cmpChecksum(st.ChecksumCRC32, "") cmpChecksum(st.ChecksumCRC32C, "") + cmpChecksum(st.ChecksumCRC64NVME, "") function = "GetObjectAttributes(...)" s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) @@ -2519,9 +2317,8 @@ func testPutObjectWithTrailingChecksums() { delete(args, "range") delete(args, "metadata") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with custom checksums. @@ -2533,7 +2330,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { args := map[string]interface{}{ "bucketName": "", "objectName": "", - "opts": fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Progress: progress Checksum: %v}", trailing), + "opts": fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Trailing: %v}", trailing), } if !isFullMode() { @@ -2541,28 +2338,12 @@ func testPutMultipartObjectWithChecksums(trailing bool) { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - TrailingHeaders: trailing, - }) + c, err := NewClient(ClientConfig{TrailingHeaders: trailing}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2574,14 +2355,18 @@ func testPutMultipartObjectWithChecksums(trailing bool) { return } - hashMultiPart := func(b []byte, partSize int, hasher hash.Hash) string { + hashMultiPart := func(b []byte, partSize int, cs minio.ChecksumType) string { r := bytes.NewReader(b) + hasher := cs.Hasher() + if cs.FullObjectRequested() { + partSize = len(b) + } tmp := make([]byte, partSize) parts := 0 var all []byte for { n, err := io.ReadFull(r, tmp) - if err != nil && err != io.ErrUnexpectedEOF { + if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { logError(testName, function, args, startTime, "", "Calc crc failed", err) } if n == 0 { @@ -2595,6 +2380,9 @@ func testPutMultipartObjectWithChecksums(trailing bool) { break } } + if parts == 1 { + return base64.StdEncoding.EncodeToString(hasher.Sum(nil)) + } hasher.Reset() hasher.Write(all) return fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hasher.Sum(nil)), parts) @@ -2603,6 +2391,9 @@ func testPutMultipartObjectWithChecksums(trailing bool) { tests := []struct { cs minio.ChecksumType }{ + {cs: minio.ChecksumFullObjectCRC32}, + {cs: minio.ChecksumFullObjectCRC32C}, + {cs: minio.ChecksumCRC64NVME}, {cs: minio.ChecksumCRC32C}, {cs: minio.ChecksumCRC32}, {cs: minio.ChecksumSHA1}, @@ -2610,8 +2401,12 @@ func testPutMultipartObjectWithChecksums(trailing bool) { } for _, test := range tests { - bufSize := dataFileMap["datafile-129-MB"] + if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() { + continue + } + args["section"] = "prep" + bufSize := dataFileMap["datafile-129-MB"] // Save the data objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") args["objectName"] = objectName @@ -2620,7 +2415,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cmpChecksum := func(got, want string) { if want != got { logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got)) - //fmt.Printf("want %s, got %s\n", want, got) + // fmt.Printf("want %s, got %s\n", want, got) return } } @@ -2635,7 +2430,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { reader.Close() h := test.cs.Hasher() h.Reset() - want := hashMultiPart(b, partSize, test.cs.Hasher()) + want := hashMultiPart(b, partSize, test.cs) var cs minio.ChecksumType rd := io.Reader(io.NopCloser(bytes.NewReader(b))) @@ -2643,7 +2438,9 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cs = test.cs rd = bytes.NewReader(b) } + // Set correct CRC. + args["section"] = "PutObject" resp, err := c.PutObject(context.Background(), bucketName, objectName, rd, int64(bufSize), minio.PutObjectOptions{ DisableContentSha256: true, DisableMultipart: false, @@ -2657,7 +2454,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { return } - switch test.cs { + switch test.cs.Base() { case minio.ChecksumCRC32C: cmpChecksum(resp.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2666,15 +2463,41 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cmpChecksum(resp.ChecksumSHA1, want) case minio.ChecksumSHA256: cmpChecksum(resp.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + cmpChecksum(resp.ChecksumCRC64NVME, want) } + args["section"] = "HeadObject" + st, err := c.StatObject(context.Background(), bucketName, objectName, minio.StatObjectOptions{Checksum: true}) + if err != nil { + logError(testName, function, args, startTime, "", "StatObject failed", err) + return + } + switch test.cs.Base() { + case minio.ChecksumCRC32C: + cmpChecksum(st.ChecksumCRC32C, want) + case minio.ChecksumCRC32: + cmpChecksum(st.ChecksumCRC32, want) + case minio.ChecksumSHA1: + cmpChecksum(st.ChecksumSHA1, want) + case minio.ChecksumSHA256: + cmpChecksum(st.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + cmpChecksum(st.ChecksumCRC64NVME, want) + } + + args["section"] = "GetObjectAttributes" s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{}) if err != nil { logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err) return } - want = want[:strings.IndexByte(want, '-')] + + if strings.ContainsRune(want, '-') { + want = want[:strings.IndexByte(want, '-')] + } switch test.cs { + // Full Object CRC does not return anything with GetObjectAttributes case minio.ChecksumCRC32C: cmpChecksum(s.Checksum.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2690,13 +2513,14 @@ func testPutMultipartObjectWithChecksums(trailing bool) { gopts.PartNumber = 2 // We cannot use StatObject, since it ignores partnumber. + args["section"] = "GetObject-Part" r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) if err != nil { logError(testName, function, args, startTime, "", "GetObject failed", err) return } io.Copy(io.Discard, r) - st, err := r.Stat() + st, err = r.Stat() if err != nil { logError(testName, function, args, startTime, "", "Stat failed", err) return @@ -2708,6 +2532,7 @@ func testPutMultipartObjectWithChecksums(trailing bool) { want = base64.StdEncoding.EncodeToString(h.Sum(nil)) switch test.cs { + // Full Object CRC does not return any part CRC for whatever reason. case minio.ChecksumCRC32C: cmpChecksum(st.ChecksumCRC32C, want) case minio.ChecksumCRC32: @@ -2716,12 +2541,17 @@ func testPutMultipartObjectWithChecksums(trailing bool) { cmpChecksum(st.ChecksumSHA1, want) case minio.ChecksumSHA256: cmpChecksum(st.ChecksumSHA256, want) + case minio.ChecksumCRC64NVME: + // AWS doesn't return part checksum, but may in the future. + if st.ChecksumCRC64NVME != "" { + cmpChecksum(st.ChecksumCRC64NVME, want) + } } delete(args, "metadata") + delete(args, "section") + logSuccess(testName, function, args, startTime) } - - logSuccess(testName, function, args, startTime) } // Test PutObject with trailing checksums. @@ -2741,25 +2571,12 @@ func testTrailingChecksums() { return } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - TrailingHeaders: true, - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2881,7 +2698,6 @@ func testTrailingChecksums() { test.ChecksumCRC32C = hashMultiPart(b, int(test.PO.PartSize), test.hasher) // Set correct CRC. - // c.TraceOn(os.Stderr) resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), test.PO) if err != nil { logError(testName, function, args, startTime, "", "PutObject failed", err) @@ -2932,6 +2748,7 @@ func testTrailingChecksums() { } delete(args, "metadata") + logSuccess(testName, function, args, startTime) } } @@ -2952,25 +2769,12 @@ func testPutObjectWithAutomaticChecksums() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - TrailingHeaders: true, - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -2997,8 +2801,6 @@ func testPutObjectWithAutomaticChecksums() { {header: "x-amz-checksum-crc32c", hasher: crc32.New(crc32.MakeTable(crc32.Castagnoli))}, } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) // defer c.TraceOff() for i, test := range tests { @@ -3108,20 +2910,12 @@ func testGetObjectAttributes() { return } - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - TrailingHeaders: true, - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName err = c.MakeBucket( @@ -3315,19 +3109,12 @@ func testGetObjectAttributesSSECEncryption() { return } - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - TrailingHeaders: true, - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName err = c.MakeBucket( @@ -3401,19 +3188,12 @@ func testGetObjectAttributesErrorCases() { return } - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - TrailingHeaders: true, - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{TrailingHeaders: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) unknownBucket := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-bucket-") unknownObject := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-object-") @@ -3565,16 +3345,10 @@ func validateObjectAttributeRequest(OA *minio.ObjectAttributes, opts *minio.Obje } } - hasFullObjectChecksum := true - if OA.Checksum.ChecksumCRC32 == "" { - if OA.Checksum.ChecksumCRC32C == "" { - if OA.Checksum.ChecksumSHA1 == "" { - if OA.Checksum.ChecksumSHA256 == "" { - hasFullObjectChecksum = false - } - } - } - } + hasFullObjectChecksum := (OA.Checksum.ChecksumCRC32 != "" || + OA.Checksum.ChecksumCRC32C != "" || + OA.Checksum.ChecksumSHA1 != "" || + OA.Checksum.ChecksumSHA256 != "") if test.HasFullChecksum { if !hasFullObjectChecksum { @@ -3663,27 +3437,12 @@ func testPutObjectWithMetadata() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3770,27 +3529,12 @@ func testPutObjectWithContentLanguage() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3840,27 +3584,12 @@ func testPutObjectStreaming() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -3912,27 +3641,12 @@ func testGetObjectSeekEnd() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4035,27 +3749,12 @@ func testGetObjectClosedTwice() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4126,26 +3825,13 @@ func testRemoveObjectsContext() { "bucketName": "", } - // Seed random based on current tie. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Enable tracing, write to stdout. - // c.TraceOn(os.Stderr) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4223,27 +3909,12 @@ func testRemoveMultipleObjects() { "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - - // Enable tracing, write to stdout. - // c.TraceOn(os.Stderr) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4307,27 +3978,12 @@ func testRemoveMultipleObjectsWithResult() { "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - - // Enable tracing, write to stdout. - // c.TraceOn(os.Stderr) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4443,27 +4099,12 @@ func testFPutObjectMultipart() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4549,27 +4190,12 @@ func testFPutObject() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") location := "us-east-1" @@ -4719,27 +4345,13 @@ func testFPutObjectContext() { "fileName": "", "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4820,27 +4432,13 @@ func testFPutObjectContextV2() { "objectName": "", "opts": "minio.PutObjectOptions{ContentType:objectContentType}", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4925,24 +4523,12 @@ func testPutObjectContext() { "opts": "", } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -4995,27 +4581,12 @@ func testGetObjectS3Zip() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{"x-minio-extract": true} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5179,27 +4750,12 @@ func testGetObjectReadSeekFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5349,27 +4905,12 @@ func testGetObjectReadAtFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5527,27 +5068,12 @@ func testGetObjectReadAtWhenEOFWasReached() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -5647,27 +5173,12 @@ func testPresignedPostPolicy() { "policy": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -5695,50 +5206,22 @@ func testPresignedPostPolicy() { return } - // Save the data - _, err = c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(buf), int64(len(buf)), minio.PutObjectOptions{ContentType: "binary/octet-stream"}) - if err != nil { - logError(testName, function, args, startTime, "", "PutObject failed", err) - return - } - policy := minio.NewPostPolicy() - - if err := policy.SetBucket(""); err == nil { - logError(testName, function, args, startTime, "", "SetBucket did not fail for invalid conditions", err) - return - } - if err := policy.SetKey(""); err == nil { - logError(testName, function, args, startTime, "", "SetKey did not fail for invalid conditions", err) - return - } - if err := policy.SetExpires(time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC)); err == nil { - logError(testName, function, args, startTime, "", "SetExpires did not fail for invalid conditions", err) - return - } - if err := policy.SetContentType(""); err == nil { - logError(testName, function, args, startTime, "", "SetContentType did not fail for invalid conditions", err) - return - } - if err := policy.SetContentLengthRange(1024*1024, 1024); err == nil { - logError(testName, function, args, startTime, "", "SetContentLengthRange did not fail for invalid conditions", err) - return - } - if err := policy.SetUserMetadata("", ""); err == nil { - logError(testName, function, args, startTime, "", "SetUserMetadata did not fail for invalid conditions", err) - return - } - policy.SetBucket(bucketName) policy.SetKey(objectName) policy.SetExpires(time.Now().UTC().AddDate(0, 0, 10)) // expires in 10 days policy.SetContentType("binary/octet-stream") policy.SetContentLengthRange(10, 1024*1024) policy.SetUserMetadata(metadataKey, metadataValue) + policy.SetContentEncoding("gzip") // Add CRC32C checksum := minio.ChecksumCRC32C.ChecksumBytes(buf) - policy.SetChecksum(checksum) + err = policy.SetChecksum(checksum) + if err != nil { + logError(testName, function, args, startTime, "", "SetChecksum failed", err) + return + } args["policy"] = policy.String() @@ -5834,7 +5317,7 @@ func testPresignedPostPolicy() { expectedLocation := scheme + os.Getenv(serverEndpoint) + "/" + bucketName + "/" + objectName expectedLocationBucketDNS := scheme + bucketName + "." + os.Getenv(serverEndpoint) + "/" + objectName - if !strings.Contains(expectedLocation, "s3.amazonaws.com/") { + if !strings.Contains(expectedLocation, ".amazonaws.com/") { // Test when not against AWS S3. if val, ok := res.Header["Location"]; ok { if val[0] != expectedLocation && val[0] != expectedLocationBucketDNS { @@ -5846,9 +5329,184 @@ func testPresignedPostPolicy() { return } } - want := checksum.Encoded() - if got := res.Header.Get("X-Amz-Checksum-Crc32c"); got != want { - logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %q, got %q", want, got), nil) + wantChecksumCrc32c := checksum.Encoded() + if got := res.Header.Get("X-Amz-Checksum-Crc32c"); got != wantChecksumCrc32c { + logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %q, got %q", wantChecksumCrc32c, got), nil) + return + } + + // Ensure that when we subsequently GetObject, the checksum is returned + gopts := minio.GetObjectOptions{Checksum: true} + r, err := c.GetObject(context.Background(), bucketName, objectName, gopts) + if err != nil { + logError(testName, function, args, startTime, "", "GetObject failed", err) + return + } + st, err := r.Stat() + if err != nil { + logError(testName, function, args, startTime, "", "Stat failed", err) + return + } + if st.ChecksumCRC32C != wantChecksumCrc32c { + logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %s, got %s", wantChecksumCrc32c, st.ChecksumCRC32C), nil) + return + } + + logSuccess(testName, function, args, startTime) +} + +// testPresignedPostPolicyWrongFile tests that when we have a policy with a checksum, we cannot POST the wrong file +func testPresignedPostPolicyWrongFile() { + // initialize logging params + startTime := time.Now() + testName := getFuncName() + function := "PresignedPostPolicy(policy)" + args := map[string]interface{}{ + "policy": "", + } + + c, err := NewClient(ClientConfig{}) + if err != nil { + logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) + return + } + + // Generate a new random bucket name. + bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") + + // Make a new bucket in 'us-east-1' (source bucket). + err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"}) + if err != nil { + logError(testName, function, args, startTime, "", "MakeBucket failed", err) + return + } + + defer cleanupBucket(bucketName, c) + + objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "") + // Azure requires the key to not start with a number + metadataKey := randString(60, rand.NewSource(time.Now().UnixNano()), "user") + metadataValue := randString(60, rand.NewSource(time.Now().UnixNano()), "") + + policy := minio.NewPostPolicy() + policy.SetBucket(bucketName) + policy.SetKey(objectName) + policy.SetExpires(time.Now().UTC().AddDate(0, 0, 10)) // expires in 10 days + policy.SetContentType("binary/octet-stream") + policy.SetContentLengthRange(10, 1024*1024) + policy.SetUserMetadata(metadataKey, metadataValue) + + // Add CRC32C of some data that the policy will explicitly allow. + checksum := minio.ChecksumCRC32C.ChecksumBytes([]byte{0x01, 0x02, 0x03}) + err = policy.SetChecksum(checksum) + if err != nil { + logError(testName, function, args, startTime, "", "SetChecksum failed", err) + return + } + + args["policy"] = policy.String() + + presignedPostPolicyURL, formData, err := c.PresignedPostPolicy(context.Background(), policy) + if err != nil { + logError(testName, function, args, startTime, "", "PresignedPostPolicy failed", err) + return + } + + // At this stage, we have a policy that allows us to upload for a specific checksum. + // Test that uploading datafile-10-kB, with a different checksum, fails as expected + filePath := getMintDataDirFilePath("datafile-10-kB") + if filePath == "" { + // Make a temp file with 10 KB data. + file, err := os.CreateTemp(os.TempDir(), "PresignedPostPolicyTest") + if err != nil { + logError(testName, function, args, startTime, "", "TempFile creation failed", err) + return + } + if _, err = io.Copy(file, getDataReader("datafile-10-kB")); err != nil { + logError(testName, function, args, startTime, "", "Copy failed", err) + return + } + if err = file.Close(); err != nil { + logError(testName, function, args, startTime, "", "File Close failed", err) + return + } + filePath = file.Name() + } + fileReader := getDataReader("datafile-10-kB") + defer fileReader.Close() + buf10k, err := io.ReadAll(fileReader) + if err != nil { + logError(testName, function, args, startTime, "", "ReadAll failed", err) + return + } + otherChecksum := minio.ChecksumCRC32C.ChecksumBytes(buf10k) + + var formBuf bytes.Buffer + writer := multipart.NewWriter(&formBuf) + for k, v := range formData { + if k == "x-amz-checksum-crc32c" { + v = otherChecksum.Encoded() + } + writer.WriteField(k, v) + } + + // Add file to post request + f, err := os.Open(filePath) + defer f.Close() + if err != nil { + logError(testName, function, args, startTime, "", "File open failed", err) + return + } + w, err := writer.CreateFormFile("file", filePath) + if err != nil { + logError(testName, function, args, startTime, "", "CreateFormFile failed", err) + return + } + _, err = io.Copy(w, f) + if err != nil { + logError(testName, function, args, startTime, "", "Copy failed", err) + return + } + writer.Close() + + httpClient := &http.Client{ + Timeout: 30 * time.Second, + Transport: createHTTPTransport(), + } + args["url"] = presignedPostPolicyURL.String() + + req, err := http.NewRequest(http.MethodPost, presignedPostPolicyURL.String(), bytes.NewReader(formBuf.Bytes())) + if err != nil { + logError(testName, function, args, startTime, "", "HTTP request failed", err) + return + } + + req.Header.Set("Content-Type", writer.FormDataContentType()) + + // Make the POST request with the form data. + res, err := httpClient.Do(req) + if err != nil { + logError(testName, function, args, startTime, "", "HTTP request failed", err) + return + } + defer res.Body.Close() + if res.StatusCode != http.StatusForbidden { + logError(testName, function, args, startTime, "", "HTTP request unexpected status", errors.New(res.Status)) + return + } + + // Read the response body, ensure it has checksum failure message + resBody, err := io.ReadAll(res.Body) + if err != nil { + logError(testName, function, args, startTime, "", "ReadAll failed", err) + return + } + + // Normalize the response body, because S3 uses quotes around the policy condition components + // in the error message, MinIO does not. + resBodyStr := strings.ReplaceAll(string(resBody), `"`, "") + if !strings.Contains(resBodyStr, "Policy Condition failed: [eq, $x-amz-checksum-crc32c, 8TDyHg=") { + logError(testName, function, args, startTime, "", "Unexpected response body", errors.New(resBodyStr)) return } @@ -5863,27 +5521,12 @@ func testCopyObject() { function := "CopyObject(dst, src)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -6058,27 +5701,12 @@ func testSSECEncryptedGetObjectReadSeekFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6241,27 +5869,12 @@ func testSSES3EncryptedGetObjectReadSeekFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6422,27 +6035,12 @@ func testSSECEncryptedGetObjectReadAtFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6606,27 +6204,12 @@ func testSSES3EncryptedGetObjectReadAtFunctional() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6791,27 +6374,13 @@ func testSSECEncryptionPutGet() { "objectName": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -6901,27 +6470,13 @@ func testSSECEncryptionFPut() { "contentType": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -7024,27 +6579,13 @@ func testSSES3EncryptionPutGet() { "objectName": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -7132,27 +6673,13 @@ func testSSES3EncryptionFPut() { "contentType": "", "sse": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -7261,26 +6788,12 @@ func testBucketNotification() { return } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable to debug - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - bucketName := os.Getenv("NOTIFY_BUCKET") args["bucketName"] = bucketName @@ -7356,26 +6869,12 @@ func testFunctional() { functionAll := "" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, nil, startTime, "", "MinIO client object creation failed", err) return } - // Enable to debug - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -8035,24 +7534,12 @@ func testGetObjectModified() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8131,24 +7618,12 @@ func testPutObjectUploadSeekedObject() { "contentType": "binary/octet-stream", } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8251,27 +7726,12 @@ func testMakeBucketErrorV2() { "region": "eu-west-1", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") region := "eu-west-1" @@ -8311,27 +7771,12 @@ func testGetObjectClosedTwiceV2() { "region": "eu-west-1", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8402,27 +7847,12 @@ func testFPutObjectV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8563,27 +7993,12 @@ func testMakeBucketRegionsV2() { "region": "eu-west-1", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8626,27 +8041,12 @@ func testGetObjectReadSeekFunctionalV2() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8781,27 +8181,12 @@ func testGetObjectReadAtFunctionalV2() { function := "GetObject(bucketName, objectName)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -8943,27 +8328,12 @@ func testCopyObjectV2() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") @@ -9162,13 +8532,7 @@ func testComposeObjectErrorCasesV2() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9260,13 +8624,7 @@ func testCompose10KSourcesV2() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9282,13 +8640,7 @@ func testEncryptedEmptyObject() { function := "PutObject(bucketName, objectName, reader, objectSize, opts)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -9436,7 +8788,7 @@ func testEncryptedCopyObjectWrapper(c *minio.Client, bucketName string, sseSrc, dstEncryption = sseDst } // 3. get copied object and check if content is equal - coreClient := minio.Core{c} + coreClient := minio.Core{Client: c} reader, _, _, err := coreClient.GetObject(context.Background(), bucketName, "dstObject", minio.GetObjectOptions{ServerSideEncryption: dstEncryption}) if err != nil { logError(testName, function, args, startTime, "", "GetObject failed", err) @@ -9543,13 +8895,7 @@ func testUnencryptedToSSECCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9558,7 +8904,6 @@ func testUnencryptedToSSECCopyObject() { bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, nil, sseDst) } @@ -9570,13 +8915,7 @@ func testUnencryptedToSSES3CopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9586,7 +8925,6 @@ func testUnencryptedToSSES3CopyObject() { var sseSrc encrypt.ServerSide sseDst := encrypt.NewSSE() - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9598,13 +8936,7 @@ func testUnencryptedToUnencryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9613,7 +8945,6 @@ func testUnencryptedToUnencryptedCopyObject() { bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") var sseSrc, sseDst encrypt.ServerSide - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9625,13 +8956,7 @@ func testEncryptedSSECToSSECCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9641,7 +8966,6 @@ func testEncryptedSSECToSSECCopyObject() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9653,13 +8977,7 @@ func testEncryptedSSECToSSES3CopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9669,7 +8987,6 @@ func testEncryptedSSECToSSES3CopyObject() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) sseDst := encrypt.NewSSE() - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9681,13 +8998,7 @@ func testEncryptedSSECToUnencryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9697,7 +9008,6 @@ func testEncryptedSSECToUnencryptedCopyObject() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) var sseDst encrypt.ServerSide - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9709,13 +9019,7 @@ func testEncryptedSSES3ToSSECCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9725,7 +9029,6 @@ func testEncryptedSSES3ToSSECCopyObject() { sseSrc := encrypt.NewSSE() sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9737,13 +9040,7 @@ func testEncryptedSSES3ToSSES3CopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9753,7 +9050,6 @@ func testEncryptedSSES3ToSSES3CopyObject() { sseSrc := encrypt.NewSSE() sseDst := encrypt.NewSSE() - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9765,13 +9061,7 @@ func testEncryptedSSES3ToUnencryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9781,7 +9071,6 @@ func testEncryptedSSES3ToUnencryptedCopyObject() { sseSrc := encrypt.NewSSE() var sseDst encrypt.ServerSide - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9793,13 +9082,7 @@ func testEncryptedCopyObjectV2() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9809,7 +9092,6 @@ func testEncryptedCopyObjectV2() { sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject")) sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject")) - // c.TraceOn(os.Stderr) testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst) } @@ -9820,13 +9102,7 @@ func testDecryptedCopyObject() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return @@ -9880,26 +9156,14 @@ func testSSECMultipartEncryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10078,26 +9342,14 @@ func testSSECEncryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10256,26 +9508,14 @@ func testSSECEncryptedToUnencryptedCopyPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10433,26 +9673,14 @@ func testSSECEncryptedToSSES3CopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10613,26 +9841,14 @@ func testUnencryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10788,26 +10004,14 @@ func testUnencryptedToUnencryptedCopyPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -10959,26 +10163,14 @@ func testUnencryptedToSSES3CopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11132,26 +10324,14 @@ func testSSES3EncryptedToSSECCopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11308,26 +10488,14 @@ func testSSES3EncryptedToUnencryptedCopyPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11480,26 +10648,14 @@ func testSSES3EncryptedToSSES3CopyObjectPart() { function := "CopyObjectPart(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - client, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + client, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return } // Instantiate new core client object. - c := minio.Core{client} - - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) + c := minio.Core{Client: client} // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test") @@ -11654,19 +10810,12 @@ func testUserMetadataCopying() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // c.TraceOn(os.Stderr) testUserMetadataCopyingWrapper(c) } @@ -11831,19 +10980,12 @@ func testUserMetadataCopyingV2() { function := "CopyObject(destination, source)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) return } - // c.TraceOn(os.Stderr) testUserMetadataCopyingWrapper(c) } @@ -11854,13 +10996,7 @@ func testStorageClassMetadataPutObject() { args := map[string]interface{}{} testName := getFuncName() - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -11942,13 +11078,7 @@ func testStorageClassInvalidMetadataPutObject() { args := map[string]interface{}{} testName := getFuncName() - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -11985,13 +11115,7 @@ func testStorageClassMetadataCopyObject() { args := map[string]interface{}{} testName := getFuncName() - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err) return @@ -12112,27 +11236,12 @@ func testPutObjectNoLengthV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12188,27 +11297,12 @@ func testPutObjectsUnknownV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12279,27 +11373,12 @@ func testPutObject0ByteV2() { "opts": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12344,13 +11423,7 @@ func testComposeObjectErrorCases() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return @@ -12367,13 +11440,7 @@ func testCompose10KSources() { function := "ComposeObject(destination, sourceList)" args := map[string]interface{}{} - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return @@ -12391,26 +11458,12 @@ func testFunctionalV2() { functionAll := "" args := map[string]interface{}{} - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - Transport: createHTTPTransport(), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) return } - // Enable to debug - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") location := "us-east-1" @@ -12844,27 +11897,13 @@ func testGetObjectContext() { "bucketName": "", "objectName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -12947,27 +11986,13 @@ func testFGetObjectContext() { "objectName": "", "fileName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13039,24 +12064,12 @@ func testGetObjectRanges() { defer cancel() rng := rand.NewSource(time.Now().UnixNano()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rng, "minio-go-test-") args["bucketName"] = bucketName @@ -13146,27 +12159,13 @@ func testGetObjectACLContext() { "bucketName": "", "objectName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13324,24 +12323,12 @@ func testPutObjectContextV2() { "size": "", "opts": "", } - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Make a new bucket. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13396,27 +12383,13 @@ func testGetObjectContextV2() { "bucketName": "", "objectName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13497,27 +12470,13 @@ func testFGetObjectContextV2() { "objectName": "", "fileName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{CredsV2: true}) if err != nil { - logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err) + logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13586,27 +12545,13 @@ func testListObjects() { "objectPrefix": "", "recursive": "true", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -13690,24 +12635,12 @@ func testCors() { "cors": "", } - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Create or reuse a bucket that will get cors settings applied to it and deleted when done bucketName := os.Getenv("MINIO_GO_TEST_BUCKET_CORS") if bucketName == "" { @@ -14426,24 +13359,12 @@ func testCorsSetGetDelete() { "cors": "", } - // Instantiate new minio client object - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14525,27 +13446,13 @@ func testRemoveObjects() { "objectPrefix": "", "recursive": "true", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14659,27 +13566,13 @@ func testGetBucketTagging() { args := map[string]interface{}{ "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14715,27 +13608,13 @@ func testSetBucketTagging() { "bucketName": "", "tags": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14801,27 +13680,13 @@ func testRemoveBucketTagging() { args := map[string]interface{}{ "bucketName": "", } - // Seed random based on current time. - rand.Seed(time.Now().Unix()) - // Instantiate new minio client object. - c, err := minio.New(os.Getenv(serverEndpoint), - &minio.Options{ - Creds: credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""), - Transport: createHTTPTransport(), - Secure: mustParseBool(os.Getenv(enableHTTPS)), - }) + c, err := NewClient(ClientConfig{}) if err != nil { logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err) return } - // Enable tracing, write to stderr. - // c.TraceOn(os.Stderr) - - // Set user agent. - c.SetAppInfo("MinIO-go-FunctionalTest", appVersion) - // Generate a new random bucket name. bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-") args["bucketName"] = bucketName @@ -14967,6 +13832,7 @@ func main() { testGetObjectReadAtFunctional() testGetObjectReadAtWhenEOFWasReached() testPresignedPostPolicy() + testPresignedPostPolicyWrongFile() testCopyObject() testComposeObjectErrorCases() testCompose10KSources() diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go index 596d95152..8c06bac60 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go @@ -25,6 +25,7 @@ import ( "io" "net/http" "net/url" + "os" "strconv" "strings" "time" @@ -57,9 +58,10 @@ type WebIdentityResult struct { // WebIdentityToken - web identity token with expiry. type WebIdentityToken struct { - Token string - AccessToken string - Expiry int + Token string + AccessToken string + RefreshToken string + Expiry int } // A STSWebIdentity retrieves credentials from MinIO service, and keeps track if @@ -85,29 +87,59 @@ type STSWebIdentity struct { // assuming. RoleARN string + // Policy is the policy where the credentials should be limited too. + Policy string + // roleSessionName is the identifier for the assumed role session. roleSessionName string } // NewSTSWebIdentity returns a pointer to a new // Credentials object wrapping the STSWebIdentity. -func NewSTSWebIdentity(stsEndpoint string, getWebIDTokenExpiry func() (*WebIdentityToken, error)) (*Credentials, error) { +func NewSTSWebIdentity(stsEndpoint string, getWebIDTokenExpiry func() (*WebIdentityToken, error), opts ...func(*STSWebIdentity)) (*Credentials, error) { if stsEndpoint == "" { return nil, errors.New("STS endpoint cannot be empty") } if getWebIDTokenExpiry == nil { return nil, errors.New("Web ID token and expiry retrieval function should be defined") } - return New(&STSWebIdentity{ + i := &STSWebIdentity{ Client: &http.Client{ Transport: http.DefaultTransport, }, STSEndpoint: stsEndpoint, GetWebIDTokenExpiry: getWebIDTokenExpiry, - }), nil + } + for _, o := range opts { + o(i) + } + return New(i), nil +} + +// NewKubernetesIdentity returns a pointer to a new +// Credentials object using the Kubernetes service account +func NewKubernetesIdentity(stsEndpoint string, opts ...func(*STSWebIdentity)) (*Credentials, error) { + return NewSTSWebIdentity(stsEndpoint, func() (*WebIdentityToken, error) { + token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return nil, err + } + + return &WebIdentityToken{ + Token: string(token), + }, nil + }, opts...) +} + +// WithPolicy option will enforce that the returned credentials +// will be scoped down to the specified policy +func WithPolicy(policy string) func(*STSWebIdentity) { + return func(i *STSWebIdentity) { + i.Policy = policy + } } -func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSessionName string, +func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSessionName string, policy string, getWebIDTokenExpiry func() (*WebIdentityToken, error), ) (AssumeRoleWithWebIdentityResponse, error) { idToken, err := getWebIDTokenExpiry() @@ -130,9 +162,16 @@ func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSession // Usually set when server is using extended userInfo endpoint. v.Set("WebIdentityAccessToken", idToken.AccessToken) } + if idToken.RefreshToken != "" { + // Usually set when server is using extended userInfo endpoint. + v.Set("WebIdentityRefreshToken", idToken.RefreshToken) + } if idToken.Expiry > 0 { v.Set("DurationSeconds", fmt.Sprintf("%d", idToken.Expiry)) } + if policy != "" { + v.Set("Policy", policy) + } v.Set("Version", STSVersion) u, err := url.Parse(endpoint) @@ -183,7 +222,7 @@ func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSession // Retrieve retrieves credentials from the MinIO service. // Error will be returned if the request fails. func (m *STSWebIdentity) Retrieve() (Value, error) { - a, err := getWebIdentityCredentials(m.Client, m.STSEndpoint, m.RoleARN, m.roleSessionName, m.GetWebIDTokenExpiry) + a, err := getWebIdentityCredentials(m.Client, m.STSEndpoint, m.RoleARN, m.roleSessionName, m.Policy, m.GetWebIDTokenExpiry) if err != nil { return Value{}, err } diff --git a/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go b/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go index e706b57de..344af2b78 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go @@ -434,12 +434,34 @@ func (de DelMarkerExpiration) MarshalXML(enc *xml.Encoder, start xml.StartElemen return enc.EncodeElement(delMarkerExp(de), start) } +// AllVersionsExpiration represents AllVersionsExpiration actions element in an ILM policy +type AllVersionsExpiration struct { + XMLName xml.Name `xml:"AllVersionsExpiration" json:"-"` + Days int `xml:"Days,omitempty" json:"Days,omitempty"` + DeleteMarker ExpireDeleteMarker `xml:"DeleteMarker,omitempty" json:"DeleteMarker,omitempty"` +} + +// IsNull returns true if days field is 0 +func (e AllVersionsExpiration) IsNull() bool { + return e.Days == 0 +} + +// MarshalXML satisfies xml.Marshaler to provide custom encoding +func (e AllVersionsExpiration) MarshalXML(enc *xml.Encoder, start xml.StartElement) error { + if e.IsNull() { + return nil + } + type allVersionsExp AllVersionsExpiration + return enc.EncodeElement(allVersionsExp(e), start) +} + // MarshalJSON customizes json encoding by omitting empty values func (r Rule) MarshalJSON() ([]byte, error) { type rule struct { AbortIncompleteMultipartUpload *AbortIncompleteMultipartUpload `json:"AbortIncompleteMultipartUpload,omitempty"` Expiration *Expiration `json:"Expiration,omitempty"` DelMarkerExpiration *DelMarkerExpiration `json:"DelMarkerExpiration,omitempty"` + AllVersionsExpiration *AllVersionsExpiration `json:"AllVersionsExpiration,omitempty"` ID string `json:"ID"` RuleFilter *Filter `json:"Filter,omitempty"` NoncurrentVersionExpiration *NoncurrentVersionExpiration `json:"NoncurrentVersionExpiration,omitempty"` @@ -475,6 +497,9 @@ func (r Rule) MarshalJSON() ([]byte, error) { if !r.NoncurrentVersionTransition.isNull() { newr.NoncurrentVersionTransition = &r.NoncurrentVersionTransition } + if !r.AllVersionsExpiration.IsNull() { + newr.AllVersionsExpiration = &r.AllVersionsExpiration + } return json.Marshal(newr) } @@ -485,6 +510,7 @@ type Rule struct { AbortIncompleteMultipartUpload AbortIncompleteMultipartUpload `xml:"AbortIncompleteMultipartUpload,omitempty" json:"AbortIncompleteMultipartUpload,omitempty"` Expiration Expiration `xml:"Expiration,omitempty" json:"Expiration,omitempty"` DelMarkerExpiration DelMarkerExpiration `xml:"DelMarkerExpiration,omitempty" json:"DelMarkerExpiration,omitempty"` + AllVersionsExpiration AllVersionsExpiration `xml:"AllVersionsExpiration,omitempty" json:"AllVersionsExpiration,omitempty"` ID string `xml:"ID" json:"ID"` RuleFilter Filter `xml:"Filter,omitempty" json:"Filter,omitempty"` NoncurrentVersionExpiration NoncurrentVersionExpiration `xml:"NoncurrentVersionExpiration,omitempty" json:"NoncurrentVersionExpiration,omitempty"` diff --git a/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go b/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go index 7a84a6f34..33465c632 100644 --- a/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go +++ b/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go @@ -69,7 +69,7 @@ const ( // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions // borrowed from this article and also testing various ASCII characters following regex // is supported by AWS S3 for both tags and values. -var validTagKeyValue = regexp.MustCompile(`^[a-zA-Z0-9-+\-._:/@ ]+$`) +var validTagKeyValue = regexp.MustCompile(`^[a-zA-Z0-9-+\-._:/@ =]+$`) func checkKey(key string) error { if len(key) == 0 { diff --git a/vendor/github.com/minio/minio-go/v7/post-policy.go b/vendor/github.com/minio/minio-go/v7/post-policy.go index 19687e027..26bf441b5 100644 --- a/vendor/github.com/minio/minio-go/v7/post-policy.go +++ b/vendor/github.com/minio/minio-go/v7/post-policy.go @@ -85,7 +85,7 @@ func (p *PostPolicy) SetExpires(t time.Time) error { // SetKey - Sets an object name for the policy based upload. func (p *PostPolicy) SetKey(key string) error { - if strings.TrimSpace(key) == "" || key == "" { + if strings.TrimSpace(key) == "" { return errInvalidArgument("Object name is empty.") } policyCond := policyCondition{ @@ -118,7 +118,7 @@ func (p *PostPolicy) SetKeyStartsWith(keyStartsWith string) error { // SetBucket - Sets bucket at which objects will be uploaded to. func (p *PostPolicy) SetBucket(bucketName string) error { - if strings.TrimSpace(bucketName) == "" || bucketName == "" { + if strings.TrimSpace(bucketName) == "" { return errInvalidArgument("Bucket name is empty.") } policyCond := policyCondition{ @@ -135,7 +135,7 @@ func (p *PostPolicy) SetBucket(bucketName string) error { // SetCondition - Sets condition for credentials, date and algorithm func (p *PostPolicy) SetCondition(matchType, condition, value string) error { - if strings.TrimSpace(value) == "" || value == "" { + if strings.TrimSpace(value) == "" { return errInvalidArgument("No value specified for condition") } @@ -156,7 +156,7 @@ func (p *PostPolicy) SetCondition(matchType, condition, value string) error { // SetTagging - Sets tagging for the object for this policy based upload. func (p *PostPolicy) SetTagging(tagging string) error { - if strings.TrimSpace(tagging) == "" || tagging == "" { + if strings.TrimSpace(tagging) == "" { return errInvalidArgument("No tagging specified.") } _, err := tags.ParseObjectXML(strings.NewReader(tagging)) @@ -178,7 +178,7 @@ func (p *PostPolicy) SetTagging(tagging string) error { // SetContentType - Sets content-type of the object for this policy // based upload. func (p *PostPolicy) SetContentType(contentType string) error { - if strings.TrimSpace(contentType) == "" || contentType == "" { + if strings.TrimSpace(contentType) == "" { return errInvalidArgument("No content type specified.") } policyCond := policyCondition{ @@ -211,7 +211,7 @@ func (p *PostPolicy) SetContentTypeStartsWith(contentTypeStartsWith string) erro // SetContentDisposition - Sets content-disposition of the object for this policy func (p *PostPolicy) SetContentDisposition(contentDisposition string) error { - if strings.TrimSpace(contentDisposition) == "" || contentDisposition == "" { + if strings.TrimSpace(contentDisposition) == "" { return errInvalidArgument("No content disposition specified.") } policyCond := policyCondition{ @@ -226,27 +226,44 @@ func (p *PostPolicy) SetContentDisposition(contentDisposition string) error { return nil } +// SetContentEncoding - Sets content-encoding of the object for this policy +func (p *PostPolicy) SetContentEncoding(contentEncoding string) error { + if strings.TrimSpace(contentEncoding) == "" { + return errInvalidArgument("No content encoding specified.") + } + policyCond := policyCondition{ + matchType: "eq", + condition: "$Content-Encoding", + value: contentEncoding, + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } + p.formData["Content-Encoding"] = contentEncoding + return nil +} + // SetContentLengthRange - Set new min and max content length // condition for all incoming uploads. -func (p *PostPolicy) SetContentLengthRange(min, max int64) error { - if min > max { +func (p *PostPolicy) SetContentLengthRange(minLen, maxLen int64) error { + if minLen > maxLen { return errInvalidArgument("Minimum limit is larger than maximum limit.") } - if min < 0 { + if minLen < 0 { return errInvalidArgument("Minimum limit cannot be negative.") } - if max <= 0 { + if maxLen <= 0 { return errInvalidArgument("Maximum limit cannot be non-positive.") } - p.contentLengthRange.min = min - p.contentLengthRange.max = max + p.contentLengthRange.min = minLen + p.contentLengthRange.max = maxLen return nil } // SetSuccessActionRedirect - Sets the redirect success url of the object for this policy // based upload. func (p *PostPolicy) SetSuccessActionRedirect(redirect string) error { - if strings.TrimSpace(redirect) == "" || redirect == "" { + if strings.TrimSpace(redirect) == "" { return errInvalidArgument("Redirect is empty") } policyCond := policyCondition{ @@ -264,7 +281,7 @@ func (p *PostPolicy) SetSuccessActionRedirect(redirect string) error { // SetSuccessStatusAction - Sets the status success code of the object for this policy // based upload. func (p *PostPolicy) SetSuccessStatusAction(status string) error { - if strings.TrimSpace(status) == "" || status == "" { + if strings.TrimSpace(status) == "" { return errInvalidArgument("Status is empty") } policyCond := policyCondition{ @@ -282,10 +299,10 @@ func (p *PostPolicy) SetSuccessStatusAction(status string) error { // SetUserMetadata - Set user metadata as a key/value couple. // Can be retrieved through a HEAD request or an event. func (p *PostPolicy) SetUserMetadata(key, value string) error { - if strings.TrimSpace(key) == "" || key == "" { + if strings.TrimSpace(key) == "" { return errInvalidArgument("Key is empty") } - if strings.TrimSpace(value) == "" || value == "" { + if strings.TrimSpace(value) == "" { return errInvalidArgument("Value is empty") } headerName := fmt.Sprintf("x-amz-meta-%s", key) @@ -304,7 +321,7 @@ func (p *PostPolicy) SetUserMetadata(key, value string) error { // SetUserMetadataStartsWith - Set how an user metadata should starts with. // Can be retrieved through a HEAD request or an event. func (p *PostPolicy) SetUserMetadataStartsWith(key, value string) error { - if strings.TrimSpace(key) == "" || key == "" { + if strings.TrimSpace(key) == "" { return errInvalidArgument("Key is empty") } headerName := fmt.Sprintf("x-amz-meta-%s", key) @@ -321,11 +338,29 @@ func (p *PostPolicy) SetUserMetadataStartsWith(key, value string) error { } // SetChecksum sets the checksum of the request. -func (p *PostPolicy) SetChecksum(c Checksum) { +func (p *PostPolicy) SetChecksum(c Checksum) error { if c.IsSet() { p.formData[amzChecksumAlgo] = c.Type.String() p.formData[c.Type.Key()] = c.Encoded() + + policyCond := policyCondition{ + matchType: "eq", + condition: fmt.Sprintf("$%s", amzChecksumAlgo), + value: c.Type.String(), + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } + policyCond = policyCondition{ + matchType: "eq", + condition: fmt.Sprintf("$%s", c.Type.Key()), + value: c.Encoded(), + } + if err := p.addNewPolicy(policyCond); err != nil { + return err + } } + return nil } // SetEncryption - sets encryption headers for POST API diff --git a/vendor/github.com/minio/minio-go/v7/retry-continous.go b/vendor/github.com/minio/minio-go/v7/retry-continous.go index bfeea95f3..81fcf16f1 100644 --- a/vendor/github.com/minio/minio-go/v7/retry-continous.go +++ b/vendor/github.com/minio/minio-go/v7/retry-continous.go @@ -20,7 +20,7 @@ package minio import "time" // newRetryTimerContinous creates a timer with exponentially increasing delays forever. -func (c *Client) newRetryTimerContinous(unit, cap time.Duration, jitter float64, doneCh chan struct{}) <-chan int { +func (c *Client) newRetryTimerContinous(baseSleep, maxSleep time.Duration, jitter float64, doneCh chan struct{}) <-chan int { attemptCh := make(chan int) // normalize jitter to the range [0, 1.0] @@ -39,10 +39,10 @@ func (c *Client) newRetryTimerContinous(unit, cap time.Duration, jitter float64, if attempt > maxAttempt { attempt = maxAttempt } - // sleep = random_between(0, min(cap, base * 2 ** attempt)) - sleep := unit * time.Duration(1< cap { - sleep = cap + // sleep = random_between(0, min(maxSleep, base * 2 ** attempt)) + sleep := baseSleep * time.Duration(1< maxSleep { + sleep = maxSleep } if jitter != NoJitter { sleep -= time.Duration(c.random.Float64() * float64(sleep) * jitter) diff --git a/vendor/github.com/minio/minio-go/v7/retry.go b/vendor/github.com/minio/minio-go/v7/retry.go index d15eb5901..4cc45920c 100644 --- a/vendor/github.com/minio/minio-go/v7/retry.go +++ b/vendor/github.com/minio/minio-go/v7/retry.go @@ -45,7 +45,7 @@ var DefaultRetryCap = time.Second // newRetryTimer creates a timer with exponentially increasing // delays until the maximum retry attempts are reached. -func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, unit, cap time.Duration, jitter float64) <-chan int { +func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, baseSleep, maxSleep time.Duration, jitter float64) <-chan int { attemptCh := make(chan int) // computes the exponential backoff duration according to @@ -59,10 +59,10 @@ func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, unit, cap time jitter = MaxJitter } - // sleep = random_between(0, min(cap, base * 2 ** attempt)) - sleep := unit * time.Duration(1< cap { - sleep = cap + // sleep = random_between(0, min(maxSleep, base * 2 ** attempt)) + sleep := baseSleep * time.Duration(1< maxSleep { + sleep = maxSleep } if jitter != NoJitter { sleep -= time.Duration(c.random.Float64() * float64(sleep) * jitter) diff --git a/vendor/github.com/minio/minio-go/v7/utils.go b/vendor/github.com/minio/minio-go/v7/utils.go index a5beb371f..cd7d2c27e 100644 --- a/vendor/github.com/minio/minio-go/v7/utils.go +++ b/vendor/github.com/minio/minio-go/v7/utils.go @@ -378,10 +378,11 @@ func ToObjectInfo(bucketName, objectName string, h http.Header) (ObjectInfo, err Restore: restore, // Checksum values - ChecksumCRC32: h.Get("x-amz-checksum-crc32"), - ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"), - ChecksumSHA1: h.Get("x-amz-checksum-sha1"), - ChecksumSHA256: h.Get("x-amz-checksum-sha256"), + ChecksumCRC32: h.Get(ChecksumCRC32.Key()), + ChecksumCRC32C: h.Get(ChecksumCRC32C.Key()), + ChecksumSHA1: h.Get(ChecksumSHA1.Key()), + ChecksumSHA256: h.Get(ChecksumSHA256.Key()), + ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()), }, nil } @@ -698,3 +699,146 @@ func (h *hashReaderWrapper) Read(p []byte) (n int, err error) { } return n, err } + +// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration. +// Used uint for unsigned long. Used uint32 for input arguments in order to match +// the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib) +// Modified for hash/crc64 by Klaus Post, 2024. +func gf2MatrixTimes(mat []uint64, vec uint64) uint64 { + var sum uint64 + + for vec != 0 { + if vec&1 != 0 { + sum ^= mat[0] + } + vec >>= 1 + mat = mat[1:] + } + return sum +} + +func gf2MatrixSquare(square, mat []uint64) { + if len(square) != len(mat) { + panic("square matrix size mismatch") + } + for n := range mat { + square[n] = gf2MatrixTimes(mat, mat[n]) + } +} + +// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32 +// hash values crc1 and crc2. poly represents the generator polynomial +// and len2 specifies the byte length that the crc2 hash covers. +func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 { + // degenerate case (also disallow negative lengths) + if len2 <= 0 { + return crc1 + } + + even := make([]uint64, 32) // even-power-of-two zeros operator + odd := make([]uint64, 32) // odd-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = uint64(poly) // CRC-32 polynomial + row := uint64(1) + for n := 1; n < 32; n++ { + odd[n] = row + row <<= 1 + } + + // put operator for two zero bits in even + gf2MatrixSquare(even, odd) + + // put operator for four zero bits in odd + gf2MatrixSquare(odd, even) + + // apply len2 zeros to crc1 (first square will put the operator for one + // zero byte, eight zero bits, in even) + crc1n := uint64(crc1) + for { + // apply zeros operator for this bit of len2 + gf2MatrixSquare(even, odd) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(even, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + + // another iteration of the loop with odd and even swapped + gf2MatrixSquare(odd, even) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(odd, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + } + + // return combined crc + crc1n ^= uint64(crc2) + return uint32(crc1n) +} + +func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 { + // degenerate case (also disallow negative lengths) + if len2 <= 0 { + return crc1 + } + + even := make([]uint64, 64) // even-power-of-two zeros operator + odd := make([]uint64, 64) // odd-power-of-two zeros operator + + // put operator for one zero bit in odd + odd[0] = poly // CRC-64 polynomial + row := uint64(1) + for n := 1; n < 64; n++ { + odd[n] = row + row <<= 1 + } + + // put operator for two zero bits in even + gf2MatrixSquare(even, odd) + + // put operator for four zero bits in odd + gf2MatrixSquare(odd, even) + + // apply len2 zeros to crc1 (first square will put the operator for one + // zero byte, eight zero bits, in even) + crc1n := crc1 + for { + // apply zeros operator for this bit of len2 + gf2MatrixSquare(even, odd) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(even, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + + // another iteration of the loop with odd and even swapped + gf2MatrixSquare(odd, even) + if len2&1 != 0 { + crc1n = gf2MatrixTimes(odd, crc1n) + } + len2 >>= 1 + + // if no more bits set, then done + if len2 == 0 { + break + } + } + + // return combined crc + crc1n ^= crc2 + return crc1n +} diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/agent.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/agent.go index 47fab9897..cffa7dfc1 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/agent.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/agent.go @@ -214,6 +214,7 @@ func FlowsAgent(cfg *Config) (*Flows, error) { FilterIPCIDR: r.FilterIPCIDR, FilterProtocol: r.FilterProtocol, FilterPeerIP: r.FilterPeerIP, + FilterPeerCIDR: r.FilterPeerCIDR, FilterDestinationPort: tracer.ConvertFilterPortsToInstr(r.FilterDestinationPort, r.FilterDestinationPortRange, r.FilterDestinationPorts), FilterSourcePort: tracer.ConvertFilterPortsToInstr(r.FilterSourcePort, r.FilterSourcePortRange, r.FilterSourcePorts), FilterPort: tracer.ConvertFilterPortsToInstr(r.FilterPort, r.FilterPortRange, r.FilterPorts), diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/config.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/config.go index 9968b01a5..84677bf90 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/config.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/config.go @@ -77,6 +77,9 @@ type FlowFilter struct { FilterDrops bool `json:"drops,omitempty"` // FilterSample is the sample rate this matching flow will use FilterSample uint32 `json:"sample,omitempty"` + // FilterPeerCIDR is the PeerIP CIDR to filter flows. + // Example: 10.10.10.0/24 or 100:100:100:100::/64, default is 0.0.0.0/0 + FilterPeerCIDR string `json:"peer_cidr,omitempty"` } type Config struct { diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/packets_agent.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/packets_agent.go index 1157901c3..b3b95ea34 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/packets_agent.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/agent/packets_agent.go @@ -78,27 +78,26 @@ func PacketsAgent(cfg *Config) (*Packets, error) { debug = true } filterRules := make([]*tracer.FilterConfig, 0) - if cfg.EnableFlowFilter { - var flowFilters []*FlowFilter - if err := json.Unmarshal([]byte(cfg.FlowFilterRules), &flowFilters); err != nil { - return nil, err - } + var flowFilters []*FlowFilter + if err := json.Unmarshal([]byte(cfg.FlowFilterRules), &flowFilters); err != nil { + return nil, err + } - for _, r := range flowFilters { - filterRules = append(filterRules, &tracer.FilterConfig{ - FilterAction: r.FilterAction, - FilterDirection: r.FilterDirection, - FilterIPCIDR: r.FilterIPCIDR, - FilterProtocol: r.FilterProtocol, - FilterPeerIP: r.FilterPeerIP, - FilterDestinationPort: tracer.ConvertFilterPortsToInstr(r.FilterDestinationPort, r.FilterDestinationPortRange, r.FilterDestinationPorts), - FilterSourcePort: tracer.ConvertFilterPortsToInstr(r.FilterSourcePort, r.FilterSourcePortRange, r.FilterSourcePorts), - FilterPort: tracer.ConvertFilterPortsToInstr(r.FilterPort, r.FilterPortRange, r.FilterPorts), - FilterTCPFlags: r.FilterTCPFlags, - FilterDrops: r.FilterDrops, - FilterSample: r.FilterSample, - }) - } + for _, r := range flowFilters { + filterRules = append(filterRules, &tracer.FilterConfig{ + FilterAction: r.FilterAction, + FilterDirection: r.FilterDirection, + FilterIPCIDR: r.FilterIPCIDR, + FilterProtocol: r.FilterProtocol, + FilterPeerIP: r.FilterPeerIP, + FilterPeerCIDR: r.FilterPeerCIDR, + FilterDestinationPort: tracer.ConvertFilterPortsToInstr(r.FilterDestinationPort, r.FilterDestinationPortRange, r.FilterDestinationPorts), + FilterSourcePort: tracer.ConvertFilterPortsToInstr(r.FilterSourcePort, r.FilterSourcePortRange, r.FilterSourcePorts), + FilterPort: tracer.ConvertFilterPortsToInstr(r.FilterPort, r.FilterPortRange, r.FilterPorts), + FilterTCPFlags: r.FilterTCPFlags, + FilterDrops: r.FilterDrops, + FilterSample: r.FilterSample, + }) } ebpfConfig := &tracer.FlowFetcherConfig{ EnableIngress: ingress, diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.go index f12dda23f..ecae50cad 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.go @@ -13,14 +13,17 @@ import ( ) type BpfAdditionalMetrics struct { + StartMonoTimeTs uint64 + EndMonoTimeTs uint64 DnsRecord BpfDnsRecordT PktDrops BpfPktDropsT FlowRtt uint64 NetworkEventsIdx uint8 NetworkEvents [4][8]uint8 _ [1]byte + EthProtocol uint16 TranslatedFlow BpfTranslatedFlowT - _ [6]byte + _ [4]byte } type BpfDirectionT uint32 @@ -59,8 +62,9 @@ const ( ) type BpfFilterKeyT struct { - PrefixLen uint32 - IpData [16]uint8 + PrefixLen uint32 + IpData [16]uint8 + PeerIpData [16]uint8 } type BpfFilterValueT struct { diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.o b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.o index 240486c52..78c5e4967 100644 Binary files a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.o and b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_arm64_bpfel.o differ diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.go index a8b825750..555aedc37 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.go @@ -13,14 +13,17 @@ import ( ) type BpfAdditionalMetrics struct { + StartMonoTimeTs uint64 + EndMonoTimeTs uint64 DnsRecord BpfDnsRecordT PktDrops BpfPktDropsT FlowRtt uint64 NetworkEventsIdx uint8 NetworkEvents [4][8]uint8 _ [1]byte + EthProtocol uint16 TranslatedFlow BpfTranslatedFlowT - _ [6]byte + _ [4]byte } type BpfDirectionT uint32 @@ -59,8 +62,9 @@ const ( ) type BpfFilterKeyT struct { - PrefixLen uint32 - IpData [16]uint8 + PrefixLen uint32 + IpData [16]uint8 + PeerIpData [16]uint8 } type BpfFilterValueT struct { diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.o b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.o index fae0677f0..2b79ebcea 100644 Binary files a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.o and b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_powerpc_bpfel.o differ diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.go index 3372ed20b..d1d73ff71 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.go @@ -13,14 +13,17 @@ import ( ) type BpfAdditionalMetrics struct { + StartMonoTimeTs uint64 + EndMonoTimeTs uint64 DnsRecord BpfDnsRecordT PktDrops BpfPktDropsT FlowRtt uint64 NetworkEventsIdx uint8 NetworkEvents [4][8]uint8 _ [1]byte + EthProtocol uint16 TranslatedFlow BpfTranslatedFlowT - _ [6]byte + _ [4]byte } type BpfDirectionT uint32 @@ -59,8 +62,9 @@ const ( ) type BpfFilterKeyT struct { - PrefixLen uint32 - IpData [16]uint8 + PrefixLen uint32 + IpData [16]uint8 + PeerIpData [16]uint8 } type BpfFilterValueT struct { diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.o b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.o index aae1b0003..f463b8b88 100644 Binary files a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.o and b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_s390_bpfeb.o differ diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.go index a5839acdd..a36ac163f 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.go @@ -13,14 +13,17 @@ import ( ) type BpfAdditionalMetrics struct { + StartMonoTimeTs uint64 + EndMonoTimeTs uint64 DnsRecord BpfDnsRecordT PktDrops BpfPktDropsT FlowRtt uint64 NetworkEventsIdx uint8 NetworkEvents [4][8]uint8 _ [1]byte + EthProtocol uint16 TranslatedFlow BpfTranslatedFlowT - _ [6]byte + _ [4]byte } type BpfDirectionT uint32 @@ -59,8 +62,9 @@ const ( ) type BpfFilterKeyT struct { - PrefixLen uint32 - IpData [16]uint8 + PrefixLen uint32 + IpData [16]uint8 + PeerIpData [16]uint8 } type BpfFilterValueT struct { diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.o b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.o index 9399eaff3..33edfa321 100644 Binary files a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.o and b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf/bpf_x86_bpfel.o differ diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/model/flow_content.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/model/flow_content.go index 111303565..57b2721a6 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/model/flow_content.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/model/flow_content.go @@ -1,6 +1,8 @@ package model -import "github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf" +import ( + "github.com/netobserv/netobserv-ebpf-agent/pkg/ebpf" +) type BpfFlowContent struct { *ebpf.BpfFlowMetrics @@ -57,10 +59,30 @@ func AccumulateBase(p *ebpf.BpfFlowMetrics, other *ebpf.BpfFlowMetrics) *ebpf.Bp return p } +func (p *BpfFlowContent) buildBaseFromAdditional(add *ebpf.BpfAdditionalMetrics) { + if add == nil { + return + } + // Accumulate time into base metrics if unset + if p.BpfFlowMetrics.StartMonoTimeTs == 0 || (p.BpfFlowMetrics.StartMonoTimeTs > add.StartMonoTimeTs && add.StartMonoTimeTs != 0) { + p.BpfFlowMetrics.StartMonoTimeTs = add.StartMonoTimeTs + } + if p.BpfFlowMetrics.EndMonoTimeTs == 0 || p.BpfFlowMetrics.EndMonoTimeTs < add.EndMonoTimeTs { + p.BpfFlowMetrics.EndMonoTimeTs = add.EndMonoTimeTs + } + if p.BpfFlowMetrics.EthProtocol == 0 { + p.BpfFlowMetrics.EthProtocol = add.EthProtocol + } + if p.BpfFlowMetrics.Flags == 0 && add.PktDrops.LatestFlags != 0 { + p.BpfFlowMetrics.Flags = add.PktDrops.LatestFlags + } +} + func (p *BpfFlowContent) AccumulateAdditional(other *ebpf.BpfAdditionalMetrics) { if other == nil { return } + p.buildBaseFromAdditional(other) if p.AdditionalMetrics == nil { p.AdditionalMetrics = other return @@ -83,6 +105,9 @@ func (p *BpfFlowContent) AccumulateAdditional(other *ebpf.BpfAdditionalMetrics) if other.PktDrops.LatestDropCause != 0 { p.AdditionalMetrics.PktDrops.LatestDropCause = other.PktDrops.LatestDropCause } + if other.PktDrops.LatestState != 0 { + p.AdditionalMetrics.PktDrops.LatestState = other.PktDrops.LatestState + } // RTT if p.AdditionalMetrics.FlowRtt < other.FlowRtt { p.AdditionalMetrics.FlowRtt = other.FlowRtt diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/flow_filter.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/flow_filter.go index b29e2dcfb..cd2b22760 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/flow_filter.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/flow_filter.go @@ -22,6 +22,7 @@ type FilterConfig struct { FilterIcmpType int FilterIcmpCode int FilterPeerIP string + FilterPeerCIDR string FilterAction string FilterTCPFlags string FilterDrops bool @@ -82,6 +83,32 @@ func (f *Filter) getFilterKey(config *FilterConfig) (ebpf.BpfFilterKeyT, error) pfLen, _ := ipNet.Mask.Size() key.PrefixLen = uint32(pfLen) + if config.FilterPeerCIDR == "" && config.FilterPeerIP == "" { + config.FilterPeerCIDR = "0.0.0.0/0" + } + + if config.FilterPeerCIDR != "" { + ip, ipNet, err = net.ParseCIDR(config.FilterPeerCIDR) + if err != nil { + return key, fmt.Errorf("failed to parse FlowFilterPeerCIDR: %w", err) + } + if ip.To4() != nil { + copy(key.PeerIpData[:], ip.To4()) + } else { + copy(key.PeerIpData[:], ip.To16()) + } + pfLen, _ = ipNet.Mask.Size() + key.PrefixLen += uint32(pfLen) + } else if config.FilterPeerIP != "" { + ip = []byte(config.FilterPeerIP) + copy(key.PeerIpData[:], ip) + if ip.To4() != nil { + key.PrefixLen += 32 + } else { + key.PrefixLen += 128 + } + } + return key, nil } diff --git a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/tracer.go b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/tracer.go index 6b9ab6cc3..2b355230b 100644 --- a/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/tracer.go +++ b/vendor/github.com/netobserv/netobserv-ebpf-agent/pkg/tracer/tracer.go @@ -852,23 +852,21 @@ func (m *FlowFetcher) LookupAndDeleteMap(met *metrics.Metrics) map[ebpf.BpfFlowI } flowMap := m.objects.AggregatedFlows - - iterator := flowMap.Iterate() var flows = make(map[ebpf.BpfFlowId]model.BpfFlowContent, m.cacheMaxSize) var ids []ebpf.BpfFlowId var id ebpf.BpfFlowId var baseMetrics ebpf.BpfFlowMetrics - var additionalMetrics []ebpf.BpfAdditionalMetrics // First, get all ids and don't care about metrics (we need lookup+delete to be atomic) + iterator := flowMap.Iterate() for iterator.Next(&id, &baseMetrics) { ids = append(ids, id) } - count := 0 + countMain := 0 // Run the atomic Lookup+Delete; if new ids have been inserted in the meantime, they'll be fetched next time for i, id := range ids { - count++ + countMain++ if err := flowMap.LookupAndDelete(&id, &baseMetrics); err != nil { if i == 0 && errors.Is(err, cilium.ErrNotSupported) { log.WithError(err).Warnf("switching to legacy mode") @@ -881,25 +879,42 @@ func (m *FlowFetcher) LookupAndDeleteMap(met *metrics.Metrics) map[ebpf.BpfFlowI } flowPayload := model.BpfFlowContent{BpfFlowMetrics: &ebpf.BpfFlowMetrics{}} flowPayload.AccumulateBase(&baseMetrics) + flows[id] = flowPayload + } - // Fetch additional metrics; ids are without direction and interface - shorterID := id - shorterID.Direction = 0 - shorterID.IfIndex = 0 - if err := m.objects.AdditionalFlowMetrics.LookupAndDelete(&shorterID, &additionalMetrics); err != nil { - if !errors.Is(err, cilium.ErrKeyNotExist) { - log.WithError(err).WithField("flowId", id).Warnf("couldn't lookup/delete additional metrics entry") - met.Errors.WithErrorName("flow-fetcher", "CannotDeleteAdditionalMetric").Inc() - } - } else { - for i := range additionalMetrics { - flowPayload.AccumulateAdditional(&additionalMetrics[i]) + // Reiterate on additional metrics + var additionalMetrics []ebpf.BpfAdditionalMetrics + ids = []ebpf.BpfFlowId{} + addtlIterator := m.objects.AdditionalFlowMetrics.Iterate() + for addtlIterator.Next(&id, &additionalMetrics) { + ids = append(ids, id) + } + + countAdditional := 0 + for i, id := range ids { + countAdditional++ + if err := m.objects.AdditionalFlowMetrics.LookupAndDelete(&id, &additionalMetrics); err != nil { + if i == 0 && errors.Is(err, cilium.ErrNotSupported) { + log.WithError(err).Warnf("switching to legacy mode") + m.lookupAndDeleteSupported = false + return m.legacyLookupAndDeleteMap(met) } + log.WithError(err).WithField("flowId", id).Warnf("couldn't lookup/delete additional metrics entry") + met.Errors.WithErrorName("flow-fetcher", "CannotDeleteAdditionalMetric").Inc() + continue + } + flowPayload, found := flows[id] + if !found { + flowPayload = model.BpfFlowContent{BpfFlowMetrics: &ebpf.BpfFlowMetrics{}} + } + for iMet := range additionalMetrics { + flowPayload.AccumulateAdditional(&additionalMetrics[iMet]) } flows[id] = flowPayload } - met.BufferSizeGauge.WithBufferName("hashmap-total").Set(float64(count)) - met.BufferSizeGauge.WithBufferName("hashmap-unique").Set(float64(len(flows))) + met.BufferSizeGauge.WithBufferName("flowmap").Set(float64(countMain)) + met.BufferSizeGauge.WithBufferName("additionalmap").Set(float64(countAdditional)) + met.BufferSizeGauge.WithBufferName("merged-maps").Set(float64(len(flows))) m.ReadGlobalCounter(met) return flows @@ -1246,6 +1261,12 @@ func NewPacketFetcher(cfg *FlowFetcherConfig) (*PacketFetcher, error) { return nil, fmt.Errorf("rewriting BPF constants definition: %w", err) } + // remove pinning from all maps + maps2Name := []string{"aggregated_flows", "additional_flow_metrics", "direct_flows", "dns_flows", "filter_map", "global_counters", "packet_record"} + for _, m := range maps2Name { + spec.Maps[m].Pinning = 0 + } + type pcaBpfPrograms struct { TcEgressPcaParse *cilium.Program `ebpf:"tc_egress_pca_parse"` TcIngressPcaParse *cilium.Program `ebpf:"tc_ingress_pca_parse"` @@ -1272,7 +1293,7 @@ func NewPacketFetcher(cfg *FlowFetcherConfig) (*PacketFetcher, error) { delete(spec.Programs, constEnableNetworkEventsMonitoring) delete(spec.Programs, constNetworkEventsMonitoringGroupID) - if err := spec.LoadAndAssign(&newObjects, nil); err != nil { + if err := spec.LoadAndAssign(&newObjects, &cilium.CollectionOptions{Maps: cilium.MapOptions{PinPath: ""}}); err != nil { var ve *cilium.VerifierError if errors.As(err, &ve) { // Using %+v will print the whole verifier error, not just the last diff --git a/vendor/go.opentelemetry.io/otel/.gitignore b/vendor/go.opentelemetry.io/otel/.gitignore index 895c7664b..ae8577ef3 100644 --- a/vendor/go.opentelemetry.io/otel/.gitignore +++ b/vendor/go.opentelemetry.io/otel/.gitignore @@ -12,11 +12,3 @@ go.work go.work.sum gen/ - -/example/dice/dice -/example/namedtracer/namedtracer -/example/otel-collector/otel-collector -/example/opencensus/opencensus -/example/passthrough/passthrough -/example/prometheus/prometheus -/example/zipkin/zipkin diff --git a/vendor/go.opentelemetry.io/otel/.golangci.yml b/vendor/go.opentelemetry.io/otel/.golangci.yml index d09555506..dbfb2a165 100644 --- a/vendor/go.opentelemetry.io/otel/.golangci.yml +++ b/vendor/go.opentelemetry.io/otel/.golangci.yml @@ -127,8 +127,6 @@ linters-settings: - "**/metric/**/*.go" - "**/bridge/*.go" - "**/bridge/**/*.go" - - "**/example/*.go" - - "**/example/**/*.go" - "**/trace/*.go" - "**/trace/**/*.go" - "**/log/*.go" diff --git a/vendor/go.opentelemetry.io/otel/CHANGELOG.md b/vendor/go.opentelemetry.io/otel/CHANGELOG.md index 4b361d026..8f68dbd04 100644 --- a/vendor/go.opentelemetry.io/otel/CHANGELOG.md +++ b/vendor/go.opentelemetry.io/otel/CHANGELOG.md @@ -11,6 +11,52 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm +## [1.32.0/0.54.0/0.8.0/0.0.11] 2024-11-08 + +### Added + +- Add `go.opentelemetry.io/otel/sdk/metric/exemplar.AlwaysOffFilter`, which can be used to disable exemplar recording. (#5850) +- Add `go.opentelemetry.io/otel/sdk/metric.WithExemplarFilter`, which can be used to configure the exemplar filter used by the metrics SDK. (#5850) +- Add `ExemplarReservoirProviderSelector` and `DefaultExemplarReservoirProviderSelector` to `go.opentelemetry.io/otel/sdk/metric`, which defines the exemplar reservoir to use based on the aggregation of the metric. (#5861) +- Add `ExemplarReservoirProviderSelector` to `go.opentelemetry.io/otel/sdk/metric.Stream` to allow using views to configure the exemplar reservoir to use for a metric. (#5861) +- Add `ReservoirProvider`, `HistogramReservoirProvider` and `FixedSizeReservoirProvider` to `go.opentelemetry.io/otel/sdk/metric/exemplar` to make it convenient to use providers of Reservoirs. (#5861) +- The `go.opentelemetry.io/otel/semconv/v1.27.0` package. + The package contains semantic conventions from the `v1.27.0` version of the OpenTelemetry Semantic Conventions. (#5894) +- Add `Attributes attribute.Set` field to `Scope` in `go.opentelemetry.io/otel/sdk/instrumentation`. (#5903) +- Add `Attributes attribute.Set` field to `ScopeRecords` in `go.opentelemetry.io/otel/log/logtest`. (#5927) +- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` adds instrumentation scope attributes. (#5934) +- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp` adds instrumentation scope attributes. (#5934) +- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc` adds instrumentation scope attributes. (#5935) +- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp` adds instrumentation scope attributes. (#5935) +- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc` adds instrumentation scope attributes. (#5933) +- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` adds instrumentation scope attributes. (#5933) +- `go.opentelemetry.io/otel/exporters/prometheus` adds instrumentation scope attributes in `otel_scope_info` metric as labels. (#5932) + +### Changed + +- Support scope attributes and make them as identifying for `Tracer` in `go.opentelemetry.io/otel` and `go.opentelemetry.io/otel/sdk/trace`. (#5924) +- Support scope attributes and make them as identifying for `Meter` in `go.opentelemetry.io/otel` and `go.opentelemetry.io/otel/sdk/metric`. (#5926) +- Support scope attributes and make them as identifying for `Logger` in `go.opentelemetry.io/otel` and `go.opentelemetry.io/otel/sdk/log`. (#5925) +- Make schema URL and scope attributes as identifying for `Tracer` in `go.opentelemetry.io/otel/bridge/opentracing`. (#5931) +- Clear unneeded slice elements to allow GC to collect the objects in `go.opentelemetry.io/otel/sdk/metric` and `go.opentelemetry.io/otel/sdk/trace`. (#5804) + +### Fixed + +- Global MeterProvider registration unwraps global instrument Observers, the undocumented Unwrap() methods are now private. (#5881) +- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc` now keeps the metadata already present in the context when `WithHeaders` is used. (#5892) +- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc` now keeps the metadata already present in the context when `WithHeaders` is used. (#5911) +- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` now keeps the metadata already present in the context when `WithHeaders` is used. (#5915) +- Fix `go.opentelemetry.io/otel/exporters/prometheus` trying to add exemplars to Gauge metrics, which is unsupported. (#5912) +- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc`. (#5944) +- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`. (#5944) +- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`. (#5944) +- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`. (#5944) +- Fix incorrect metrics generated from callbacks when multiple readers are used in `go.opentelemetry.io/otel/sdk/metric`. (#5900) + +### Removed + +- Remove all examples under `go.opentelemetry.io/otel/example` as they are moved to [Contrib repository](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/examples). (#5930) + ## [1.31.0/0.53.0/0.7.0/0.0.10] 2024-10-11 ### Added @@ -3110,7 +3156,8 @@ It contains api and sdk for trace and meter. - CircleCI build CI manifest files. - CODEOWNERS file to track owners of this project. -[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.31.0...HEAD +[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.32.0...HEAD +[1.32.0/0.54.0/0.8.0/0.0.11]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.32.0 [1.31.0/0.53.0/0.7.0/0.0.10]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.31.0 [1.30.0/0.52.0/0.6.0/0.0.9]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.30.0 [1.29.0/0.51.0/0.5.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.29.0 diff --git a/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md b/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md index bb3396557..22a2e9dbd 100644 --- a/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md +++ b/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md @@ -629,6 +629,10 @@ should be canceled. ## Approvers and Maintainers +### Triagers + +- [Cheng-Zhen Yang](https://github.com/scorpionknifes), Independent + ### Approvers ### Maintainers @@ -641,13 +645,13 @@ should be canceled. ### Emeritus -- [Aaron Clawson](https://github.com/MadVikingGod), LightStep -- [Anthony Mirabella](https://github.com/Aneurysm9), AWS -- [Chester Cheung](https://github.com/hanyuancheung), Tencent -- [Evan Torrie](https://github.com/evantorrie), Yahoo -- [Gustavo Silva Paiva](https://github.com/paivagustavo), LightStep -- [Josh MacDonald](https://github.com/jmacd), LightStep -- [Liz Fong-Jones](https://github.com/lizthegrey), Honeycomb +- [Aaron Clawson](https://github.com/MadVikingGod) +- [Anthony Mirabella](https://github.com/Aneurysm9) +- [Chester Cheung](https://github.com/hanyuancheung) +- [Evan Torrie](https://github.com/evantorrie) +- [Gustavo Silva Paiva](https://github.com/paivagustavo) +- [Josh MacDonald](https://github.com/jmacd) +- [Liz Fong-Jones](https://github.com/lizthegrey) ### Become an Approver or a Maintainer diff --git a/vendor/go.opentelemetry.io/otel/Makefile b/vendor/go.opentelemetry.io/otel/Makefile index a1228a212..b8292a4fb 100644 --- a/vendor/go.opentelemetry.io/otel/Makefile +++ b/vendor/go.opentelemetry.io/otel/Makefile @@ -260,7 +260,7 @@ SEMCONVPKG ?= "semconv/" semconv-generate: $(SEMCONVGEN) $(SEMCONVKIT) [ "$(TAG)" ] || ( echo "TAG unset: missing opentelemetry semantic-conventions tag"; exit 1 ) [ "$(OTEL_SEMCONV_REPO)" ] || ( echo "OTEL_SEMCONV_REPO unset: missing path to opentelemetry semantic-conventions repo"; exit 1 ) - $(SEMCONVGEN) -i "$(OTEL_SEMCONV_REPO)/model/." --only=attribute_group -p conventionType=trace -f attribute_group.go -t "$(SEMCONVPKG)/template.j2" -s "$(TAG)" + $(SEMCONVGEN) -i "$(OTEL_SEMCONV_REPO)/model/." --only=attribute_group -p conventionType=trace -f attribute_group.go -z "$(SEMCONVPKG)/capitalizations.txt" -t "$(SEMCONVPKG)/template.j2" -s "$(TAG)" $(SEMCONVGEN) -i "$(OTEL_SEMCONV_REPO)/model/." --only=metric -f metric.go -t "$(SEMCONVPKG)/metric_template.j2" -s "$(TAG)" $(SEMCONVKIT) -output "$(SEMCONVPKG)/$(TAG)" -tag "$(TAG)" diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go index 428cfea23..e0fa0570a 100644 --- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go +++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go @@ -155,7 +155,12 @@ func (c *client) exportContext(parent context.Context) (context.Context, context } if c.metadata.Len() > 0 { - ctx = metadata.NewOutgoingContext(ctx, c.metadata) + md := c.metadata + if outMD, ok := metadata.FromOutgoingContext(ctx); ok { + md = metadata.Join(md, outMD) + } + + ctx = metadata.NewOutgoingContext(ctx, md) } return ctx, cancel diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go index b6ed9a2bb..c016b4dbe 100644 --- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go +++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go @@ -139,7 +139,7 @@ func NewGRPCConfig(opts ...GRPCOption) Config { if cfg.ServiceConfig != "" { cfg.DialOptions = append(cfg.DialOptions, grpc.WithDefaultServiceConfig(cfg.ServiceConfig)) } - // Priroritize GRPCCredentials over Insecure (passing both is an error). + // Prioritize GRPCCredentials over Insecure (passing both is an error). if cfg.Metrics.GRPCCredentials != nil { cfg.DialOptions = append(cfg.DialOptions, grpc.WithTransportCredentials(cfg.Metrics.GRPCCredentials)) } else if cfg.Metrics.Insecure { @@ -287,9 +287,7 @@ func WithEndpointURL(v string) GenericOption { cfg.Metrics.Endpoint = u.Host cfg.Metrics.URLPath = u.Path - if u.Scheme != "https" { - cfg.Metrics.Insecure = true - } + cfg.Metrics.Insecure = u.Scheme != "https" return cfg }) diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go index 0229ac80b..03e7fbcdf 100644 --- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go +++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go @@ -14,7 +14,7 @@ import ( ) // ReadTLSConfigFromFile reads a PEM certificate file and creates -// a tls.Config that will use this certifate to verify a server certificate. +// a tls.Config that will use this certificate to verify a server certificate. func ReadTLSConfigFromFile(path string) (*tls.Config, error) { b, err := os.ReadFile(path) if err != nil { diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go index 975e3b7aa..abf7f0219 100644 --- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go +++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go @@ -46,8 +46,9 @@ func ScopeMetrics(sms []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { out = append(out, &mpb.ScopeMetrics{ Scope: &cpb.InstrumentationScope{ - Name: sm.Scope.Name, - Version: sm.Scope.Version, + Name: sm.Scope.Name, + Version: sm.Scope.Version, + Attributes: AttrIter(sm.Scope.Attributes.Iter()), }, Metrics: ms, SchemaUrl: sm.Scope.SchemaURL, @@ -83,13 +84,13 @@ func metric(m metricdata.Metrics) (*mpb.Metric, error) { } switch a := m.Data.(type) { case metricdata.Gauge[int64]: - out.Data = Gauge[int64](a) + out.Data = Gauge(a) case metricdata.Gauge[float64]: - out.Data = Gauge[float64](a) + out.Data = Gauge(a) case metricdata.Sum[int64]: - out.Data, err = Sum[int64](a) + out.Data, err = Sum(a) case metricdata.Sum[float64]: - out.Data, err = Sum[float64](a) + out.Data, err = Sum(a) case metricdata.Histogram[int64]: out.Data, err = Histogram(a) case metricdata.Histogram[float64]: @@ -279,10 +280,7 @@ func Temporality(t metricdata.Temporality) (mpb.AggregationTemporality, error) { // timeUnixNano on the zero Time returns 0. // The result does not depend on the location associated with t. func timeUnixNano(t time.Time) uint64 { - if t.IsZero() { - return 0 - } - return uint64(t.UnixNano()) + return uint64(max(0, t.UnixNano())) // nolint:gosec // Overflow checked. } // Exemplars returns a slice of OTLP Exemplars generated from exemplars. diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go index 1046eb593..19b789b8b 100644 --- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go +++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go @@ -5,5 +5,5 @@ package otlpmetricgrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlpme // Version is the current release version of the OpenTelemetry OTLP over gRPC metrics exporter in use. func Version() string { - return "1.29.0" + return "1.32.0" } diff --git a/vendor/go.opentelemetry.io/otel/internal/global/instruments.go b/vendor/go.opentelemetry.io/otel/internal/global/instruments.go index 3a0cc42f6..ae92a4251 100644 --- a/vendor/go.opentelemetry.io/otel/internal/global/instruments.go +++ b/vendor/go.opentelemetry.io/otel/internal/global/instruments.go @@ -13,7 +13,7 @@ import ( // unwrapper unwraps to return the underlying instrument implementation. type unwrapper interface { - Unwrap() metric.Observable + unwrap() metric.Observable } type afCounter struct { @@ -40,7 +40,7 @@ func (i *afCounter) setDelegate(m metric.Meter) { i.delegate.Store(ctr) } -func (i *afCounter) Unwrap() metric.Observable { +func (i *afCounter) unwrap() metric.Observable { if ctr := i.delegate.Load(); ctr != nil { return ctr.(metric.Float64ObservableCounter) } @@ -71,7 +71,7 @@ func (i *afUpDownCounter) setDelegate(m metric.Meter) { i.delegate.Store(ctr) } -func (i *afUpDownCounter) Unwrap() metric.Observable { +func (i *afUpDownCounter) unwrap() metric.Observable { if ctr := i.delegate.Load(); ctr != nil { return ctr.(metric.Float64ObservableUpDownCounter) } @@ -102,7 +102,7 @@ func (i *afGauge) setDelegate(m metric.Meter) { i.delegate.Store(ctr) } -func (i *afGauge) Unwrap() metric.Observable { +func (i *afGauge) unwrap() metric.Observable { if ctr := i.delegate.Load(); ctr != nil { return ctr.(metric.Float64ObservableGauge) } @@ -133,7 +133,7 @@ func (i *aiCounter) setDelegate(m metric.Meter) { i.delegate.Store(ctr) } -func (i *aiCounter) Unwrap() metric.Observable { +func (i *aiCounter) unwrap() metric.Observable { if ctr := i.delegate.Load(); ctr != nil { return ctr.(metric.Int64ObservableCounter) } @@ -164,7 +164,7 @@ func (i *aiUpDownCounter) setDelegate(m metric.Meter) { i.delegate.Store(ctr) } -func (i *aiUpDownCounter) Unwrap() metric.Observable { +func (i *aiUpDownCounter) unwrap() metric.Observable { if ctr := i.delegate.Load(); ctr != nil { return ctr.(metric.Int64ObservableUpDownCounter) } @@ -195,7 +195,7 @@ func (i *aiGauge) setDelegate(m metric.Meter) { i.delegate.Store(ctr) } -func (i *aiGauge) Unwrap() metric.Observable { +func (i *aiGauge) unwrap() metric.Observable { if ctr := i.delegate.Load(); ctr != nil { return ctr.(metric.Int64ObservableGauge) } diff --git a/vendor/go.opentelemetry.io/otel/internal/global/meter.go b/vendor/go.opentelemetry.io/otel/internal/global/meter.go index e3db438a0..a6acd8dca 100644 --- a/vendor/go.opentelemetry.io/otel/internal/global/meter.go +++ b/vendor/go.opentelemetry.io/otel/internal/global/meter.go @@ -5,6 +5,7 @@ package global // import "go.opentelemetry.io/otel/internal/global" import ( "container/list" + "context" "reflect" "sync" @@ -66,6 +67,7 @@ func (p *meterProvider) Meter(name string, opts ...metric.MeterOption) metric.Me name: name, version: c.InstrumentationVersion(), schema: c.SchemaURL(), + attrs: c.InstrumentationAttributes(), } if p.meters == nil { @@ -472,8 +474,7 @@ func (m *meter) RegisterCallback(f metric.Callback, insts ...metric.Observable) defer m.mtx.Unlock() if m.delegate != nil { - insts = unwrapInstruments(insts) - return m.delegate.RegisterCallback(f, insts...) + return m.delegate.RegisterCallback(unwrapCallback(f), unwrapInstruments(insts)...) } reg := ®istration{instruments: insts, function: f} @@ -487,15 +488,11 @@ func (m *meter) RegisterCallback(f metric.Callback, insts ...metric.Observable) return reg, nil } -type wrapped interface { - unwrap() metric.Observable -} - func unwrapInstruments(instruments []metric.Observable) []metric.Observable { out := make([]metric.Observable, 0, len(instruments)) for _, inst := range instruments { - if in, ok := inst.(wrapped); ok { + if in, ok := inst.(unwrapper); ok { out = append(out, in.unwrap()) } else { out = append(out, inst) @@ -515,9 +512,61 @@ type registration struct { unregMu sync.Mutex } -func (c *registration) setDelegate(m metric.Meter) { - insts := unwrapInstruments(c.instruments) +type unwrapObs struct { + embedded.Observer + obs metric.Observer +} + +// unwrapFloat64Observable returns an expected metric.Float64Observable after +// unwrapping the global object. +func unwrapFloat64Observable(inst metric.Float64Observable) metric.Float64Observable { + if unwrapped, ok := inst.(unwrapper); ok { + if floatObs, ok := unwrapped.unwrap().(metric.Float64Observable); ok { + // Note: if the unwrapped object does not + // unwrap as an observable for either of the + // predicates here, it means an internal bug in + // this package. We avoid logging an error in + // this case, because the SDK has to try its + // own type conversion on the object. The SDK + // will see this and be forced to respond with + // its own error. + // + // This code uses a double-nested if statement + // to avoid creating a branch that is + // impossible to cover. + inst = floatObs + } + } + return inst +} + +// unwrapInt64Observable returns an expected metric.Int64Observable after +// unwrapping the global object. +func unwrapInt64Observable(inst metric.Int64Observable) metric.Int64Observable { + if unwrapped, ok := inst.(unwrapper); ok { + if unint, ok := unwrapped.unwrap().(metric.Int64Observable); ok { + // See the comment in unwrapFloat64Observable(). + inst = unint + } + } + return inst +} + +func (uo *unwrapObs) ObserveFloat64(inst metric.Float64Observable, value float64, opts ...metric.ObserveOption) { + uo.obs.ObserveFloat64(unwrapFloat64Observable(inst), value, opts...) +} + +func (uo *unwrapObs) ObserveInt64(inst metric.Int64Observable, value int64, opts ...metric.ObserveOption) { + uo.obs.ObserveInt64(unwrapInt64Observable(inst), value, opts...) +} +func unwrapCallback(f metric.Callback) metric.Callback { + return func(ctx context.Context, obs metric.Observer) error { + return f(ctx, &unwrapObs{obs: obs}) + } +} + +func (c *registration) setDelegate(m metric.Meter) { c.unregMu.Lock() defer c.unregMu.Unlock() @@ -526,7 +575,7 @@ func (c *registration) setDelegate(m metric.Meter) { return } - reg, err := m.RegisterCallback(c.function, insts...) + reg, err := m.RegisterCallback(unwrapCallback(c.function), unwrapInstruments(c.instruments)...) if err != nil { GetErrorHandler().Handle(err) return diff --git a/vendor/go.opentelemetry.io/otel/internal/global/trace.go b/vendor/go.opentelemetry.io/otel/internal/global/trace.go index e31f442b4..ac65262c6 100644 --- a/vendor/go.opentelemetry.io/otel/internal/global/trace.go +++ b/vendor/go.opentelemetry.io/otel/internal/global/trace.go @@ -87,6 +87,7 @@ func (p *tracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T name: name, version: c.InstrumentationVersion(), schema: c.SchemaURL(), + attrs: c.InstrumentationAttributes(), } if p.tracers == nil { @@ -102,7 +103,12 @@ func (p *tracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T return t } -type il struct{ name, version, schema string } +type il struct { + name string + version string + schema string + attrs attribute.Set +} // tracer is a placeholder for a trace.Tracer. // diff --git a/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go b/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go index 728115045..34852a47b 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go +++ b/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go @@ -3,6 +3,8 @@ package instrumentation // import "go.opentelemetry.io/otel/sdk/instrumentation" +import "go.opentelemetry.io/otel/attribute" + // Scope represents the instrumentation scope. type Scope struct { // Name is the name of the instrumentation scope. This should be the @@ -12,4 +14,6 @@ type Scope struct { Version string // SchemaURL of the telemetry emitted by the scope. SchemaURL string + // Attributes of the telemetry emitted by the scope. + Attributes attribute.Set } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/config.go b/vendor/go.opentelemetry.io/otel/sdk/metric/config.go index bbe7bf671..203cd9d65 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/config.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/config.go @@ -5,17 +5,22 @@ package metric // import "go.opentelemetry.io/otel/sdk/metric" import ( "context" - "fmt" + "errors" + "os" + "strings" "sync" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/sdk/metric/exemplar" "go.opentelemetry.io/otel/sdk/resource" ) // config contains configuration options for a MeterProvider. type config struct { - res *resource.Resource - readers []Reader - views []View + res *resource.Resource + readers []Reader + views []View + exemplarFilter exemplar.Filter } // readerSignals returns a force-flush and shutdown function for a @@ -39,25 +44,13 @@ func (c config) readerSignals() (forceFlush, shutdown func(context.Context) erro // value. func unify(funcs []func(context.Context) error) func(context.Context) error { return func(ctx context.Context) error { - var errs []error + var err error for _, f := range funcs { - if err := f(ctx); err != nil { - errs = append(errs, err) + if e := f(ctx); e != nil { + err = errors.Join(err, e) } } - return unifyErrors(errs) - } -} - -// unifyErrors combines multiple errors into a single error. -func unifyErrors(errs []error) error { - switch len(errs) { - case 0: - return nil - case 1: - return errs[0] - default: - return fmt.Errorf("%v", errs) + return err } } @@ -75,7 +68,13 @@ func unifyShutdown(funcs []func(context.Context) error) func(context.Context) er // newConfig returns a config configured with options. func newConfig(options []Option) config { - conf := config{res: resource.Default()} + conf := config{ + res: resource.Default(), + exemplarFilter: exemplar.TraceBasedFilter, + } + for _, o := range meterProviderOptionsFromEnv() { + conf = o.apply(conf) + } for _, o := range options { conf = o.apply(conf) } @@ -103,7 +102,11 @@ func (o optionFunc) apply(conf config) config { // go.opentelemetry.io/otel/sdk/resource package will be used. func WithResource(res *resource.Resource) Option { return optionFunc(func(conf config) config { - conf.res = res + var err error + conf.res, err = resource.Merge(resource.Environment(), res) + if err != nil { + otel.Handle(err) + } return conf }) } @@ -135,3 +138,35 @@ func WithView(views ...View) Option { return cfg }) } + +// WithExemplarFilter configures the exemplar filter. +// +// The exemplar filter determines which measurements are offered to the +// exemplar reservoir, but the exemplar reservoir makes the final decision of +// whether to store an exemplar. +// +// By default, the [exemplar.SampledFilter] +// is used. Exemplars can be entirely disabled by providing the +// [exemplar.AlwaysOffFilter]. +func WithExemplarFilter(filter exemplar.Filter) Option { + return optionFunc(func(cfg config) config { + cfg.exemplarFilter = filter + return cfg + }) +} + +func meterProviderOptionsFromEnv() []Option { + var opts []Option + // https://github.com/open-telemetry/opentelemetry-specification/blob/d4b241f451674e8f611bb589477680341006ad2b/specification/configuration/sdk-environment-variables.md#exemplar + const filterEnvKey = "OTEL_METRICS_EXEMPLAR_FILTER" + + switch strings.ToLower(strings.TrimSpace(os.Getenv(filterEnvKey))) { + case "always_on": + opts = append(opts, WithExemplarFilter(exemplar.AlwaysOnFilter)) + case "always_off": + opts = append(opts, WithExemplarFilter(exemplar.AlwaysOffFilter)) + case "trace_based": + opts = append(opts, WithExemplarFilter(exemplar.TraceBasedFilter)) + } + return opts +} diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go index 82619da78..0335b8ae4 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go @@ -4,51 +4,49 @@ package metric // import "go.opentelemetry.io/otel/sdk/metric" import ( - "os" "runtime" - "slices" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" - "go.opentelemetry.io/otel/sdk/metric/internal/x" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/exemplar" + "go.opentelemetry.io/otel/sdk/metric/internal/aggregate" ) -// reservoirFunc returns the appropriately configured exemplar reservoir -// creation func based on the passed InstrumentKind and user defined -// environment variables. -// -// Note: This will only return non-nil values when the experimental exemplar -// feature is enabled and the OTEL_METRICS_EXEMPLAR_FILTER environment variable -// is not set to always_off. -func reservoirFunc[N int64 | float64](agg Aggregation) func() exemplar.FilteredReservoir[N] { - if !x.Exemplars.Enabled() { - return nil - } - // https://github.com/open-telemetry/opentelemetry-specification/blob/d4b241f451674e8f611bb589477680341006ad2b/specification/configuration/sdk-environment-variables.md#exemplar - const filterEnvKey = "OTEL_METRICS_EXEMPLAR_FILTER" +// ExemplarReservoirProviderSelector selects the +// [exemplar.ReservoirProvider] to use +// based on the [Aggregation] of the metric. +type ExemplarReservoirProviderSelector func(Aggregation) exemplar.ReservoirProvider - var filter exemplar.Filter - - switch os.Getenv(filterEnvKey) { - case "always_on": - filter = exemplar.AlwaysOnFilter - case "always_off": - return exemplar.Drop - case "trace_based": - fallthrough - default: - filter = exemplar.SampledFilter +// reservoirFunc returns the appropriately configured exemplar reservoir +// creation func based on the passed InstrumentKind and filter configuration. +func reservoirFunc[N int64 | float64](provider exemplar.ReservoirProvider, filter exemplar.Filter) func(attribute.Set) aggregate.FilteredExemplarReservoir[N] { + return func(attrs attribute.Set) aggregate.FilteredExemplarReservoir[N] { + return aggregate.NewFilteredExemplarReservoir[N](filter, provider(attrs)) } +} +// DefaultExemplarReservoirProviderSelector returns the default +// [exemplar.ReservoirProvider] for the +// provided [Aggregation]. +// +// For explicit bucket histograms with more than 1 bucket, it uses the +// [exemplar.HistogramReservoirProvider]. +// For exponential histograms, it uses the +// [exemplar.FixedSizeReservoirProvider] +// with a size of min(20, max_buckets). +// For all other aggregations, it uses the +// [exemplar.FixedSizeReservoirProvider] +// with a size equal to the number of CPUs. +// +// Exemplar default reservoirs MAY change in a minor version bump. No +// guarantees are made on the shape or statistical properties of returned +// exemplars. +func DefaultExemplarReservoirProviderSelector(agg Aggregation) exemplar.ReservoirProvider { // https://github.com/open-telemetry/opentelemetry-specification/blob/d4b241f451674e8f611bb589477680341006ad2b/specification/metrics/sdk.md#exemplar-defaults // Explicit bucket histogram aggregation with more than 1 bucket will // use AlignedHistogramBucketExemplarReservoir. a, ok := agg.(AggregationExplicitBucketHistogram) if ok && len(a.Boundaries) > 0 { - cp := slices.Clone(a.Boundaries) - return func() exemplar.FilteredReservoir[N] { - bounds := cp - return exemplar.NewFilteredReservoir[N](filter, exemplar.Histogram(bounds)) - } + return exemplar.HistogramReservoirProvider(a.Boundaries) } var n int @@ -75,7 +73,5 @@ func reservoirFunc[N int64 | float64](agg Aggregation) func() exemplar.FilteredR } } - return func() exemplar.FilteredReservoir[N] { - return exemplar.NewFilteredReservoir[N](filter, exemplar.FixedSize(n)) - } + return exemplar.FixedSizeReservoirProvider(n) } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md new file mode 100644 index 000000000..d1025f5eb --- /dev/null +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md @@ -0,0 +1,3 @@ +# Metric SDK Exemplars + +[![PkgGoDev](https://pkg.go.dev/badge/go.opentelemetry.io/otel/sdk/metric/exemplar)](https://pkg.go.dev/go.opentelemetry.io/otel/sdk/metric/exemplar) diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/doc.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/doc.go similarity index 93% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/doc.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/doc.go index 5394f48e0..9f2389376 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/doc.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/doc.go @@ -3,4 +3,4 @@ // Package exemplar provides an implementation of the OpenTelemetry exemplar // reservoir to be used in metric collection pipelines. -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/exemplar.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/exemplar.go similarity index 98% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/exemplar.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/exemplar.go index fcaa6a469..1ab694678 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/exemplar.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/exemplar.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import ( "time" diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filter.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/filter.go similarity index 75% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filter.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/filter.go index 152a069a0..b595e2ace 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filter.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/filter.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import ( "context" @@ -16,10 +16,10 @@ import ( // Reservoir in making a sampling decision. type Filter func(context.Context) bool -// SampledFilter is a [Filter] that will only offer measurements +// TraceBasedFilter is a [Filter] that will only offer measurements // if the passed context associated with the measurement contains a sampled // [go.opentelemetry.io/otel/trace.SpanContext]. -func SampledFilter(ctx context.Context) bool { +func TraceBasedFilter(ctx context.Context) bool { return trace.SpanContextFromContext(ctx).IsSampled() } @@ -27,3 +27,8 @@ func SampledFilter(ctx context.Context) bool { func AlwaysOnFilter(ctx context.Context) bool { return true } + +// AlwaysOffFilter is a [Filter] that never offers measurements. +func AlwaysOffFilter(ctx context.Context) bool { + return false +} diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/rand.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/fixed_size_reservoir.go similarity index 73% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/rand.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/fixed_size_reservoir.go index 199a2608f..d4aab0aad 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/rand.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/fixed_size_reservoir.go @@ -1,31 +1,69 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import ( "context" "math" "math/rand" - "sync" "time" "go.opentelemetry.io/otel/attribute" ) -var ( +// FixedSizeReservoirProvider returns a provider of [FixedSizeReservoir]. +func FixedSizeReservoirProvider(k int) ReservoirProvider { + return func(_ attribute.Set) Reservoir { + return NewFixedSizeReservoir(k) + } +} + +// NewFixedSizeReservoir returns a [FixedSizeReservoir] that samples at most +// k exemplars. If there are k or less measurements made, the Reservoir will +// sample each one. If there are more than k, the Reservoir will then randomly +// sample all additional measurement with a decreasing probability. +func NewFixedSizeReservoir(k int) *FixedSizeReservoir { + return newFixedSizeReservoir(newStorage(k)) +} + +var _ Reservoir = &FixedSizeReservoir{} + +// FixedSizeReservoir is a [Reservoir] that samples at most k exemplars. If +// there are k or less measurements made, the Reservoir will sample each one. +// If there are more than k, the Reservoir will then randomly sample all +// additional measurement with a decreasing probability. +type FixedSizeReservoir struct { + *storage + + // count is the number of measurement seen. + count int64 + // next is the next count that will store a measurement at a random index + // once the reservoir has been filled. + next int64 + // w is the largest random number in a distribution that is used to compute + // the next next. + w float64 + // rng is used to make sampling decisions. // // Do not use crypto/rand. There is no reason for the decrease in performance // given this is not a security sensitive decision. - rng = rand.New(rand.NewSource(time.Now().UnixNano())) - // Ensure concurrent safe accecess to rng and its underlying source. - rngMu sync.Mutex -) + rng *rand.Rand +} -// random returns, as a float64, a uniform pseudo-random number in the open -// interval (0.0,1.0). -func random() float64 { +func newFixedSizeReservoir(s *storage) *FixedSizeReservoir { + r := &FixedSizeReservoir{ + storage: s, + rng: rand.New(rand.NewSource(time.Now().UnixNano())), + } + r.reset() + return r +} + +// randomFloat64 returns, as a float64, a uniform pseudo-random number in the +// open interval (0.0,1.0). +func (r *FixedSizeReservoir) randomFloat64() float64 { // TODO: This does not return a uniform number. rng.Float64 returns a // uniformly random int in [0,2^53) that is divided by 2^53. Meaning it // returns multiples of 2^-53, and not all floating point numbers between 0 @@ -43,40 +81,25 @@ func random() float64 { // // There are likely many other methods to explore here as well. - rngMu.Lock() - defer rngMu.Unlock() - - f := rng.Float64() + f := r.rng.Float64() for f == 0 { - f = rng.Float64() + f = r.rng.Float64() } return f } -// FixedSize returns a [Reservoir] that samples at most k exemplars. If there -// are k or less measurements made, the Reservoir will sample each one. If -// there are more than k, the Reservoir will then randomly sample all -// additional measurement with a decreasing probability. -func FixedSize(k int) Reservoir { - r := &randRes{storage: newStorage(k)} - r.reset() - return r -} - -type randRes struct { - *storage - - // count is the number of measurement seen. - count int64 - // next is the next count that will store a measurement at a random index - // once the reservoir has been filled. - next int64 - // w is the largest random number in a distribution that is used to compute - // the next next. - w float64 -} - -func (r *randRes) Offer(ctx context.Context, t time.Time, n Value, a []attribute.KeyValue) { +// Offer accepts the parameters associated with a measurement. The +// parameters will be stored as an exemplar if the Reservoir decides to +// sample the measurement. +// +// The passed ctx needs to contain any baggage or span that were active +// when the measurement was made. This information may be used by the +// Reservoir in making a sampling decision. +// +// The time t is the time when the measurement was made. The v and a +// parameters are the value and dropped (filtered) attributes of the +// measurement respectively. +func (r *FixedSizeReservoir) Offer(ctx context.Context, t time.Time, n Value, a []attribute.KeyValue) { // The following algorithm is "Algorithm L" from Li, Kim-Hung (4 December // 1994). "Reservoir-Sampling Algorithms of Time Complexity // O(n(1+log(N/n)))". ACM Transactions on Mathematical Software. 20 (4): @@ -123,7 +146,7 @@ func (r *randRes) Offer(ctx context.Context, t time.Time, n Value, a []attribute } else { if r.count == r.next { // Overwrite a random existing measurement with the one offered. - idx := int(rng.Int63n(int64(cap(r.store)))) + idx := int(r.rng.Int63n(int64(cap(r.store)))) r.store[idx] = newMeasurement(ctx, t, n, a) r.advance() } @@ -132,7 +155,7 @@ func (r *randRes) Offer(ctx context.Context, t time.Time, n Value, a []attribute } // reset resets r to the initial state. -func (r *randRes) reset() { +func (r *FixedSizeReservoir) reset() { // This resets the number of exemplars known. r.count = 0 // Random index inserts should only happen after the storage is full. @@ -147,14 +170,14 @@ func (r *randRes) reset() { // This maps the uniform random number in (0,1) to a geometric distribution // over the same interval. The mean of the distribution is inversely // proportional to the storage capacity. - r.w = math.Exp(math.Log(random()) / float64(cap(r.store))) + r.w = math.Exp(math.Log(r.randomFloat64()) / float64(cap(r.store))) r.advance() } // advance updates the count at which the offered measurement will overwrite an // existing exemplar. -func (r *randRes) advance() { +func (r *FixedSizeReservoir) advance() { // Calculate the next value in the random number series. // // The current value of r.w is based on the max of a distribution of random @@ -167,7 +190,7 @@ func (r *randRes) advance() { // therefore the next r.w will be based on the same distribution (i.e. // `max(u_1,u_2,...,u_k)`). Therefore, we can sample the next r.w by // computing the next random number `u` and take r.w as `w * u^(1/k)`. - r.w *= math.Exp(math.Log(random()) / float64(cap(r.store))) + r.w *= math.Exp(math.Log(r.randomFloat64()) / float64(cap(r.store))) // Use the new random number in the series to calculate the count of the // next measurement that will be stored. // @@ -178,10 +201,13 @@ func (r *randRes) advance() { // // Important to note, the new r.next will always be at least 1 more than // the last r.next. - r.next += int64(math.Log(random())/math.Log(1-r.w)) + 1 + r.next += int64(math.Log(r.randomFloat64())/math.Log(1-r.w)) + 1 } -func (r *randRes) Collect(dest *[]Exemplar) { +// Collect returns all the held exemplars. +// +// The Reservoir state is preserved after this call. +func (r *FixedSizeReservoir) Collect(dest *[]Exemplar) { r.storage.Collect(dest) // Call reset here even though it will reset r.count and restart the random // number series. This will persist any old exemplars as long as no new diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go new file mode 100644 index 000000000..3b76cf305 --- /dev/null +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go @@ -0,0 +1,70 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" + +import ( + "context" + "slices" + "sort" + "time" + + "go.opentelemetry.io/otel/attribute" +) + +// HistogramReservoirProvider is a provider of [HistogramReservoir]. +func HistogramReservoirProvider(bounds []float64) ReservoirProvider { + cp := slices.Clone(bounds) + slices.Sort(cp) + return func(_ attribute.Set) Reservoir { + return NewHistogramReservoir(cp) + } +} + +// NewHistogramReservoir returns a [HistogramReservoir] that samples the last +// measurement that falls within a histogram bucket. The histogram bucket +// upper-boundaries are define by bounds. +// +// The passed bounds must be sorted before calling this function. +func NewHistogramReservoir(bounds []float64) *HistogramReservoir { + return &HistogramReservoir{ + bounds: bounds, + storage: newStorage(len(bounds) + 1), + } +} + +var _ Reservoir = &HistogramReservoir{} + +// HistogramReservoir is a [Reservoir] that samples the last measurement that +// falls within a histogram bucket. The histogram bucket upper-boundaries are +// define by bounds. +type HistogramReservoir struct { + *storage + + // bounds are bucket bounds in ascending order. + bounds []float64 +} + +// Offer accepts the parameters associated with a measurement. The +// parameters will be stored as an exemplar if the Reservoir decides to +// sample the measurement. +// +// The passed ctx needs to contain any baggage or span that were active +// when the measurement was made. This information may be used by the +// Reservoir in making a sampling decision. +// +// The time t is the time when the measurement was made. The v and a +// parameters are the value and dropped (filtered) attributes of the +// measurement respectively. +func (r *HistogramReservoir) Offer(ctx context.Context, t time.Time, v Value, a []attribute.KeyValue) { + var x float64 + switch v.Type() { + case Int64ValueType: + x = float64(v.Int64()) + case Float64ValueType: + x = v.Float64() + default: + panic("unknown value type") + } + r.store[sort.SearchFloat64s(r.bounds, x)] = newMeasurement(ctx, t, v, a) +} diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/reservoir.go similarity index 73% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/reservoir.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/reservoir.go index 80fa59554..ba5cd1a6b 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/reservoir.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/reservoir.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import ( "context" @@ -30,3 +30,11 @@ type Reservoir interface { // The Reservoir state is preserved after this call. Collect(dest *[]Exemplar) } + +// ReservoirProvider creates new [Reservoir]s. +// +// The attributes provided are attributes which are kept by the aggregation, and +// are exclusive with attributes passed to Offer. The combination of these +// attributes and the attributes passed to Offer is the complete set of +// attributes a measurement was made with. +type ReservoirProvider func(attr attribute.Set) Reservoir diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/storage.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/storage.go similarity index 94% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/storage.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/storage.go index 10b2976f7..0e2e26dfb 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/storage.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/storage.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import ( "context" @@ -35,7 +35,7 @@ func (r *storage) Collect(dest *[]Exemplar) { continue } - m.Exemplar(&(*dest)[n]) + m.exemplar(&(*dest)[n]) n++ } *dest = (*dest)[:n] @@ -66,8 +66,8 @@ func newMeasurement(ctx context.Context, ts time.Time, v Value, droppedAttr []at } } -// Exemplar returns m as an [Exemplar]. -func (m measurement) Exemplar(dest *Exemplar) { +// exemplar returns m as an [Exemplar]. +func (m measurement) exemplar(dest *Exemplar) { dest.FilteredAttributes = m.FilteredAttributes dest.Time = m.Time dest.Value = m.Value diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/value.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/value.go similarity index 91% rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/value.go rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/value.go index 1957d6b1e..590b089a8 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/value.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/value.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" +package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar" import "math" @@ -28,7 +28,8 @@ type Value struct { func NewValue[N int64 | float64](value N) Value { switch v := any(value).(type) { case int64: - return Value{t: Int64ValueType, val: uint64(v)} + // This can be later converted back to int64 (overflow not checked). + return Value{t: Int64ValueType, val: uint64(v)} // nolint:gosec case float64: return Value{t: Float64ValueType, val: math.Float64bits(v)} } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go b/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go index b52a330b3..48b723a7b 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go @@ -144,6 +144,12 @@ type Stream struct { // Use NewAllowKeysFilter from "go.opentelemetry.io/otel/attribute" to // provide an allow-list of attribute keys here. AttributeFilter attribute.Filter + // ExemplarReservoirProvider selects the + // [go.opentelemetry.io/otel/sdk/metric/exemplar.ReservoirProvider] based + // on the [Aggregation]. + // + // If unspecified, [DefaultExemplarReservoirProviderSelector] is used. + ExemplarReservoirProviderSelector ExemplarReservoirProviderSelector } // instID are the identifying properties of a instrument. @@ -234,8 +240,8 @@ func (i *float64Inst) aggregate(ctx context.Context, val float64, s attribute.Se } } -// observablID is a comparable unique identifier of an observable. -type observablID[N int64 | float64] struct { +// observableID is a comparable unique identifier of an observable. +type observableID[N int64 | float64] struct { name string description string kind InstrumentKind @@ -287,7 +293,7 @@ func newInt64Observable(m *meter, kind InstrumentKind, name, desc, u string) int type observable[N int64 | float64] struct { metric.Observable - observablID[N] + observableID[N] meter *meter measures measures[N] @@ -296,7 +302,7 @@ type observable[N int64 | float64] struct { func newObservable[N int64 | float64](m *meter, kind InstrumentKind, name, desc, u string) *observable[N] { return &observable[N]{ - observablID: observablID[N]{ + observableID: observableID[N]{ name: name, description: desc, kind: kind, diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go index b18ee719b..fde219333 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go @@ -8,7 +8,6 @@ import ( "time" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" "go.opentelemetry.io/otel/sdk/metric/metricdata" ) @@ -38,8 +37,8 @@ type Builder[N int64 | float64] struct { // create new exemplar reservoirs for a new seen attribute set. // // If this is not provided a default factory function that returns an - // exemplar.Drop reservoir will be used. - ReservoirFunc func() exemplar.FilteredReservoir[N] + // dropReservoir reservoir will be used. + ReservoirFunc func(attribute.Set) FilteredExemplarReservoir[N] // AggregationLimit is the cardinality limit of measurement attributes. Any // measurement for new attributes once the limit has been reached will be // aggregated into a single aggregate for the "otel.metric.overflow" @@ -50,12 +49,12 @@ type Builder[N int64 | float64] struct { AggregationLimit int } -func (b Builder[N]) resFunc() func() exemplar.FilteredReservoir[N] { +func (b Builder[N]) resFunc() func(attribute.Set) FilteredExemplarReservoir[N] { if b.ReservoirFunc != nil { return b.ReservoirFunc } - return exemplar.Drop + return dropReservoir } type fltrMeasure[N int64 | float64] func(ctx context.Context, value N, fltrAttr attribute.Set, droppedAttr []attribute.KeyValue) diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go new file mode 100644 index 000000000..8396faaa4 --- /dev/null +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go @@ -0,0 +1,27 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggregate" + +import ( + "context" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/exemplar" +) + +// dropReservoir returns a [FilteredReservoir] that drops all measurements it is offered. +func dropReservoir[N int64 | float64](attribute.Set) FilteredExemplarReservoir[N] { + return &dropRes[N]{} +} + +type dropRes[N int64 | float64] struct{} + +// Offer does nothing, all measurements offered will be dropped. +func (r *dropRes[N]) Offer(context.Context, N, []attribute.KeyValue) {} + +// Collect resets dest. No exemplars will ever be returned. +func (r *dropRes[N]) Collect(dest *[]exemplar.Exemplar) { + clear(*dest) // Erase elements to let GC collect objects + *dest = (*dest)[:0] +} diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go index 170ae8e58..25d709948 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go @@ -6,7 +6,7 @@ package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggreg import ( "sync" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" + "go.opentelemetry.io/otel/sdk/metric/exemplar" "go.opentelemetry.io/otel/sdk/metric/metricdata" ) @@ -17,6 +17,7 @@ var exemplarPool = sync.Pool{ func collectExemplars[N int64 | float64](out *[]metricdata.Exemplar[N], f func(*[]exemplar.Exemplar)) { dest := exemplarPool.Get().(*[]exemplar.Exemplar) defer func() { + clear(*dest) // Erase elements to let GC collect objects. *dest = (*dest)[:0] exemplarPool.Put(dest) }() diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go index 707342408..b7aa72165 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go @@ -12,7 +12,6 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" "go.opentelemetry.io/otel/sdk/metric/metricdata" ) @@ -31,7 +30,7 @@ const ( // expoHistogramDataPoint is a single data point in an exponential histogram. type expoHistogramDataPoint[N int64 | float64] struct { attrs attribute.Set - res exemplar.FilteredReservoir[N] + res FilteredExemplarReservoir[N] count uint64 min N @@ -284,7 +283,7 @@ func (b *expoBuckets) downscale(delta int32) { // newExponentialHistogram returns an Aggregator that summarizes a set of // measurements as an exponential histogram. Each histogram is scoped by attributes // and the aggregation cycle the measurements were made in. -func newExponentialHistogram[N int64 | float64](maxSize, maxScale int32, noMinMax, noSum bool, limit int, r func() exemplar.FilteredReservoir[N]) *expoHistogram[N] { +func newExponentialHistogram[N int64 | float64](maxSize, maxScale int32, noMinMax, noSum bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *expoHistogram[N] { return &expoHistogram[N]{ noSum: noSum, noMinMax: noMinMax, @@ -307,7 +306,7 @@ type expoHistogram[N int64 | float64] struct { maxSize int maxScale int32 - newRes func() exemplar.FilteredReservoir[N] + newRes func(attribute.Set) FilteredExemplarReservoir[N] limit limiter[*expoHistogramDataPoint[N]] values map[attribute.Distinct]*expoHistogramDataPoint[N] valuesMu sync.Mutex @@ -328,7 +327,7 @@ func (e *expoHistogram[N]) measure(ctx context.Context, value N, fltrAttr attrib v, ok := e.values[attr.Equivalent()] if !ok { v = newExpoHistogramDataPoint[N](attr, e.maxSize, e.maxScale, e.noMinMax, e.noSum) - v.res = e.newRes() + v.res = e.newRes(attr) e.values[attr.Equivalent()] = v } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go new file mode 100644 index 000000000..691a91060 --- /dev/null +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go @@ -0,0 +1,50 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggregate" + +import ( + "context" + "time" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/exemplar" +) + +// FilteredExemplarReservoir wraps a [exemplar.Reservoir] with a filter. +type FilteredExemplarReservoir[N int64 | float64] interface { + // Offer accepts the parameters associated with a measurement. The + // parameters will be stored as an exemplar if the filter decides to + // sample the measurement. + // + // The passed ctx needs to contain any baggage or span that were active + // when the measurement was made. This information may be used by the + // Reservoir in making a sampling decision. + Offer(ctx context.Context, val N, attr []attribute.KeyValue) + // Collect returns all the held exemplars in the reservoir. + Collect(dest *[]exemplar.Exemplar) +} + +// filteredExemplarReservoir handles the pre-sampled exemplar of measurements made. +type filteredExemplarReservoir[N int64 | float64] struct { + filter exemplar.Filter + reservoir exemplar.Reservoir +} + +// NewFilteredExemplarReservoir creates a [FilteredExemplarReservoir] which only offers values +// that are allowed by the filter. +func NewFilteredExemplarReservoir[N int64 | float64](f exemplar.Filter, r exemplar.Reservoir) FilteredExemplarReservoir[N] { + return &filteredExemplarReservoir[N]{ + filter: f, + reservoir: r, + } +} + +func (f *filteredExemplarReservoir[N]) Offer(ctx context.Context, val N, attr []attribute.KeyValue) { + if f.filter(ctx) { + // only record the current time if we are sampling this measurement. + f.reservoir.Offer(ctx, time.Now(), exemplar.NewValue(val), attr) + } +} + +func (f *filteredExemplarReservoir[N]) Collect(dest *[]exemplar.Exemplar) { f.reservoir.Collect(dest) } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go index ade0941f5..d577ae2c1 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go @@ -11,13 +11,12 @@ import ( "time" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" "go.opentelemetry.io/otel/sdk/metric/metricdata" ) type buckets[N int64 | float64] struct { attrs attribute.Set - res exemplar.FilteredReservoir[N] + res FilteredExemplarReservoir[N] counts []uint64 count uint64 @@ -48,13 +47,13 @@ type histValues[N int64 | float64] struct { noSum bool bounds []float64 - newRes func() exemplar.FilteredReservoir[N] + newRes func(attribute.Set) FilteredExemplarReservoir[N] limit limiter[*buckets[N]] values map[attribute.Distinct]*buckets[N] valuesMu sync.Mutex } -func newHistValues[N int64 | float64](bounds []float64, noSum bool, limit int, r func() exemplar.FilteredReservoir[N]) *histValues[N] { +func newHistValues[N int64 | float64](bounds []float64, noSum bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *histValues[N] { // The responsibility of keeping all buckets correctly associated with the // passed boundaries is ultimately this type's responsibility. Make a copy // here so we can always guarantee this. Or, in the case of failure, have @@ -94,7 +93,7 @@ func (s *histValues[N]) measure(ctx context.Context, value N, fltrAttr attribute // // buckets = (-∞, 0], (0, 5.0], (5.0, 10.0], (10.0, +∞) b = newBuckets[N](attr, len(s.bounds)+1) - b.res = s.newRes() + b.res = s.newRes(attr) // Ensure min and max are recorded values (not zero), for new buckets. b.min, b.max = value, value @@ -109,7 +108,7 @@ func (s *histValues[N]) measure(ctx context.Context, value N, fltrAttr attribute // newHistogram returns an Aggregator that summarizes a set of measurements as // an histogram. -func newHistogram[N int64 | float64](boundaries []float64, noMinMax, noSum bool, limit int, r func() exemplar.FilteredReservoir[N]) *histogram[N] { +func newHistogram[N int64 | float64](boundaries []float64, noMinMax, noSum bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *histogram[N] { return &histogram[N]{ histValues: newHistValues[N](boundaries, noSum, limit, r), noMinMax: noMinMax, diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go index c35936840..d3a93f085 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go @@ -9,7 +9,6 @@ import ( "time" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" "go.opentelemetry.io/otel/sdk/metric/metricdata" ) @@ -17,10 +16,10 @@ import ( type datapoint[N int64 | float64] struct { attrs attribute.Set value N - res exemplar.FilteredReservoir[N] + res FilteredExemplarReservoir[N] } -func newLastValue[N int64 | float64](limit int, r func() exemplar.FilteredReservoir[N]) *lastValue[N] { +func newLastValue[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *lastValue[N] { return &lastValue[N]{ newRes: r, limit: newLimiter[datapoint[N]](limit), @@ -33,7 +32,7 @@ func newLastValue[N int64 | float64](limit int, r func() exemplar.FilteredReserv type lastValue[N int64 | float64] struct { sync.Mutex - newRes func() exemplar.FilteredReservoir[N] + newRes func(attribute.Set) FilteredExemplarReservoir[N] limit limiter[datapoint[N]] values map[attribute.Distinct]datapoint[N] start time.Time @@ -46,7 +45,7 @@ func (s *lastValue[N]) measure(ctx context.Context, value N, fltrAttr attribute. attr := s.limit.Attributes(fltrAttr, s.values) d, ok := s.values[attr.Equivalent()] if !ok { - d.res = s.newRes() + d.res = s.newRes(attr) } d.attrs = attr @@ -115,7 +114,7 @@ func (s *lastValue[N]) copyDpts(dest *[]metricdata.DataPoint[N], t time.Time) in // newPrecomputedLastValue returns an aggregator that summarizes a set of // observations as the last one made. -func newPrecomputedLastValue[N int64 | float64](limit int, r func() exemplar.FilteredReservoir[N]) *precomputedLastValue[N] { +func newPrecomputedLastValue[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *precomputedLastValue[N] { return &precomputedLastValue[N]{lastValue: newLastValue[N](limit, r)} } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go index 891366922..8e132ad61 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go @@ -9,25 +9,24 @@ import ( "time" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" "go.opentelemetry.io/otel/sdk/metric/metricdata" ) type sumValue[N int64 | float64] struct { n N - res exemplar.FilteredReservoir[N] + res FilteredExemplarReservoir[N] attrs attribute.Set } // valueMap is the storage for sums. type valueMap[N int64 | float64] struct { sync.Mutex - newRes func() exemplar.FilteredReservoir[N] + newRes func(attribute.Set) FilteredExemplarReservoir[N] limit limiter[sumValue[N]] values map[attribute.Distinct]sumValue[N] } -func newValueMap[N int64 | float64](limit int, r func() exemplar.FilteredReservoir[N]) *valueMap[N] { +func newValueMap[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *valueMap[N] { return &valueMap[N]{ newRes: r, limit: newLimiter[sumValue[N]](limit), @@ -42,7 +41,7 @@ func (s *valueMap[N]) measure(ctx context.Context, value N, fltrAttr attribute.S attr := s.limit.Attributes(fltrAttr, s.values) v, ok := s.values[attr.Equivalent()] if !ok { - v.res = s.newRes() + v.res = s.newRes(attr) } v.attrs = attr @@ -55,7 +54,7 @@ func (s *valueMap[N]) measure(ctx context.Context, value N, fltrAttr attribute.S // newSum returns an aggregator that summarizes a set of measurements as their // arithmetic sum. Each sum is scoped by attributes and the aggregation cycle // the measurements were made in. -func newSum[N int64 | float64](monotonic bool, limit int, r func() exemplar.FilteredReservoir[N]) *sum[N] { +func newSum[N int64 | float64](monotonic bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *sum[N] { return &sum[N]{ valueMap: newValueMap[N](limit, r), monotonic: monotonic, @@ -142,9 +141,9 @@ func (s *sum[N]) cumulative(dest *metricdata.Aggregation) int { } // newPrecomputedSum returns an aggregator that summarizes a set of -// observatrions as their arithmetic sum. Each sum is scoped by attributes and +// observations as their arithmetic sum. Each sum is scoped by attributes and // the aggregation cycle the measurements were made in. -func newPrecomputedSum[N int64 | float64](monotonic bool, limit int, r func() exemplar.FilteredReservoir[N]) *precomputedSum[N] { +func newPrecomputedSum[N int64 | float64](monotonic bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *precomputedSum[N] { return &precomputedSum[N]{ valueMap: newValueMap[N](limit, r), monotonic: monotonic, @@ -152,7 +151,7 @@ func newPrecomputedSum[N int64 | float64](monotonic bool, limit int, r func() ex } } -// precomputedSum summarizes a set of observatrions as their arithmetic sum. +// precomputedSum summarizes a set of observations as their arithmetic sum. type precomputedSum[N int64 | float64] struct { *valueMap[N] diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go deleted file mode 100644 index 5a0f39ae1..000000000 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" - -import ( - "context" - - "go.opentelemetry.io/otel/attribute" -) - -// Drop returns a [FilteredReservoir] that drops all measurements it is offered. -func Drop[N int64 | float64]() FilteredReservoir[N] { return &dropRes[N]{} } - -type dropRes[N int64 | float64] struct{} - -// Offer does nothing, all measurements offered will be dropped. -func (r *dropRes[N]) Offer(context.Context, N, []attribute.KeyValue) {} - -// Collect resets dest. No exemplars will ever be returned. -func (r *dropRes[N]) Collect(dest *[]Exemplar) { - *dest = (*dest)[:0] -} diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go deleted file mode 100644 index 9fedfa4be..000000000 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" - -import ( - "context" - "time" - - "go.opentelemetry.io/otel/attribute" -) - -// FilteredReservoir wraps a [Reservoir] with a filter. -type FilteredReservoir[N int64 | float64] interface { - // Offer accepts the parameters associated with a measurement. The - // parameters will be stored as an exemplar if the filter decides to - // sample the measurement. - // - // The passed ctx needs to contain any baggage or span that were active - // when the measurement was made. This information may be used by the - // Reservoir in making a sampling decision. - Offer(ctx context.Context, val N, attr []attribute.KeyValue) - // Collect returns all the held exemplars in the reservoir. - Collect(dest *[]Exemplar) -} - -// filteredReservoir handles the pre-sampled exemplar of measurements made. -type filteredReservoir[N int64 | float64] struct { - filter Filter - reservoir Reservoir -} - -// NewFilteredReservoir creates a [FilteredReservoir] which only offers values -// that are allowed by the filter. -func NewFilteredReservoir[N int64 | float64](f Filter, r Reservoir) FilteredReservoir[N] { - return &filteredReservoir[N]{ - filter: f, - reservoir: r, - } -} - -func (f *filteredReservoir[N]) Offer(ctx context.Context, val N, attr []attribute.KeyValue) { - if f.filter(ctx) { - // only record the current time if we are sampling this measurment. - f.reservoir.Offer(ctx, time.Now(), NewValue(val), attr) - } -} - -func (f *filteredReservoir[N]) Collect(dest *[]Exemplar) { f.reservoir.Collect(dest) } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go deleted file mode 100644 index a6ff86d02..000000000 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar" - -import ( - "context" - "slices" - "sort" - "time" - - "go.opentelemetry.io/otel/attribute" -) - -// Histogram returns a [Reservoir] that samples the last measurement that falls -// within a histogram bucket. The histogram bucket upper-boundaries are define -// by bounds. -// -// The passed bounds will be sorted by this function. -func Histogram(bounds []float64) Reservoir { - slices.Sort(bounds) - return &histRes{ - bounds: bounds, - storage: newStorage(len(bounds) + 1), - } -} - -type histRes struct { - *storage - - // bounds are bucket bounds in ascending order. - bounds []float64 -} - -func (r *histRes) Offer(ctx context.Context, t time.Time, v Value, a []attribute.KeyValue) { - var x float64 - switch v.Type() { - case Int64ValueType: - x = float64(v.Int64()) - case Float64ValueType: - x = v.Float64() - default: - panic("unknown value type") - } - r.store[sort.SearchFloat64s(r.bounds, x)] = newMeasurement(ctx, t, v, a) -} diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go index 8cd2f3741..089199370 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go @@ -10,39 +10,23 @@ package x // import "go.opentelemetry.io/otel/sdk/metric/internal/x" import ( "os" "strconv" - "strings" ) -var ( - // Exemplars is an experimental feature flag that defines if exemplars - // should be recorded for metric data-points. - // - // To enable this feature set the OTEL_GO_X_EXEMPLAR environment variable - // to the case-insensitive string value of "true" (i.e. "True" and "TRUE" - // will also enable this). - Exemplars = newFeature("EXEMPLAR", func(v string) (string, bool) { - if strings.ToLower(v) == "true" { - return v, true - } - return "", false - }) - - // CardinalityLimit is an experimental feature flag that defines if - // cardinality limits should be applied to the recorded metric data-points. - // - // To enable this feature set the OTEL_GO_X_CARDINALITY_LIMIT environment - // variable to the integer limit value you want to use. - // - // Setting OTEL_GO_X_CARDINALITY_LIMIT to a value less than or equal to 0 - // will disable the cardinality limits. - CardinalityLimit = newFeature("CARDINALITY_LIMIT", func(v string) (int, bool) { - n, err := strconv.Atoi(v) - if err != nil { - return 0, false - } - return n, true - }) -) +// CardinalityLimit is an experimental feature flag that defines if +// cardinality limits should be applied to the recorded metric data-points. +// +// To enable this feature set the OTEL_GO_X_CARDINALITY_LIMIT environment +// variable to the integer limit value you want to use. +// +// Setting OTEL_GO_X_CARDINALITY_LIMIT to a value less than or equal to 0 +// will disable the cardinality limits. +var CardinalityLimit = newFeature("CARDINALITY_LIMIT", func(v string) (int, bool) { + n, err := strconv.Atoi(v) + if err != nil { + return 0, false + } + return n, true +}) // Feature is an experimental feature control flag. It provides a uniform way // to interact with these feature flags and parse their values. diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go b/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go index e0fd86ca7..c495985bc 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go @@ -113,18 +113,17 @@ func (mr *ManualReader) Collect(ctx context.Context, rm *metricdata.ResourceMetr if err != nil { return err } - var errs []error for _, producer := range mr.externalProducers.Load().([]Producer) { - externalMetrics, err := producer.Produce(ctx) - if err != nil { - errs = append(errs, err) + externalMetrics, e := producer.Produce(ctx) + if e != nil { + err = errors.Join(err, e) } rm.ScopeMetrics = append(rm.ScopeMetrics, externalMetrics...) } global.Debug("ManualReader collection", "Data", rm) - return unifyErrors(errs) + return err } // MarshalLog returns logging data about the ManualReader. diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go b/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go index 2309e5b2b..823cdf2c6 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go @@ -150,6 +150,11 @@ func (m *meter) int64ObservableInstrument(id Instrument, callbacks []metric.Int6 continue } inst.appendMeasures(in) + + // Add the measures to the pipeline. It is required to maintain + // measures per pipeline to avoid calling the measure that + // is not part of the pipeline. + insert.pipeline.addInt64Measure(inst.observableID, in) for _, cback := range callbacks { inst := int64Observer{measures: in} fn := cback @@ -309,6 +314,11 @@ func (m *meter) float64ObservableInstrument(id Instrument, callbacks []metric.Fl continue } inst.appendMeasures(in) + + // Add the measures to the pipeline. It is required to maintain + // measures per pipeline to avoid calling the measure that + // is not part of the pipeline. + insert.pipeline.addFloat64Measure(inst.observableID, in) for _, cback := range callbacks { inst := float64Observer{measures: in} fn := cback @@ -441,73 +451,80 @@ func (m *meter) RegisterCallback(f metric.Callback, insts ...metric.Observable) return noopRegister{}, nil } - reg := newObserver() - var errs multierror + var err error + validInstruments := make([]metric.Observable, 0, len(insts)) for _, inst := range insts { - // Unwrap any global. - if u, ok := inst.(interface { - Unwrap() metric.Observable - }); ok { - inst = u.Unwrap() - } - switch o := inst.(type) { case int64Observable: - if err := o.registerable(m); err != nil { - if !errors.Is(err, errEmptyAgg) { - errs.append(err) + if e := o.registerable(m); e != nil { + if !errors.Is(e, errEmptyAgg) { + err = errors.Join(err, e) } continue } - reg.registerInt64(o.observablID) + + validInstruments = append(validInstruments, inst) case float64Observable: - if err := o.registerable(m); err != nil { - if !errors.Is(err, errEmptyAgg) { - errs.append(err) + if e := o.registerable(m); e != nil { + if !errors.Is(e, errEmptyAgg) { + err = errors.Join(err, e) } continue } - reg.registerFloat64(o.observablID) + + validInstruments = append(validInstruments, inst) default: // Instrument external to the SDK. return nil, fmt.Errorf("invalid observable: from different implementation") } } - err := errs.errorOrNil() - if reg.len() == 0 { + if len(validInstruments) == 0 { // All insts use drop aggregation or are invalid. return noopRegister{}, err } - // Some or all instruments were valid. - cback := func(ctx context.Context) error { return f(ctx, reg) } - return m.pipes.registerMultiCallback(cback), err + unregs := make([]func(), len(m.pipes)) + for ix, pipe := range m.pipes { + reg := newObserver(pipe) + for _, inst := range validInstruments { + switch o := inst.(type) { + case int64Observable: + reg.registerInt64(o.observableID) + case float64Observable: + reg.registerFloat64(o.observableID) + } + } + + // Some or all instruments were valid. + cBack := func(ctx context.Context) error { return f(ctx, reg) } + unregs[ix] = pipe.addMultiCallback(cBack) + } + + return unregisterFuncs{f: unregs}, err } type observer struct { embedded.Observer - float64 map[observablID[float64]]struct{} - int64 map[observablID[int64]]struct{} + pipe *pipeline + float64 map[observableID[float64]]struct{} + int64 map[observableID[int64]]struct{} } -func newObserver() observer { +func newObserver(p *pipeline) observer { return observer{ - float64: make(map[observablID[float64]]struct{}), - int64: make(map[observablID[int64]]struct{}), + pipe: p, + float64: make(map[observableID[float64]]struct{}), + int64: make(map[observableID[int64]]struct{}), } } -func (r observer) len() int { - return len(r.float64) + len(r.int64) -} - -func (r observer) registerFloat64(id observablID[float64]) { +func (r observer) registerFloat64(id observableID[float64]) { r.float64[id] = struct{}{} } -func (r observer) registerInt64(id observablID[int64]) { +func (r observer) registerInt64(id observableID[int64]) { r.int64[id] = struct{}{} } @@ -521,22 +538,12 @@ func (r observer) ObserveFloat64(o metric.Float64Observable, v float64, opts ... switch conv := o.(type) { case float64Observable: oImpl = conv - case interface { - Unwrap() metric.Observable - }: - // Unwrap any global. - async := conv.Unwrap() - var ok bool - if oImpl, ok = async.(float64Observable); !ok { - global.Error(errUnknownObserver, "failed to record asynchronous") - return - } default: global.Error(errUnknownObserver, "failed to record") return } - if _, registered := r.float64[oImpl.observablID]; !registered { + if _, registered := r.float64[oImpl.observableID]; !registered { if !oImpl.dropAggregation { global.Error(errUnregObserver, "failed to record", "name", oImpl.name, @@ -548,7 +555,12 @@ func (r observer) ObserveFloat64(o metric.Float64Observable, v float64, opts ... return } c := metric.NewObserveConfig(opts) - oImpl.observe(v, c.Attributes()) + // Access to r.pipe.float64Measure is already guarded by a lock in pipeline.produce. + // TODO (#5946): Refactor pipeline and observable measures. + measures := r.pipe.float64Measures[oImpl.observableID] + for _, m := range measures { + m(context.Background(), v, c.Attributes()) + } } func (r observer) ObserveInt64(o metric.Int64Observable, v int64, opts ...metric.ObserveOption) { @@ -556,22 +568,12 @@ func (r observer) ObserveInt64(o metric.Int64Observable, v int64, opts ...metric switch conv := o.(type) { case int64Observable: oImpl = conv - case interface { - Unwrap() metric.Observable - }: - // Unwrap any global. - async := conv.Unwrap() - var ok bool - if oImpl, ok = async.(int64Observable); !ok { - global.Error(errUnknownObserver, "failed to record asynchronous") - return - } default: global.Error(errUnknownObserver, "failed to record") return } - if _, registered := r.int64[oImpl.observablID]; !registered { + if _, registered := r.int64[oImpl.observableID]; !registered { if !oImpl.dropAggregation { global.Error(errUnregObserver, "failed to record", "name", oImpl.name, @@ -583,7 +585,12 @@ func (r observer) ObserveInt64(o metric.Int64Observable, v int64, opts ...metric return } c := metric.NewObserveConfig(opts) - oImpl.observe(v, c.Attributes()) + // Access to r.pipe.int64Measures is already guarded b a lock in pipeline.produce. + // TODO (#5946): Refactor pipeline and observable measures. + measures := r.pipe.int64Measures[oImpl.observableID] + for _, m := range measures { + m(context.Background(), v, c.Attributes()) + } } type noopRegister struct{ embedded.Registration } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go b/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go index 67ee1b11a..dcd2182d9 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go @@ -251,18 +251,17 @@ func (r *PeriodicReader) collect(ctx context.Context, p interface{}, rm *metricd if err != nil { return err } - var errs []error for _, producer := range r.externalProducers.Load().([]Producer) { - externalMetrics, err := producer.Produce(ctx) - if err != nil { - errs = append(errs, err) + externalMetrics, e := producer.Produce(ctx) + if e != nil { + err = errors.Join(err, e) } rm.ScopeMetrics = append(rm.ScopeMetrics, externalMetrics...) } global.Debug("PeriodicReader collection", "Data", rm) - return unifyErrors(errs) + return err } // export exports metric data m using r's exporter. diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go b/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go index 823bf2fe3..775e24526 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go @@ -8,14 +8,13 @@ import ( "context" "errors" "fmt" - "strings" "sync" "sync/atomic" "go.opentelemetry.io/otel/internal/global" - "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/embedded" "go.opentelemetry.io/otel/sdk/instrumentation" + "go.opentelemetry.io/otel/sdk/metric/exemplar" "go.opentelemetry.io/otel/sdk/metric/internal" "go.opentelemetry.io/otel/sdk/metric/internal/aggregate" "go.opentelemetry.io/otel/sdk/metric/internal/x" @@ -38,14 +37,17 @@ type instrumentSync struct { compAgg aggregate.ComputeAggregation } -func newPipeline(res *resource.Resource, reader Reader, views []View) *pipeline { +func newPipeline(res *resource.Resource, reader Reader, views []View, exemplarFilter exemplar.Filter) *pipeline { if res == nil { res = resource.Empty() } return &pipeline{ - resource: res, - reader: reader, - views: views, + resource: res, + reader: reader, + views: views, + int64Measures: map[observableID[int64]][]aggregate.Measure[int64]{}, + float64Measures: map[observableID[float64]][]aggregate.Measure[float64]{}, + exemplarFilter: exemplarFilter, // aggregations is lazy allocated when needed. } } @@ -63,9 +65,26 @@ type pipeline struct { views []View sync.Mutex - aggregations map[instrumentation.Scope][]instrumentSync - callbacks []func(context.Context) error - multiCallbacks list.List + int64Measures map[observableID[int64]][]aggregate.Measure[int64] + float64Measures map[observableID[float64]][]aggregate.Measure[float64] + aggregations map[instrumentation.Scope][]instrumentSync + callbacks []func(context.Context) error + multiCallbacks list.List + exemplarFilter exemplar.Filter +} + +// addInt64Measure adds a new int64 measure to the pipeline for each observer. +func (p *pipeline) addInt64Measure(id observableID[int64], m []aggregate.Measure[int64]) { + p.Lock() + defer p.Unlock() + p.int64Measures[id] = m +} + +// addFloat64Measure adds a new float64 measure to the pipeline for each observer. +func (p *pipeline) addFloat64Measure(id observableID[float64], m []aggregate.Measure[float64]) { + p.Lock() + defer p.Unlock() + p.float64Measures[id] = m } // addSync adds the instrumentSync to pipeline p with scope. This method is not @@ -105,14 +124,15 @@ func (p *pipeline) produce(ctx context.Context, rm *metricdata.ResourceMetrics) p.Lock() defer p.Unlock() - var errs multierror + var err error for _, c := range p.callbacks { // TODO make the callbacks parallel. ( #3034 ) - if err := c(ctx); err != nil { - errs.append(err) + if e := c(ctx); e != nil { + err = errors.Join(err, e) } if err := ctx.Err(); err != nil { rm.Resource = nil + clear(rm.ScopeMetrics) // Erase elements to let GC collect objects. rm.ScopeMetrics = rm.ScopeMetrics[:0] return err } @@ -120,12 +140,13 @@ func (p *pipeline) produce(ctx context.Context, rm *metricdata.ResourceMetrics) for e := p.multiCallbacks.Front(); e != nil; e = e.Next() { // TODO make the callbacks parallel. ( #3034 ) f := e.Value.(multiCallback) - if err := f(ctx); err != nil { - errs.append(err) + if e := f(ctx); e != nil { + err = errors.Join(err, e) } if err := ctx.Err(); err != nil { // This means the context expired before we finished running callbacks. rm.Resource = nil + clear(rm.ScopeMetrics) // Erase elements to let GC collect objects. rm.ScopeMetrics = rm.ScopeMetrics[:0] return err } @@ -157,7 +178,7 @@ func (p *pipeline) produce(ctx context.Context, rm *metricdata.ResourceMetrics) rm.ScopeMetrics = rm.ScopeMetrics[:i] - return errs.errorOrNil() + return err } // inserter facilitates inserting of new instruments from a single scope into a @@ -219,7 +240,7 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation) measures []aggregate.Measure[N] ) - errs := &multierror{wrapped: errCreatingAggregators} + var err error seen := make(map[uint64]struct{}) for _, v := range i.pipeline.views { stream, match := v(inst) @@ -227,9 +248,9 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation) continue } matched = true - in, id, err := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation) - if err != nil { - errs.append(err) + in, id, e := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation) + if e != nil { + err = errors.Join(err, e) } if in == nil { // Drop aggregation. continue @@ -242,8 +263,12 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation) measures = append(measures, in) } + if err != nil { + err = errors.Join(errCreatingAggregators, err) + } + if matched { - return measures, errs.errorOrNil() + return measures, err } // Apply implicit default view if no explicit matched. @@ -252,15 +277,18 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation) Description: inst.Description, Unit: inst.Unit, } - in, _, err := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation) - if err != nil { - errs.append(err) + in, _, e := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation) + if e != nil { + if err == nil { + err = errCreatingAggregators + } + err = errors.Join(err, e) } if in != nil { // Ensured to have not seen given matched was false. measures = append(measures, in) } - return measures, errs.errorOrNil() + return measures, err } // addCallback registers a single instrument callback to be run when @@ -329,6 +357,9 @@ func (i *inserter[N]) cachedAggregator(scope instrumentation.Scope, kind Instrum // The view explicitly requested the default aggregation. stream.Aggregation = DefaultAggregationSelector(kind) } + if stream.ExemplarReservoirProviderSelector == nil { + stream.ExemplarReservoirProviderSelector = DefaultExemplarReservoirProviderSelector + } if err := isAggregatorCompatible(kind, stream.Aggregation); err != nil { return nil, 0, fmt.Errorf( @@ -349,7 +380,7 @@ func (i *inserter[N]) cachedAggregator(scope instrumentation.Scope, kind Instrum cv := i.aggregators.Lookup(normID, func() aggVal[N] { b := aggregate.Builder[N]{ Temporality: i.pipeline.reader.temporality(kind), - ReservoirFunc: reservoirFunc[N](stream.Aggregation), + ReservoirFunc: reservoirFunc[N](stream.ExemplarReservoirProviderSelector(stream.Aggregation), i.pipeline.exemplarFilter), } b.Filter = stream.AttributeFilter // A value less than or equal to zero will disable the aggregation @@ -552,24 +583,16 @@ func isAggregatorCompatible(kind InstrumentKind, agg Aggregation) error { // measurement. type pipelines []*pipeline -func newPipelines(res *resource.Resource, readers []Reader, views []View) pipelines { +func newPipelines(res *resource.Resource, readers []Reader, views []View, exemplarFilter exemplar.Filter) pipelines { pipes := make([]*pipeline, 0, len(readers)) for _, r := range readers { - p := newPipeline(res, r, views) + p := newPipeline(res, r, views, exemplarFilter) r.register(p) pipes = append(pipes, p) } return pipes } -func (p pipelines) registerMultiCallback(c multiCallback) metric.Registration { - unregs := make([]func(), len(p)) - for i, pipe := range p { - unregs[i] = pipe.addMultiCallback(c) - } - return unregisterFuncs{f: unregs} -} - type unregisterFuncs struct { embedded.Registration f []func() @@ -602,15 +625,15 @@ func newResolver[N int64 | float64](p pipelines, vc *cache[string, instID]) reso func (r resolver[N]) Aggregators(id Instrument) ([]aggregate.Measure[N], error) { var measures []aggregate.Measure[N] - errs := &multierror{} + var err error for _, i := range r.inserters { - in, err := i.Instrument(id, i.readerDefaultAggregation(id.Kind)) - if err != nil { - errs.append(err) + in, e := i.Instrument(id, i.readerDefaultAggregation(id.Kind)) + if e != nil { + err = errors.Join(err, e) } measures = append(measures, in...) } - return measures, errs.errorOrNil() + return measures, err } // HistogramAggregators returns the histogram Aggregators that must be updated by the instrument @@ -619,37 +642,18 @@ func (r resolver[N]) Aggregators(id Instrument) ([]aggregate.Measure[N], error) func (r resolver[N]) HistogramAggregators(id Instrument, boundaries []float64) ([]aggregate.Measure[N], error) { var measures []aggregate.Measure[N] - errs := &multierror{} + var err error for _, i := range r.inserters { agg := i.readerDefaultAggregation(id.Kind) if histAgg, ok := agg.(AggregationExplicitBucketHistogram); ok && len(boundaries) > 0 { histAgg.Boundaries = boundaries agg = histAgg } - in, err := i.Instrument(id, agg) - if err != nil { - errs.append(err) + in, e := i.Instrument(id, agg) + if e != nil { + err = errors.Join(err, e) } measures = append(measures, in...) } - return measures, errs.errorOrNil() -} - -type multierror struct { - wrapped error - errors []string -} - -func (m *multierror) errorOrNil() error { - if len(m.errors) == 0 { - return nil - } - if m.wrapped == nil { - return errors.New(strings.Join(m.errors, "; ")) - } - return fmt.Errorf("%w: %s", m.wrapped, strings.Join(m.errors, "; ")) -} - -func (m *multierror) append(err error) { - m.errors = append(m.errors, err.Error()) + return measures, err } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go b/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go index a82af538e..2fca89e5a 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go @@ -42,7 +42,7 @@ func NewMeterProvider(options ...Option) *MeterProvider { flush, sdown := conf.readerSignals() mp := &MeterProvider{ - pipes: newPipelines(conf.res, conf.readers, conf.views), + pipes: newPipelines(conf.res, conf.readers, conf.views, conf.exemplarFilter), forceFlush: flush, shutdown: sdown, } @@ -76,15 +76,17 @@ func (mp *MeterProvider) Meter(name string, options ...metric.MeterOption) metri c := metric.NewMeterConfig(options...) s := instrumentation.Scope{ - Name: name, - Version: c.InstrumentationVersion(), - SchemaURL: c.SchemaURL(), + Name: name, + Version: c.InstrumentationVersion(), + SchemaURL: c.SchemaURL(), + Attributes: c.InstrumentationAttributes(), } global.Info("Meter created", "Name", s.Name, "Version", s.Version, "SchemaURL", s.SchemaURL, + "Attributes", s.Attributes, ) return mp.meters.Lookup(s, func() *meter { diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/version.go b/vendor/go.opentelemetry.io/otel/sdk/metric/version.go index 44316caa1..6347060bf 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/version.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/version.go @@ -5,5 +5,5 @@ package metric // import "go.opentelemetry.io/otel/sdk/metric" // version is the current release version of the metric SDK in use. func version() string { - return "1.29.0" + return "1.32.0" } diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/view.go b/vendor/go.opentelemetry.io/otel/sdk/metric/view.go index cd08c6732..630890f42 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/metric/view.go +++ b/vendor/go.opentelemetry.io/otel/sdk/metric/view.go @@ -96,11 +96,12 @@ func NewView(criteria Instrument, mask Stream) View { return func(i Instrument) (Stream, bool) { if matchFunc(i) { return Stream{ - Name: nonZero(mask.Name, i.Name), - Description: nonZero(mask.Description, i.Description), - Unit: nonZero(mask.Unit, i.Unit), - Aggregation: agg, - AttributeFilter: mask.AttributeFilter, + Name: nonZero(mask.Name, i.Name), + Description: nonZero(mask.Description, i.Description), + Unit: nonZero(mask.Unit, i.Unit), + Aggregation: agg, + AttributeFilter: mask.AttributeFilter, + ExemplarReservoirProviderSelector: mask.ExemplarReservoirProviderSelector, }, true } return Stream{}, false diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go b/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go index 95a61d61d..c02aeefdd 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go +++ b/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "strings" ) // ErrPartialResource is returned by a detector when complete source @@ -57,62 +56,37 @@ func Detect(ctx context.Context, detectors ...Detector) (*Resource, error) { // these errors will be returned. Otherwise, nil is returned. func detect(ctx context.Context, res *Resource, detectors []Detector) error { var ( - r *Resource - errs detectErrs - err error + r *Resource + err error + e error ) for _, detector := range detectors { if detector == nil { continue } - r, err = detector.Detect(ctx) - if err != nil { - errs = append(errs, err) - if !errors.Is(err, ErrPartialResource) { + r, e = detector.Detect(ctx) + if e != nil { + err = errors.Join(err, e) + if !errors.Is(e, ErrPartialResource) { continue } } - r, err = Merge(res, r) - if err != nil { - errs = append(errs, err) + r, e = Merge(res, r) + if e != nil { + err = errors.Join(err, e) } *res = *r } - if len(errs) == 0 { - return nil - } - if errors.Is(errs, ErrSchemaURLConflict) { - // If there has been a merge conflict, ensure the resource has no - // schema URL. - res.schemaURL = "" - } - return errs -} - -type detectErrs []error - -func (e detectErrs) Error() string { - errStr := make([]string, len(e)) - for i, err := range e { - errStr[i] = fmt.Sprintf("* %s", err) - } - - format := "%d errors occurred detecting resource:\n\t%s" - return fmt.Sprintf(format, len(e), strings.Join(errStr, "\n\t")) -} + if err != nil { + if errors.Is(err, ErrSchemaURLConflict) { + // If there has been a merge conflict, ensure the resource has no + // schema URL. + res.schemaURL = "" + } -func (e detectErrs) Unwrap() error { - switch len(e) { - case 0: - return nil - case 1: - return e[0] + err = fmt.Errorf("error detecting resource: %w", err) } - return e[1:] -} - -func (e detectErrs) Is(target error) bool { - return len(e) != 0 && errors.Is(e[0], target) + return err } diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go b/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go index 6ac1cdbf7..cf3c88e15 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go +++ b/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go @@ -20,15 +20,13 @@ type ( // telemetrySDK is a Detector that provides information about // the OpenTelemetry SDK used. This Detector is included as a // builtin. If these resource attributes are not wanted, use - // the WithTelemetrySDK(nil) or WithoutBuiltin() options to - // explicitly disable them. + // resource.New() to explicitly disable them. telemetrySDK struct{} // host is a Detector that provides information about the host // being run on. This Detector is included as a builtin. If // these resource attributes are not wanted, use the - // WithHost(nil) or WithoutBuiltin() options to explicitly - // disable them. + // resource.New() to explicitly disable them. host struct{} stringDetector struct { diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go b/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go index 71386e2da..3677c83d7 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go +++ b/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go @@ -10,17 +10,16 @@ import ( "golang.org/x/sys/windows/registry" ) -// implements hostIDReader +// implements hostIDReader. type hostIDReaderWindows struct{} -// read reads MachineGuid from the windows registry key: -// SOFTWARE\Microsoft\Cryptography +// read reads MachineGuid from the Windows registry key: +// SOFTWARE\Microsoft\Cryptography. func (*hostIDReaderWindows) read() (string, error) { k, err := registry.OpenKey( registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Cryptography`, registry.QUERY_VALUE|registry.WOW64_64KEY, ) - if err != nil { return "", err } diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go b/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go index 5e3d199d7..a6a5a53c0 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go +++ b/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go @@ -17,7 +17,6 @@ import ( func platformOSDescription() (string, error) { k, err := registry.OpenKey( registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE) - if err != nil { return "", err } diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go b/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go index 1d399a75d..ccc97e1b6 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go @@ -280,6 +280,7 @@ func (bsp *batchSpanProcessor) exportSpans(ctx context.Context) error { // // It is up to the exporter to implement any type of retry logic if a batch is failing // to be exported, since it is specific to the protocol and backend being sent to. + clear(bsp.batch) // Erase elements to let GC collect objects bsp.batch = bsp.batch[:0] if err != nil { @@ -316,7 +317,11 @@ func (bsp *batchSpanProcessor) processQueue() { bsp.batchMutex.Unlock() if shouldExport { if !bsp.timer.Stop() { - <-bsp.timer.C + // Handle both GODEBUG=asynctimerchan=[0|1] properly. + select { + case <-bsp.timer.C: + default: + } } if err := bsp.exportSpans(ctx); err != nil { otel.Handle(err) diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go b/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go index 821c83faa..8c308dd60 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go @@ -12,25 +12,26 @@ import ( // evictedQueue is a FIFO queue with a configurable capacity. type evictedQueue[T any] struct { - queue []T - capacity int - droppedCount int - logDropped func() + queue []T + capacity int + droppedCount int + logDroppedMsg string + logDroppedOnce sync.Once } func newEvictedQueueEvent(capacity int) evictedQueue[Event] { // Do not pre-allocate queue, do this lazily. return evictedQueue[Event]{ - capacity: capacity, - logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Event") }), + capacity: capacity, + logDroppedMsg: "limit reached: dropping trace trace.Event", } } func newEvictedQueueLink(capacity int) evictedQueue[Link] { // Do not pre-allocate queue, do this lazily. return evictedQueue[Link]{ - capacity: capacity, - logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Link") }), + capacity: capacity, + logDroppedMsg: "limit reached: dropping trace trace.Link", } } @@ -53,6 +54,10 @@ func (eq *evictedQueue[T]) add(value T) { eq.queue = append(eq.queue, value) } +func (eq *evictedQueue[T]) logDropped() { + eq.logDroppedOnce.Do(func() { global.Warn(eq.logDroppedMsg) }) +} + // copy returns a copy of the evictedQueue. func (eq *evictedQueue[T]) copy() []T { return slices.Clone(eq.queue) diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go b/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go index 14c2e5beb..185aa7c08 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go @@ -139,9 +139,10 @@ func (p *TracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T name = defaultTracerName } is := instrumentation.Scope{ - Name: name, - Version: c.InstrumentationVersion(), - SchemaURL: c.SchemaURL(), + Name: name, + Version: c.InstrumentationVersion(), + SchemaURL: c.SchemaURL(), + Attributes: c.InstrumentationAttributes(), } t, ok := func() (trace.Tracer, bool) { @@ -168,7 +169,7 @@ func (p *TracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T // slowing down all tracing consumers. // - Logging code may be instrumented with tracing and deadlock because it could try // acquiring the same non-reentrant mutex. - global.Info("Tracer created", "name", name, "version", is.Version, "schemaURL", is.SchemaURL) + global.Info("Tracer created", "name", name, "version", is.Version, "schemaURL", is.SchemaURL, "attributes", is.Attributes) } return t } diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/span.go b/vendor/go.opentelemetry.io/otel/sdk/trace/span.go index 4945f5083..17f883c2c 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/trace/span.go +++ b/vendor/go.opentelemetry.io/otel/sdk/trace/span.go @@ -174,6 +174,17 @@ func (s *recordingSpan) IsRecording() bool { s.mu.Lock() defer s.mu.Unlock() + return s.isRecording() +} + +// isRecording returns if this span is being recorded. If this span has ended +// this will return false. +// +// This method assumes s.mu.Lock is held by the caller. +func (s *recordingSpan) isRecording() bool { + if s == nil { + return false + } return s.endTime.IsZero() } @@ -182,11 +193,15 @@ func (s *recordingSpan) IsRecording() bool { // included in the set status when the code is for an error. If this span is // not being recorded than this method does nothing. func (s *recordingSpan) SetStatus(code codes.Code, description string) { - if !s.IsRecording() { + if s == nil { return } + s.mu.Lock() defer s.mu.Unlock() + if !s.isRecording() { + return + } if s.status.Code > code { return } @@ -210,12 +225,15 @@ func (s *recordingSpan) SetStatus(code codes.Code, description string) { // attributes the span is configured to have, the last added attributes will // be dropped. func (s *recordingSpan) SetAttributes(attributes ...attribute.KeyValue) { - if !s.IsRecording() { + if s == nil || len(attributes) == 0 { return } s.mu.Lock() defer s.mu.Unlock() + if !s.isRecording() { + return + } limit := s.tracer.provider.spanLimits.AttributeCountLimit if limit == 0 { @@ -233,7 +251,7 @@ func (s *recordingSpan) SetAttributes(attributes ...attribute.KeyValue) { // Otherwise, add without deduplication. When attributes are read they // will be deduplicated, optimizing the operation. - s.attributes = slices.Grow(s.attributes, len(s.attributes)+len(attributes)) + s.attributes = slices.Grow(s.attributes, len(attributes)) for _, a := range attributes { if !a.Valid() { // Drop all invalid attributes. @@ -280,13 +298,17 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) { // Do not set a capacity when creating this map. Benchmark testing has // showed this to only add unused memory allocations in general use. - exists := make(map[attribute.Key]int) - s.dedupeAttrsFromRecord(&exists) + exists := make(map[attribute.Key]int, len(s.attributes)) + s.dedupeAttrsFromRecord(exists) // Now that s.attributes is deduplicated, adding unique attributes up to // the capacity of s will not over allocate s.attributes. - sum := len(attrs) + len(s.attributes) - s.attributes = slices.Grow(s.attributes, min(sum, limit)) + + // max size = limit + maxCap := min(len(attrs)+len(s.attributes), limit) + if cap(s.attributes) < maxCap { + s.attributes = slices.Grow(s.attributes, maxCap-cap(s.attributes)) + } for _, a := range attrs { if !a.Valid() { // Drop all invalid attributes. @@ -296,6 +318,7 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) { if idx, ok := exists[a.Key]; ok { // Perform all updates before dropping, even when at capacity. + a = truncateAttr(s.tracer.provider.spanLimits.AttributeValueLengthLimit, a) s.attributes[idx] = a continue } @@ -386,9 +409,10 @@ func (s *recordingSpan) End(options ...trace.SpanEndOption) { // the span's duration in case some operation below takes a while. et := monotonicEndTime(s.startTime) - // Do relative expensive check now that we have an end time and see if we - // need to do any more processing. - if !s.IsRecording() { + // Lock the span now that we have an end time and see if we need to do any more processing. + s.mu.Lock() + if !s.isRecording() { + s.mu.Unlock() return } @@ -413,10 +437,11 @@ func (s *recordingSpan) End(options ...trace.SpanEndOption) { } if s.executionTracerTaskEnd != nil { + s.mu.Unlock() s.executionTracerTaskEnd() + s.mu.Lock() } - s.mu.Lock() // Setting endTime to non-zero marks the span as ended and not recording. if config.Timestamp().IsZero() { s.endTime = et @@ -450,7 +475,13 @@ func monotonicEndTime(start time.Time) time.Time { // does not change the Span status. If this span is not being recorded or err is nil // than this method does nothing. func (s *recordingSpan) RecordError(err error, opts ...trace.EventOption) { - if s == nil || err == nil || !s.IsRecording() { + if s == nil || err == nil { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { return } @@ -486,14 +517,23 @@ func recordStackTrace() string { } // AddEvent adds an event with the provided name and options. If this span is -// not being recorded than this method does nothing. +// not being recorded then this method does nothing. func (s *recordingSpan) AddEvent(name string, o ...trace.EventOption) { - if !s.IsRecording() { + if s == nil { + return + } + + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { return } s.addEvent(name, o...) } +// addEvent adds an event with the provided name and options. +// +// This method assumes s.mu.Lock is held by the caller. func (s *recordingSpan) addEvent(name string, o ...trace.EventOption) { c := trace.NewEventConfig(o...) e := Event{Name: name, Attributes: c.Attributes(), Time: c.Timestamp()} @@ -510,20 +550,21 @@ func (s *recordingSpan) addEvent(name string, o ...trace.EventOption) { e.Attributes = e.Attributes[:limit] } - s.mu.Lock() s.events.add(e) - s.mu.Unlock() } // SetName sets the name of this span. If this span is not being recorded than // this method does nothing. func (s *recordingSpan) SetName(name string) { - if !s.IsRecording() { + if s == nil { return } s.mu.Lock() defer s.mu.Unlock() + if !s.isRecording() { + return + } s.name = name } @@ -579,29 +620,26 @@ func (s *recordingSpan) Attributes() []attribute.KeyValue { func (s *recordingSpan) dedupeAttrs() { // Do not set a capacity when creating this map. Benchmark testing has // showed this to only add unused memory allocations in general use. - exists := make(map[attribute.Key]int) - s.dedupeAttrsFromRecord(&exists) + exists := make(map[attribute.Key]int, len(s.attributes)) + s.dedupeAttrsFromRecord(exists) } // dedupeAttrsFromRecord deduplicates the attributes of s to fit capacity // using record as the record of unique attribute keys to their index. // // This method assumes s.mu.Lock is held by the caller. -func (s *recordingSpan) dedupeAttrsFromRecord(record *map[attribute.Key]int) { +func (s *recordingSpan) dedupeAttrsFromRecord(record map[attribute.Key]int) { // Use the fact that slices share the same backing array. unique := s.attributes[:0] for _, a := range s.attributes { - if idx, ok := (*record)[a.Key]; ok { + if idx, ok := record[a.Key]; ok { unique[idx] = a } else { unique = append(unique, a) - (*record)[a.Key] = len(unique) - 1 + record[a.Key] = len(unique) - 1 } } - // s.attributes have element types of attribute.KeyValue. These types are - // not pointers and they themselves do not contain pointer fields, - // therefore the duplicate values do not need to be zeroed for them to be - // garbage collected. + clear(s.attributes[len(unique):]) // Erase unneeded elements to let GC collect objects. s.attributes = unique } @@ -657,7 +695,7 @@ func (s *recordingSpan) Resource() *resource.Resource { } func (s *recordingSpan) AddLink(link trace.Link) { - if !s.IsRecording() { + if s == nil { return } if !link.SpanContext.IsValid() && len(link.Attributes) == 0 && @@ -665,6 +703,12 @@ func (s *recordingSpan) AddLink(link trace.Link) { return } + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { + return + } + l := Link{SpanContext: link.SpanContext, Attributes: link.Attributes} // Discard attributes over limit. @@ -678,9 +722,7 @@ func (s *recordingSpan) AddLink(link trace.Link) { l.Attributes = l.Attributes[:limit] } - s.mu.Lock() s.links.add(l) - s.mu.Unlock() } // DroppedAttributes returns the number of attributes dropped by the span @@ -755,12 +797,16 @@ func (s *recordingSpan) snapshot() ReadOnlySpan { } func (s *recordingSpan) addChild() { - if !s.IsRecording() { + if s == nil { return } + s.mu.Lock() + defer s.mu.Unlock() + if !s.isRecording() { + return + } s.childSpanCount++ - s.mu.Unlock() } func (*recordingSpan) private() {} diff --git a/vendor/go.opentelemetry.io/otel/sdk/version.go b/vendor/go.opentelemetry.io/otel/sdk/version.go index b7cede891..0b214d3fe 100644 --- a/vendor/go.opentelemetry.io/otel/sdk/version.go +++ b/vendor/go.opentelemetry.io/otel/sdk/version.go @@ -5,5 +5,5 @@ package sdk // import "go.opentelemetry.io/otel/sdk" // Version is the current release version of the OpenTelemetry SDK in use. func Version() string { - return "1.29.0" + return "1.32.0" } diff --git a/vendor/go.opentelemetry.io/otel/version.go b/vendor/go.opentelemetry.io/otel/version.go index 6d3c7b1f4..59e248161 100644 --- a/vendor/go.opentelemetry.io/otel/version.go +++ b/vendor/go.opentelemetry.io/otel/version.go @@ -5,5 +5,5 @@ package otel // import "go.opentelemetry.io/otel" // Version is the current release version of OpenTelemetry in use. func Version() string { - return "1.31.0" + return "1.32.0" } diff --git a/vendor/go.opentelemetry.io/otel/versions.yaml b/vendor/go.opentelemetry.io/otel/versions.yaml index cdebdb5eb..c04b12f6b 100644 --- a/vendor/go.opentelemetry.io/otel/versions.yaml +++ b/vendor/go.opentelemetry.io/otel/versions.yaml @@ -3,19 +3,13 @@ module-sets: stable-v1: - version: v1.31.0 + version: v1.32.0 modules: - go.opentelemetry.io/otel - go.opentelemetry.io/otel/bridge/opencensus - go.opentelemetry.io/otel/bridge/opencensus/test - go.opentelemetry.io/otel/bridge/opentracing - go.opentelemetry.io/otel/bridge/opentracing/test - - go.opentelemetry.io/otel/example/dice - - go.opentelemetry.io/otel/example/namedtracer - - go.opentelemetry.io/otel/example/opencensus - - go.opentelemetry.io/otel/example/otel-collector - - go.opentelemetry.io/otel/example/passthrough - - go.opentelemetry.io/otel/example/zipkin - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp - go.opentelemetry.io/otel/exporters/otlp/otlptrace @@ -29,12 +23,11 @@ module-sets: - go.opentelemetry.io/otel/sdk/metric - go.opentelemetry.io/otel/trace experimental-metrics: - version: v0.53.0 + version: v0.54.0 modules: - - go.opentelemetry.io/otel/example/prometheus - go.opentelemetry.io/otel/exporters/prometheus experimental-logs: - version: v0.7.0 + version: v0.8.0 modules: - go.opentelemetry.io/otel/log - go.opentelemetry.io/otel/sdk/log @@ -42,7 +35,7 @@ module-sets: - go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp - go.opentelemetry.io/otel/exporters/stdout/stdoutlog experimental-schema: - version: v0.0.10 + version: v0.0.11 modules: - go.opentelemetry.io/otel/schema excluded-modules: diff --git a/vendor/modules.txt b/vendor/modules.txt index 9448d2207..1b6c2fd86 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -202,7 +202,7 @@ github.com/google/uuid ## explicit; go 1.20 github.com/gopacket/gopacket github.com/gopacket/gopacket/layers -# github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 +# github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 ## explicit; go 1.21 github.com/grpc-ecosystem/grpc-gateway/v2/internal/httprule github.com/grpc-ecosystem/grpc-gateway/v2/runtime @@ -222,8 +222,8 @@ github.com/jpillora/backoff # github.com/json-iterator/go v1.1.12 ## explicit; go 1.12 github.com/json-iterator/go -# github.com/klauspost/compress v1.17.9 -## explicit; go 1.20 +# github.com/klauspost/compress v1.17.11 +## explicit; go 1.21 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse @@ -253,8 +253,8 @@ github.com/mdlayher/ethernet # github.com/minio/md5-simd v1.1.2 ## explicit; go 1.14 github.com/minio/md5-simd -# github.com/minio/minio-go/v7 v7.0.77 -## explicit; go 1.21 +# github.com/minio/minio-go/v7 v7.0.82 +## explicit; go 1.22 github.com/minio/minio-go/v7 github.com/minio/minio-go/v7/pkg/cors github.com/minio/minio-go/v7/pkg/credentials @@ -282,7 +282,7 @@ github.com/munnerz/goautoneg # github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f ## explicit github.com/mwitkow/go-conntrack -# github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241126155124-18e017f67001 +# github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e ## explicit; go 1.22.3 github.com/netobserv/flowlogs-pipeline/pkg/api github.com/netobserv/flowlogs-pipeline/pkg/config @@ -324,7 +324,7 @@ github.com/netobserv/loki-client-go/pkg/labelutil github.com/netobserv/loki-client-go/pkg/logproto github.com/netobserv/loki-client-go/pkg/metric github.com/netobserv/loki-client-go/pkg/urlutil -# github.com/netobserv/netobserv-ebpf-agent v1.6.1-crc2.0.20241008130234-a20397fb8f88 => github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20241213142900-5c07db3a6c0b +# github.com/netobserv/netobserv-ebpf-agent v1.7.0-community.0.20241213165959-7e7f8c42a3f6 => github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20250107120019-7532f1cb527c ## explicit; go 1.22.3 github.com/netobserv/netobserv-ebpf-agent/pkg/agent github.com/netobserv/netobserv-ebpf-agent/pkg/decode @@ -618,7 +618,7 @@ github.com/xdg-go/stringprep # github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 ## explicit; go 1.15.0 github.com/xrash/smetrics -# go.opentelemetry.io/otel v1.31.0 +# go.opentelemetry.io/otel v1.32.0 ## explicit; go 1.22 go.opentelemetry.io/otel go.opentelemetry.io/otel/attribute @@ -631,8 +631,8 @@ go.opentelemetry.io/otel/internal/global go.opentelemetry.io/otel/propagation go.opentelemetry.io/otel/semconv/v1.21.0 go.opentelemetry.io/otel/semconv/v1.26.0 -# go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 -## explicit; go 1.21 +# go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 +## explicit; go 1.22 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/envconfig @@ -665,28 +665,28 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal/envconfig go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal/otlpconfig go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal/retry -# go.opentelemetry.io/otel/metric v1.31.0 +# go.opentelemetry.io/otel/metric v1.32.0 ## explicit; go 1.22 go.opentelemetry.io/otel/metric go.opentelemetry.io/otel/metric/embedded go.opentelemetry.io/otel/metric/noop -# go.opentelemetry.io/otel/sdk v1.29.0 -## explicit; go 1.21 +# go.opentelemetry.io/otel/sdk v1.32.0 +## explicit; go 1.22 go.opentelemetry.io/otel/sdk go.opentelemetry.io/otel/sdk/instrumentation go.opentelemetry.io/otel/sdk/internal/env go.opentelemetry.io/otel/sdk/internal/x go.opentelemetry.io/otel/sdk/resource go.opentelemetry.io/otel/sdk/trace -# go.opentelemetry.io/otel/sdk/metric v1.29.0 -## explicit; go 1.21 +# go.opentelemetry.io/otel/sdk/metric v1.32.0 +## explicit; go 1.22 go.opentelemetry.io/otel/sdk/metric +go.opentelemetry.io/otel/sdk/metric/exemplar go.opentelemetry.io/otel/sdk/metric/internal go.opentelemetry.io/otel/sdk/metric/internal/aggregate -go.opentelemetry.io/otel/sdk/metric/internal/exemplar go.opentelemetry.io/otel/sdk/metric/internal/x go.opentelemetry.io/otel/sdk/metric/metricdata -# go.opentelemetry.io/otel/trace v1.31.0 +# go.opentelemetry.io/otel/trace v1.32.0 ## explicit; go 1.22 go.opentelemetry.io/otel/trace go.opentelemetry.io/otel/trace/embedded @@ -790,10 +790,10 @@ golang.org/x/tools/go/ast/inspector # gomodules.xyz/jsonpatch/v2 v2.4.0 ## explicit; go 1.20 gomodules.xyz/jsonpatch/v2 -# google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 +# google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 ## explicit; go 1.21 google.golang.org/genproto/googleapis/api/httpbody -# google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 +# google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 ## explicit; go 1.21 google.golang.org/genproto/googleapis/rpc/errdetails google.golang.org/genproto/googleapis/rpc/status @@ -1511,4 +1511,4 @@ sigs.k8s.io/structured-merge-diff/v4/value ## explicit; go 1.12 sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 -# github.com/netobserv/netobserv-ebpf-agent => github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20241213142900-5c07db3a6c0b +# github.com/netobserv/netobserv-ebpf-agent => github.com/msherif1234/netobserv-ebpf-agent v0.0.0-20250107120019-7532f1cb527c