diff --git a/README.md b/README.md index 9d1e4b0..7923745 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,8 @@ BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 | 624 FP8 (TFLOPS) | N/A | N/A | N/A FP8 Tensor Core (TFLOPS) | ? | 733 | N/A FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 | N/A +FP4 (TFLOPS) | N/A | N/A | N/A +FP4 Tensor Core (TFLOPS) | N/A | N/A | N/A INT8 (TOPS) | ? | ? | ? INT8 Tensor Core (TOPS) | ? | 733 | 624 INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 | 1248 @@ -60,6 +62,7 @@ INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 | ? GPU Name | H100 | L40S | A100 PCIe 80GB Manufacturer | NVIDIA | NVIDIA | NVIDIA Architecture | Hopper | Ada Lovelace | Ampere +Manufacturing Process | ? | ? | ? NVIDIA RT Cores | ? | 142 (3rd gen) | ? NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) | 432 (3rd gen) NVIDIA CUDA Cores | 14592 | 18176 | 6912 @@ -69,4 +72,5 @@ Interconnect Type | PCIe Gen5 | PCIe Gen4 | PCIe Gen4 Encoders and Decoders | 0, 7 | 3, 3 | 0, 5 CUDA Compute Capability | 9 | 8.9 | 8 Power Consumption (W) | 350 | 300 | 300 +Die Size (mm2) | ? | ? | ? diff --git a/data/specs.json b/data/specs.json index c1b0f84..8caa940 100644 --- a/data/specs.json +++ b/data/specs.json @@ -15,6 +15,8 @@ "fp8": { "full_name": "FP8", "unit": "TFLOPS" }, "fp8_tensor_core": { "full_name": "FP8 Tensor Core", "unit": "TFLOPS" }, "fp8_tensor_core_sparsity": { "full_name": "FP8 Tensor Core with Sparsity", "unit": "TFLOPS" }, + "fp4": { "full_name": "FP4", "unit": "TFLOPS" }, + "fp4_tensor_core": { "full_name": "FP4 Tensor Core", "unit": "TFLOPS" }, "int8": { "full_name": "INT8", "unit": "TOPS" }, "int8_tensor_core": { "full_name": "INT8 Tensor Core", "unit": "TOPS" }, "int8_tensor_core_sparsity": { "full_name": "INT8 Tensor Core with Sparsity", "unit": "TOPS" }, @@ -23,6 +25,7 @@ "int4_tensor_core_sparsity": { "full_name": "INT4 Tensor Core with Sparsity", "unit": "TOPS" }, "manufacturer": { "full_name": "Manufacturer" }, "architecture": { "full_name": "Architecture" }, + "process": { "full_name": "Manufacturing Process" }, "nvidia_rt_cores": { "full_name": "NVIDIA RT Cores"}, "nvidia_rt_cores_generation": { "full_name": "NVIDIA RT Cores Generation"}, "nvidia_tensor_cores": { "full_name": "NVIDIA Tensor Cores" }, @@ -33,7 +36,8 @@ "interconnect": { "full_name": "Interconnect Type" }, "encoders_decoders": { "full_name": "Encoders and Decoders" }, "cuda_compute_capability": { "full_name": "CUDA Compute Capability" }, - "power_consumption": { "full_name": "Power Consumption", "unit": "W" } + "power_consumption": { "full_name": "Power Consumption", "unit": "W" }, + "die_size": { "full_name": "Die Size", "unit": "mm2" } }, "h100": { "name": "H100", @@ -50,6 +54,8 @@ "bf16_tensor_core_sparsity": 1513, "fp8": 0, "fp8_tensor_core": null, + "fp4": 0, + "fp4_tensor_core": 0, "fp8_tensor_core_sparsity": 3026, "int8": null, "int8_tensor_core": null, @@ -59,6 +65,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Hopper", + "process": null, "nvidia_rt_cores": null, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 456, @@ -70,6 +77,7 @@ "encoders_decoders": "0, 7", "cuda_compute_capability": "9", "power_consumption": 350, + "die_size": null, "sources": [ "https://getdeploying.com/reference/cloud-gpu/nvidia-h100", "https://www.techpowerup.com/gpu-specs/h100-pcie-80-gb.c3899", @@ -92,6 +100,8 @@ "fp8": 0, "fp8_tensor_core": 733, "fp8_tensor_core_sparsity": 1466, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 733, "int8_tensor_core_sparsity": 1466, @@ -100,6 +110,7 @@ "int4_tensor_core_sparsity": 1466, "manufacturer": "NVIDIA", "architecture": "Ada Lovelace", + "process": null, "nvidia_rt_cores": 142, "nvidia_rt_cores_generation": 3, "nvidia_tensor_cores": 568, @@ -111,6 +122,7 @@ "encoders_decoders": "3, 3", "cuda_compute_capability": "8.9", "power_consumption": 300, + "die_size": null, "sources": [ "https://resources.nvidia.com/en-us-l40s/l40s-datasheet-28413", "https://www.techpowerup.com/gpu-specs/l40s.c4173" @@ -132,6 +144,8 @@ "fp8": 0, "fp8_tensor_core": 242, "fp8_tensor_core_sparsity": 485, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 242, "int8_tensor_core_sparsity": 485, @@ -140,6 +154,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Ada Lovelace", + "process": null, "nvidia_rt_cores": 60, "nvidia_rt_cores_generation": 3, "nvidia_tensor_cores": 240, @@ -151,6 +166,7 @@ "encoders_decoders": "2, 4", "cuda_compute_capability": "8.9", "power_consumption": 72, + "die_size": null, "sources": [ "https://resources.nvidia.com/en-us-data-center-overview/l4-gpu-datasheet", "https://www.techpowerup.com/gpu-specs/l4.c4091" @@ -172,6 +188,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 624, "int8_tensor_core_sparsity": 1248, @@ -180,6 +198,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Ampere", + "process": null, "nvidia_rt_cores": null, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 432, @@ -191,6 +210,7 @@ "encoders_decoders": "0, 5", "cuda_compute_capability": "8.0", "power_consumption": 250, + "die_size": null, "sources": [ "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf" ] @@ -211,6 +231,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 624, "int8_tensor_core_sparsity": 1248, @@ -219,6 +241,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Ampere", + "process": null, "nvidia_rt_cores": null, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 432, @@ -230,6 +253,7 @@ "encoders_decoders": "0, 5", "cuda_compute_capability": "8", "power_consumption": 300, + "die_size": null, "sources": [ "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf", "https://www.techpowerup.com/gpu-specs/a100-pcie-80-gb.c3821", @@ -252,6 +276,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 624, "int8_tensor_core_sparsity": 1248, @@ -260,6 +286,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Ampere", + "process": null, "nvidia_rt_cores": null, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 432, @@ -271,6 +298,7 @@ "encoders_decoders": "0, 5", "cuda_compute_capability": "8.0", "power_consumption": 400, + "die_size": null, "sources": [ "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf" ] @@ -291,6 +319,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 624, "int8_tensor_core_sparsity": 1248, @@ -299,6 +329,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Ampere", + "process": null, "nvidia_rt_cores": null, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 432, @@ -310,6 +341,7 @@ "encoders_decoders": "0, 5", "cuda_compute_capability": "8", "power_consumption": 400, + "die_size": null, "sources": [ "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf", "https://www.techpowerup.com/gpu-specs/a100-pcie-80-gb.c3821", @@ -332,6 +364,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 250, "int8_tensor_core_sparsity": 500, @@ -340,6 +374,7 @@ "int4_tensor_core_sparsity": 1000, "manufacturer": "NVIDIA", "architecture": "Ampere", + "process": null, "nvidia_rt_cores": 72, "nvidia_rt_cores_generation": 2, "nvidia_tensor_cores": 288, @@ -351,6 +386,7 @@ "encoders_decoders": "1, 2", "cuda_compute_capability": "8.6", "power_consumption": 150, + "die_size": null, "sources": [ "https://resources.nvidia.com/en-us-gpu/a10-datasheet-nvidia", "https://www.techpowerup.com/gpu-specs/a10-pcie.c3793", @@ -373,6 +409,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": 130, "int8_tensor_core": null, "int8_tensor_core_sparsity": null, @@ -381,6 +419,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Turing", + "process": null, "nvidia_rt_cores": null, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 320, @@ -392,6 +431,7 @@ "encoders_decoders": "1, 2", "cuda_compute_capability": "7.5", "power_consumption": 70, + "die_size": null, "sources": [ "https://getdeploying.com/reference/cloud-gpu/nvidia-t4", "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new", @@ -414,6 +454,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": null, "int8_tensor_core_sparsity": null, @@ -422,6 +464,7 @@ "int4_tensor_core_sparsity": null, "manufacturer": "NVIDIA", "architecture": "Turing", + "process": null, "nvidia_rt_cores": 48, "nvidia_rt_cores_generation": null, "nvidia_tensor_cores": 384, @@ -433,6 +476,7 @@ "encoders_decoders": "1, 2", "cuda_compute_capability": "7.5", "power_consumption": 230, + "die_size": null, "sources": [ "https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/quadro-product-literature/quadro-rtx-5000-data-sheet-us-nvidia-704120-r4-web.pdf", "https://www.techpowerup.com/gpu-specs/quadro-rtx-5000.c3308" @@ -454,6 +498,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 0, "int8_tensor_core_sparsity": 0, @@ -462,6 +508,7 @@ "int4_tensor_core_sparsity": 0, "manufacturer": "NVIDIA", "architecture": "Volta", + "process": null, "nvidia_rt_cores": 0, "nvidia_tensor_cores": 640, "nvidia_tensor_cores_generation": 1, @@ -472,6 +519,7 @@ "encoders_decoders": "3, 1", "cuda_compute_capability": "7", "power_consumption": 250, + "die_size": null, "sources": [ "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", "https://www.techpowerup.com/gpu-specs/tesla-v100-pcie-32-gb.c3184", @@ -494,6 +542,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 0, "int8_tensor_core_sparsity": 0, @@ -502,6 +552,7 @@ "int4_tensor_core_sparsity": 0, "manufacturer": "NVIDIA", "architecture": "Volta", + "process": null, "nvidia_rt_cores": 0, "nvidia_tensor_cores": 640, "nvidia_tensor_cores_generation": 1, @@ -512,6 +563,7 @@ "encoders_decoders": "3, 1", "cuda_compute_capability": "7", "power_consumption": 300, + "die_size": null, "sources": [ "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-16-gb.c3018", @@ -535,6 +587,8 @@ "fp8": 0, "fp8_tensor_core": 0, "fp8_tensor_core_sparsity": 0, + "fp4": 0, + "fp4_tensor_core": 0, "int8": null, "int8_tensor_core": 0, "int8_tensor_core_sparsity": 0, @@ -543,6 +597,7 @@ "int4_tensor_core_sparsity": 0, "manufacturer": "NVIDIA", "architecture": "Volta", + "process": null, "nvidia_rt_cores": 0, "nvidia_tensor_cores": 640, "nvidia_tensor_cores_generation": 1, @@ -553,10 +608,55 @@ "encoders_decoders": "3, 1", "cuda_compute_capability": "7", "power_consumption": 250, + "die_size": null, "sources": [ "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", "https://www.techpowerup.com/gpu-specs/tesla-v100s-pcie-32-gb.c3584", "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" ] - } + }, + "RTX4090": { + "name": "RTX4090", + "fp64": 2.3, + "fp64_tensor_core": null, + "fp32": 73.2, + "tf32_tensor_core": 292.8, + "tf32_tensor_core_sparsity": null, + "fp16": 292.8, + "fp16_tensor_core": 1171.2, + "fp16_tensor_core_sparsity": null, + "bf16": null, + "bf16_tensor_core": null, + "bf16_tensor_core_sparsity": null, + "fp8": null, + "fp8_tensor_core": null, + "fp4": null, + "fp4_tensor_core": null, + "fp8_tensor_core_sparsity": null, + "int8": 292.8, + "int8_tensor_core": 1171.2, + "int8_tensor_core_sparsity": null, + "int4": null, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Ada Lovelace", + "process": "TSMC 4N", + "nvidia_rt_cores": 128, + "nvidia_rt_cores_generation": 4, + "nvidia_tensor_cores": 512, + "nvidia_tensor_cores_generation": 4, + "nvidia_cuda_cores": 16384, + "gpu_memory": 24, + "memory_bandwidth": 1008, + "interconnect": "PCIe Gen4", + "encoders_decoders": "2 NVENC 8.0, 1 NVDEC", + "cuda_compute_capability": "8.9", + "power_consumption": 450, + "die_size": null, + "sources": [ + "https://www.techpowerup.com/gpu-specs/rtx-4090.c3883", + "https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/rtx-4090/" + ] + } } diff --git a/specs.md b/specs.md index 3ad04a7..692df66 100644 --- a/specs.md +++ b/specs.md @@ -1,40 +1,44 @@ # GPU Specs -Attribute (Unit) | H100 | L40S | L4 | A100 PCIe 40GB | A100 PCIe 80GB | A100 SXM4 40GB | A100 SXM4 80GB | A10 | T4 | Quadro RTX 5000 | V100 PCIe | V100 SXM2 | V100S PCIe ---- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- -FP64 (TFLOPS) | 25.6 | 1.4 | 0.5 | 9.7 | 9.7 | 9.7 | 9.7 | 1 | ? | 0.3 | 7.1 | 7.8 | 8.2 -FP64 Tensor Core (TFLOPS) | 51 | ? | ? | 19.5 | 19.5 | 19.5 | 19.5 | ? | N/A | N/A | N/A | N/A | N/A -FP32 (TFLOPS) | 51.2 | 91.6 | 30.3 | 19.5 | 19.5 | 19.5 | 19.5 | 31.2 | 8.1 | 11.2 | 14.1 | 15.7 | 16.4 -TF32 Tensor Core (TFLOPS) | ? | 183 | 60 | 156 | 156 | 156 | 156 | 62.5 | N/A | N/A | 112 | 125 | N/A -TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 | 120 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A -FP16 (TFLOPS) | 204.9 | 91.6 | 30.3 | 78 | 78 | 78 | 78 | 31.2 | 65 | 22.3 | 28.3 | 31.3 | 32.8 -FP16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | ? | ? | ? | 112 | 125 | 130 -FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 | 242 | 624 | 624 | 624 | 624 | ? | ? | ? | ? | ? | ? -BF16 (TFLOPS) | ? | ? | ? | ? | ? | ? | ? | ? | N/A | N/A | N/A | N/A | N/A -BF16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A -BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 | 242 | 624 | 624 | 624 | 624 | 250 | N/A | N/A | N/A | N/A | N/A -FP8 (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A -FP8 Tensor Core (TFLOPS) | ? | 733 | 242 | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A -FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 | 485 | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A -INT8 (TOPS) | ? | ? | ? | ? | ? | ? | ? | ? | 130 | ? | ? | ? | ? -INT8 Tensor Core (TOPS) | ? | 733 | 242 | 624 | 624 | 624 | 624 | 250 | ? | ? | N/A | N/A | N/A -INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 | 485 | 1248 | 1248 | 1248 | 1248 | 500 | ? | ? | N/A | N/A | N/A -INT4 (TOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A -INT4 Tensor Core (TOPS) | ? | 733 | ? | ? | ? | ? | ? | 500 | 260 | ? | N/A | N/A | N/A -INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 | ? | ? | ? | ? | ? | 1000 | ? | ? | N/A | N/A | N/A -**Architecture Details** | | | | | | | | | | | | | | -GPU Name | H100 | L40S | L4 | A100 PCIe 40GB | A100 PCIe 80GB | A100 SXM4 40GB | A100 SXM4 80GB | A10 | T4 | Quadro RTX 5000 | V100 PCIe | V100 SXM2 | V100S PCIe -Manufacturer | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA -Architecture | Hopper | Ada Lovelace | Ada Lovelace | Ampere | Ampere | Ampere | Ampere | Ampere | Turing | Turing | Volta | Volta | Volta -NVIDIA RT Cores | ? | 142 (3rd gen) | 60 (3rd gen) | ? | ? | ? | ? | 72 (2nd gen) | ? | 48 | N/A | N/A | N/A -NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) | 240 (4th gen) | 432 (3rd gen) | 432 (3rd gen) | 432 (3rd gen) | 432 (3rd gen) | 288 (3rd gen) | 320 (2nd gen) | 384 (2nd gen) | 640 (1st gen) | 640 (1st gen) | 640 (1st gen) -NVIDIA CUDA Cores | 14592 | 18176 | 7424 | 6912 | 6912 | 6912 | 6912 | 9216 | 2560 | 3072 | 5120 | 5120 | 5120 -GPU Memory (GB) | 80 | 48 | 24 | 40 | 80 | 40 | 80 | 24 | 16 | 16 | 16/32 | 16/32 | 32 -Memory Bandwidth (GB/s) | 2048 | 864 | 300 | 1555 | 1935 | 1555 | 2039 | 600 | 300 | 448 | 900 | 900 | 1134 -Interconnect Type | PCIe Gen5 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | NVLink | NVLink | PCIe Gen4 | PCIe Gen3 | PCIe Gen3 | PCIe Gen3 | NVLink | PCIe Gen3 -Encoders and Decoders | 0, 7 | 3, 3 | 2, 4 | 0, 5 | 0, 5 | 0, 5 | 0, 5 | 1, 2 | 1, 2 | 1, 2 | 3, 1 | 3, 1 | 3, 1 -CUDA Compute Capability | 9 | 8.9 | 8.9 | 8.0 | 8 | 8.0 | 8 | 8.6 | 7.5 | 7.5 | 7 | 7 | 7 -Power Consumption (W) | 350 | 300 | 72 | 250 | 300 | 400 | 400 | 150 | 70 | 230 | 250 | 300 | 250 +Attribute (Unit) | H100 | L40S | L4 | A100 PCIe 40GB | A100 PCIe 80GB | A100 SXM4 40GB | A100 SXM4 80GB | A10 | T4 | Quadro RTX 5000 | V100 PCIe | V100 SXM2 | V100S PCIe | RTX4090 +--- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- +FP64 (TFLOPS) | 25.6 | 1.4 | 0.5 | 9.7 | 9.7 | 9.7 | 9.7 | 1 | ? | 0.3 | 7.1 | 7.8 | 8.2 | 2.3 +FP64 Tensor Core (TFLOPS) | 51 | ? | ? | 19.5 | 19.5 | 19.5 | 19.5 | ? | N/A | N/A | N/A | N/A | N/A | ? +FP32 (TFLOPS) | 51.2 | 91.6 | 30.3 | 19.5 | 19.5 | 19.5 | 19.5 | 31.2 | 8.1 | 11.2 | 14.1 | 15.7 | 16.4 | 73.2 +TF32 Tensor Core (TFLOPS) | ? | 183 | 60 | 156 | 156 | 156 | 156 | 62.5 | N/A | N/A | 112 | 125 | N/A | 292.8 +TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 | 120 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A | ? +FP16 (TFLOPS) | 204.9 | 91.6 | 30.3 | 78 | 78 | 78 | 78 | 31.2 | 65 | 22.3 | 28.3 | 31.3 | 32.8 | 292.8 +FP16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | ? | ? | ? | 112 | 125 | 130 | 1171.2 +FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 | 242 | 624 | 624 | 624 | 624 | ? | ? | ? | ? | ? | ? | ? +BF16 (TFLOPS) | ? | ? | ? | ? | ? | ? | ? | ? | N/A | N/A | N/A | N/A | N/A | ? +BF16 Tensor Core (TFLOPS) | ? | 362 | 121 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A | ? +BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 | 242 | 624 | 624 | 624 | 624 | 250 | N/A | N/A | N/A | N/A | N/A | ? +FP8 (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | ? +FP8 Tensor Core (TFLOPS) | ? | 733 | 242 | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | ? +FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 | 485 | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | ? +FP4 (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | ? +FP4 Tensor Core (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | ? +INT8 (TOPS) | ? | ? | ? | ? | ? | ? | ? | ? | 130 | ? | ? | ? | ? | 292.8 +INT8 Tensor Core (TOPS) | ? | 733 | 242 | 624 | 624 | 624 | 624 | 250 | ? | ? | N/A | N/A | N/A | 1171.2 +INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 | 485 | 1248 | 1248 | 1248 | 1248 | 500 | ? | ? | N/A | N/A | N/A | ? +INT4 (TOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | ? +INT4 Tensor Core (TOPS) | ? | 733 | ? | ? | ? | ? | ? | 500 | 260 | ? | N/A | N/A | N/A | ? +INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 | ? | ? | ? | ? | ? | 1000 | ? | ? | N/A | N/A | N/A | ? +**Architecture Details** | | | | | | | | | | | | | | | +GPU Name | H100 | L40S | L4 | A100 PCIe 40GB | A100 PCIe 80GB | A100 SXM4 40GB | A100 SXM4 80GB | A10 | T4 | Quadro RTX 5000 | V100 PCIe | V100 SXM2 | V100S PCIe | RTX4090 +Manufacturer | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA | NVIDIA +Architecture | Hopper | Ada Lovelace | Ada Lovelace | Ampere | Ampere | Ampere | Ampere | Ampere | Turing | Turing | Volta | Volta | Volta | Ada Lovelace +Manufacturing Process | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | TSMC 4N +NVIDIA RT Cores | ? | 142 (3rd gen) | 60 (3rd gen) | ? | ? | ? | ? | 72 (2nd gen) | ? | 48 | N/A | N/A | N/A | 128 (4th gen) +NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) | 240 (4th gen) | 432 (3rd gen) | 432 (3rd gen) | 432 (3rd gen) | 432 (3rd gen) | 288 (3rd gen) | 320 (2nd gen) | 384 (2nd gen) | 640 (1st gen) | 640 (1st gen) | 640 (1st gen) | 512 (4th gen) +NVIDIA CUDA Cores | 14592 | 18176 | 7424 | 6912 | 6912 | 6912 | 6912 | 9216 | 2560 | 3072 | 5120 | 5120 | 5120 | 16384 +GPU Memory (GB) | 80 | 48 | 24 | 40 | 80 | 40 | 80 | 24 | 16 | 16 | 16/32 | 16/32 | 32 | 24 +Memory Bandwidth (GB/s) | 2048 | 864 | 300 | 1555 | 1935 | 1555 | 2039 | 600 | 300 | 448 | 900 | 900 | 1134 | 1008 +Interconnect Type | PCIe Gen5 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | NVLink | NVLink | PCIe Gen4 | PCIe Gen3 | PCIe Gen3 | PCIe Gen3 | NVLink | PCIe Gen3 | PCIe Gen4 +Encoders and Decoders | 0, 7 | 3, 3 | 2, 4 | 0, 5 | 0, 5 | 0, 5 | 0, 5 | 1, 2 | 1, 2 | 1, 2 | 3, 1 | 3, 1 | 3, 1 | 2 NVENC 8.0, 1 NVDEC +CUDA Compute Capability | 9 | 8.9 | 8.9 | 8.0 | 8 | 8.0 | 8 | 8.6 | 7.5 | 7.5 | 7 | 7 | 7 | 8.9 +Power Consumption (W) | 350 | 300 | 72 | 250 | 300 | 400 | 400 | 150 | 70 | 230 | 250 | 300 | 250 | 450 +Die Size (mm2) | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? | ? ## Data Conventions