diff --git a/README.md b/README.md index 687728d..0f6500b 100644 --- a/README.md +++ b/README.md @@ -37,26 +37,21 @@ Below is a sample of the GPU specifications data included in the repository: Attribute (Unit) | H100 | L40S | A100 PCIe 80GB --- | --- | --- | --- FP64 (TFLOPS) | 25.6 | 1.4 | 9.7 -FP64 Tensor Core (TFLOPS) | 51 | ? | 19.5 +FP64 Tensor Core (TFLOPS) | 51 | 1.4 | 19.5 FP32 (TFLOPS) | 51.2 | 91.6 | 19.5 TF32 Tensor Core (TFLOPS) | ? | 183 | 156 TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 | 312 FP16 (TFLOPS) | 204.9 | 91.6 | 78 FP16 Tensor Core (TFLOPS) | ? | 362 | 312 FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 | 624 -BF16 (TFLOPS) | ? | ? | ? BF16 Tensor Core (TFLOPS) | ? | 362 | 312 BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 | 624 -FP8 (TFLOPS) | N/A | N/A | N/A FP8 Tensor Core (TFLOPS) | ? | 733 | N/A FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 | N/A -FP4 (TFLOPS) | N/A | N/A | N/A FP4 Tensor Core (TFLOPS) | N/A | N/A | N/A FP4 Tensor Core with Sparsity (TFLOPS) | N/A | N/A | N/A -INT8 (TOPS) | ? | ? | ? INT8 Tensor Core (TOPS) | ? | 733 | 624 INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 | 1248 -INT4 (TOPS) | N/A | N/A | N/A INT4 Tensor Core (TOPS) | ? | 733 | ? INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 | ? **Architecture Details** | | | | @@ -68,6 +63,7 @@ NVIDIA RT Cores | ? | 142 (3rd gen) | ? 
NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) | 432 (3rd gen) NVIDIA CUDA Cores | 14592 | 18176 | 6912 GPU Memory (GB) | 80 | 48 | 80 +Memory Type | HBM2e | GDDR6 | HBM2e Memory Bandwidth (GB/s) | 2048 | 864 | 1935 Interconnect Type | PCIe Gen5 | PCIe Gen4 | PCIe Gen4 Encoders and Decoders | 0, 7 | 3, 3 | 0, 5 diff --git a/data/specs.json b/data/specs.json index 0022f1e..a07d81e 100644 --- a/data/specs.json +++ b/data/specs.json @@ -1,678 +1,618 @@ { - "_header": { - "name": { "full_name": "GPU Name" }, - "fp64": { "full_name": "FP64", "unit": "TFLOPS" }, - "fp64_tensor_core": { "full_name": "FP64 Tensor Core", "unit": "TFLOPS" }, - "fp32": { "full_name": "FP32", "unit": "TFLOPS" }, - "tf32_tensor_core": { "full_name": "TF32 Tensor Core", "unit": "TFLOPS" }, - "tf32_tensor_core_sparsity": { "full_name": "TF32 Tensor Core with Sparsity", "unit": "TFLOPS" }, - "fp16": { "full_name": "FP16", "unit": "TFLOPS" }, - "fp16_tensor_core": { "full_name": "FP16 Tensor Core", "unit": "TFLOPS" }, - "fp16_tensor_core_sparsity": { "full_name": "FP16 Tensor Core with Sparsity", "unit": "TFLOPS" }, - "bf16": { "full_name": "BF16", "unit": "TFLOPS" }, - "bf16_tensor_core": { "full_name": "BF16 Tensor Core", "unit": "TFLOPS" }, - "bf16_tensor_core_sparsity": { "full_name": "BF16 Tensor Core with Sparsity", "unit": "TFLOPS" }, - "fp8": { "full_name": "FP8", "unit": "TFLOPS" }, - "fp8_tensor_core": { "full_name": "FP8 Tensor Core", "unit": "TFLOPS" }, - "fp8_tensor_core_sparsity": { "full_name": "FP8 Tensor Core with Sparsity", "unit": "TFLOPS" }, - "fp4": { "full_name": "FP4", "unit": "TFLOPS" }, - "fp4_tensor_core": { "full_name": "FP4 Tensor Core", "unit": "TFLOPS" }, - "fp4_tensor_core_sparsity": { "full_name": "FP4 Tensor Core with Sparsity", "unit": "TFLOPS" }, - "int8": { "full_name": "INT8", "unit": "TOPS" }, - "int8_tensor_core": { "full_name": "INT8 Tensor Core", "unit": "TOPS" }, - "int8_tensor_core_sparsity": { "full_name": "INT8 Tensor Core with Sparsity", "unit": 
"TOPS" }, - "int4": { "full_name": "INT4", "unit": "TOPS" }, - "int4_tensor_core": { "full_name": "INT4 Tensor Core", "unit": "TOPS" }, - "int4_tensor_core_sparsity": { "full_name": "INT4 Tensor Core with Sparsity", "unit": "TOPS" }, - "manufacturer": { "full_name": "Manufacturer" }, - "architecture": { "full_name": "Architecture" }, - "process": { "full_name": "Manufacturing Process" }, - "nvidia_rt_cores": { "full_name": "NVIDIA RT Cores"}, - "nvidia_rt_cores_generation": { "full_name": "NVIDIA RT Cores Generation"}, - "nvidia_tensor_cores": { "full_name": "NVIDIA Tensor Cores" }, - "nvidia_tensor_cores_generation": { "full_name": "NVIDIA Tensor Cores Generation" }, - "nvidia_cuda_cores": { "full_name": "NVIDIA CUDA Cores" }, - "gpu_memory": { "full_name": "GPU Memory", "unit": "GB" }, - "memory_bandwidth": { "full_name": "Memory Bandwidth", "unit": "GB/s" }, - "interconnect": { "full_name": "Interconnect Type" }, - "encoders_decoders": { "full_name": "Encoders and Decoders" }, - "cuda_compute_capability": { "full_name": "CUDA Compute Capability" }, - "power_consumption": { "full_name": "Power Consumption", "unit": "W" }, - "die_size": { "full_name": "Die Size", "unit": "mm2" } - }, - "h100": { - "name": "H100", - "fp64": 25.6, - "fp64_tensor_core": 51, - "fp32": 51.2, - "tf32_tensor_core": null, - "tf32_tensor_core_sparsity": 756, - "fp16": 204.9, - "fp16_tensor_core": null, - "fp16_tensor_core_sparsity": null, - "bf16": null, - "bf16_tensor_core": null, - "bf16_tensor_core_sparsity": 1513, - "fp8": 0, - "fp8_tensor_core": null, - "fp8_tensor_core_sparsity": 3026, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": null, - "int8_tensor_core_sparsity": 3026, - "int4": 0, - "int4_tensor_core": null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Hopper", - "process": null, - "nvidia_rt_cores": null, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 456, - 
"nvidia_tensor_cores_generation": 4, - "nvidia_cuda_cores": 14592, - "gpu_memory": 80, - "memory_bandwidth": 2048, - "interconnect": "PCIe Gen5", - "encoders_decoders": "0, 7", - "cuda_compute_capability": "9", - "power_consumption": 350, - "die_size": null, - "sources": [ - "https://getdeploying.com/reference/cloud-gpu/nvidia-h100", - "https://www.techpowerup.com/gpu-specs/h100-pcie-80-gb.c3899", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - }, - "l40s": { - "name": "L40S", - "fp64": 1.4, - "fp64_tensor_core": null, - "fp32": 91.6, - "tf32_tensor_core": 183, - "tf32_tensor_core_sparsity": 366, - "fp16": 91.6, - "fp16_tensor_core": 362, - "fp16_tensor_core_sparsity": 733, - "bf16": null, - "bf16_tensor_core": 362, - "bf16_tensor_core_sparsity": 733, - "fp8": 0, - "fp8_tensor_core": 733, - "fp8_tensor_core_sparsity": 1466, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 733, - "int8_tensor_core_sparsity": 1466, - "int4": 0, - "int4_tensor_core": 733, - "int4_tensor_core_sparsity": 1466, - "manufacturer": "NVIDIA", - "architecture": "Ada Lovelace", - "process": null, - "nvidia_rt_cores": 142, - "nvidia_rt_cores_generation": 3, - "nvidia_tensor_cores": 568, - "nvidia_tensor_cores_generation": 4, - "nvidia_cuda_cores": 18176, - "gpu_memory": 48, - "memory_bandwidth": 864, - "interconnect": "PCIe Gen4", - "encoders_decoders": "3, 3", - "cuda_compute_capability": "8.9", - "power_consumption": 300, - "die_size": null, - "sources": [ - "https://resources.nvidia.com/en-us-l40s/l40s-datasheet-28413", - "https://www.techpowerup.com/gpu-specs/l40s.c4173" - ] - }, - "l4": { - "name": "L4", - "fp64": 0.5, - "fp64_tensor_core": null, - "fp32": 30.3, - "tf32_tensor_core": 60, - "tf32_tensor_core_sparsity": 120, - "fp16": 30.3, - "fp16_tensor_core": 121, - "fp16_tensor_core_sparsity": 242, - "bf16": null, - "bf16_tensor_core": 121, - "bf16_tensor_core_sparsity": 242, - "fp8": 0, - 
"fp8_tensor_core": 242, - "fp8_tensor_core_sparsity": 485, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 242, - "int8_tensor_core_sparsity": 485, - "int4": 0, - "int4_tensor_core": null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Ada Lovelace", - "process": null, - "nvidia_rt_cores": 60, - "nvidia_rt_cores_generation": 3, - "nvidia_tensor_cores": 240, - "nvidia_tensor_cores_generation": 4, - "nvidia_cuda_cores": 7424, - "gpu_memory": 24, - "memory_bandwidth": 300, - "interconnect": "PCIe Gen4", - "encoders_decoders": "2, 4", - "cuda_compute_capability": "8.9", - "power_consumption": 72, - "die_size": null, - "sources": [ - "https://resources.nvidia.com/en-us-data-center-overview/l4-gpu-datasheet", - "https://www.techpowerup.com/gpu-specs/l4.c4091" - ] - }, - "rtx4090": { - "name": "GeForce RTX 4090", - "fp64": 1.3, - "fp64_tensor_core": null, - "fp32": 82.6, - "tf32_tensor_core": 82.6, - "tf32_tensor_core_sparsity": 165.2, - "fp16": 82.6, - "fp16_tensor_core": 330.3, - "fp16_tensor_core_sparsity": 660.6, - "bf16": null, - "bf16_tensor_core": 165.2, - "bf16_tensor_core_sparsity": 330.4, - "fp8": null, - "fp8_tensor_core": 660.6, - "fp8_tensor_core_sparsity": 1321.2, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": 292.8, - "int8_tensor_core": 660.6, - "int8_tensor_core_sparsity": 1321.2, - "int4": null, - "int4_tensor_core": 1321.2, - "int4_tensor_core_sparsity": 2642.4, - "manufacturer": "NVIDIA", - "architecture": "Ada Lovelace", - "process": "TSMC 4N", - "nvidia_rt_cores": 128, - "nvidia_rt_cores_generation": 3, - "nvidia_tensor_cores": 512, - "nvidia_tensor_cores_generation": 4, - "nvidia_cuda_cores": 16384, - "gpu_memory": 24, - "memory_bandwidth": 1008, - "interconnect": "PCIe Gen4", - "encoders_decoders": "2, 1", - "cuda_compute_capability": "8.9", - "power_consumption": 450, - "die_size": 608.5, - "sources": [ - 
"https://www.techpowerup.com/gpu-specs/geforce-rtx-4090.c3889", - "https://images.nvidia.com/aem-dam/Solutions/geforce/ada/nvidia-ada-gpu-architecture.pdf", - "https://developer.nvidia.com/cuda-gpus" - ] - }, - "a100_pcie_40gb": { - "name": "A100 PCIe 40GB", - "fp64": 9.7, - "fp64_tensor_core": 19.5, - "fp32": 19.5, - "tf32_tensor_core": 156, - "tf32_tensor_core_sparsity": 312, - "fp16": 78, - "fp16_tensor_core": 312, - "fp16_tensor_core_sparsity": 624, - "bf16": null, - "bf16_tensor_core": 312, - "bf16_tensor_core_sparsity": 624, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 624, - "int8_tensor_core_sparsity": 1248, - "int4": 0, - "int4_tensor_core": null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Ampere", - "process": null, - "nvidia_rt_cores": null, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 432, - "nvidia_tensor_cores_generation": 3, - "nvidia_cuda_cores": 6912, - "gpu_memory": 40, - "memory_bandwidth": 1555, - "interconnect": "PCIe Gen4", - "encoders_decoders": "0, 5", - "cuda_compute_capability": "8.0", - "power_consumption": 250, - "die_size": null, - "sources": [ - "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf" - ] - }, - "a100_pcie_80gb": { - "name": "A100 PCIe 80GB", - "fp64": 9.7, - "fp64_tensor_core": 19.5, - "fp32": 19.5, - "tf32_tensor_core": 156, - "tf32_tensor_core_sparsity": 312, - "fp16": 78, - "fp16_tensor_core": 312, - "fp16_tensor_core_sparsity": 624, - "bf16": null, - "bf16_tensor_core": 312, - "bf16_tensor_core_sparsity": 624, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 624, - "int8_tensor_core_sparsity": 1248, - "int4": 0, - "int4_tensor_core": 
null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Ampere", - "process": null, - "nvidia_rt_cores": null, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 432, - "nvidia_tensor_cores_generation": 3, - "nvidia_cuda_cores": 6912, - "gpu_memory": 80, - "memory_bandwidth": 1935, - "interconnect": "PCIe Gen4", - "encoders_decoders": "0, 5", - "cuda_compute_capability": "8", - "power_consumption": 300, - "die_size": null, - "sources": [ - "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf", - "https://www.techpowerup.com/gpu-specs/a100-pcie-80-gb.c3821", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - }, - "a100_sxm4_40gb": { - "name": "A100 SXM4 40GB", - "fp64": 9.7, - "fp64_tensor_core": 19.5, - "fp32": 19.5, - "tf32_tensor_core": 156, - "tf32_tensor_core_sparsity": 312, - "fp16": 78, - "fp16_tensor_core": 312, - "fp16_tensor_core_sparsity": 624, - "bf16": null, - "bf16_tensor_core": 312, - "bf16_tensor_core_sparsity": 624, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 624, - "int8_tensor_core_sparsity": 1248, - "int4": 0, - "int4_tensor_core": null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Ampere", - "process": null, - "nvidia_rt_cores": null, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 432, - "nvidia_tensor_cores_generation": 3, - "nvidia_cuda_cores": 6912, - "gpu_memory": 40, - "memory_bandwidth": 1555, - "interconnect": "NVLink", - "encoders_decoders": "0, 5", - "cuda_compute_capability": "8.0", - "power_consumption": 400, - "die_size": null, - "sources": [ - "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf" - ] - }, - "a100_sxm4_80gb": { - 
"name": "A100 SXM4 80GB", - "fp64": 9.7, - "fp64_tensor_core": 19.5, - "fp32": 19.5, - "tf32_tensor_core": 156, - "tf32_tensor_core_sparsity": 312, - "fp16": 78, - "fp16_tensor_core": 312, - "fp16_tensor_core_sparsity": 624, - "bf16": null, - "bf16_tensor_core": 312, - "bf16_tensor_core_sparsity": 624, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 624, - "int8_tensor_core_sparsity": 1248, - "int4": 0, - "int4_tensor_core": null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Ampere", - "process": null, - "nvidia_rt_cores": null, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 432, - "nvidia_tensor_cores_generation": 3, - "nvidia_cuda_cores": 6912, - "gpu_memory": 80, - "memory_bandwidth": 2039, - "interconnect": "NVLink", - "encoders_decoders": "0, 5", - "cuda_compute_capability": "8", - "power_consumption": 400, - "die_size": null, - "sources": [ - "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf", - "https://www.techpowerup.com/gpu-specs/a100-pcie-80-gb.c3821", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - }, - "a10": { - "name": "A10", - "fp64": 1, - "fp64_tensor_core": null, - "fp32": 31.2, - "tf32_tensor_core": 62.5, - "tf32_tensor_core_sparsity": 125, - "fp16": 31.2, - "fp16_tensor_core": null, - "fp16_tensor_core_sparsity": null, - "bf16": null, - "bf16_tensor_core": 125, - "bf16_tensor_core_sparsity": 250, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 250, - "int8_tensor_core_sparsity": 500, - "int4": 0, - "int4_tensor_core": 500, - "int4_tensor_core_sparsity": 1000, - "manufacturer": "NVIDIA", - "architecture": "Ampere", - "process": 
null, - "nvidia_rt_cores": 72, - "nvidia_rt_cores_generation": 2, - "nvidia_tensor_cores": 288, - "nvidia_tensor_cores_generation": 3, - "nvidia_cuda_cores": 9216, - "gpu_memory": 24, - "memory_bandwidth": 600, - "interconnect": "PCIe Gen4", - "encoders_decoders": "1, 2", - "cuda_compute_capability": "8.6", - "power_consumption": 150, - "die_size": null, - "sources": [ - "https://resources.nvidia.com/en-us-gpu/a10-datasheet-nvidia", - "https://www.techpowerup.com/gpu-specs/a10-pcie.c3793", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - }, - "t4": { - "name": "T4", - "fp64": null, - "fp64_tensor_core": 0, - "fp32": 8.1, - "tf32_tensor_core": 0, - "tf32_tensor_core_sparsity": 0, - "fp16": 65, - "fp16_tensor_core": null, - "fp16_tensor_core_sparsity": null, - "bf16": 0, - "bf16_tensor_core": 0, - "bf16_tensor_core_sparsity": 0, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": 130, - "int8_tensor_core": null, - "int8_tensor_core_sparsity": null, - "int4": 0, - "int4_tensor_core": 260, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Turing", - "process": null, - "nvidia_rt_cores": null, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 320, - "nvidia_tensor_cores_generation": 2, - "nvidia_cuda_cores": 2560, - "gpu_memory": 16, - "memory_bandwidth": 300, - "interconnect": "PCIe Gen3", - "encoders_decoders": "1, 2", - "cuda_compute_capability": "7.5", - "power_consumption": 70, - "die_size": null, - "sources": [ - "https://getdeploying.com/reference/cloud-gpu/nvidia-t4", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new", - "https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/solutions/resources/documents1/Datasheet_NVIDIA_T4_Virtualization.pdf" - ] - }, - "quadro_rtx_5000": { - "name": "Quadro RTX 5000", - "fp64": 0.3, - 
"fp64_tensor_core": 0, - "fp32": 11.2, - "tf32_tensor_core": 0, - "tf32_tensor_core_sparsity": 0, - "fp16": 22.3, - "fp16_tensor_core": null, - "fp16_tensor_core_sparsity": null, - "bf16": 0, - "bf16_tensor_core": 0, - "bf16_tensor_core_sparsity": 0, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": null, - "int8_tensor_core_sparsity": null, - "int4": 0, - "int4_tensor_core": null, - "int4_tensor_core_sparsity": null, - "manufacturer": "NVIDIA", - "architecture": "Turing", - "process": null, - "nvidia_rt_cores": 48, - "nvidia_rt_cores_generation": null, - "nvidia_tensor_cores": 384, - "nvidia_tensor_cores_generation": 2, - "nvidia_cuda_cores": 3072, - "gpu_memory": 16, - "memory_bandwidth": 448, - "interconnect": "PCIe Gen3", - "encoders_decoders": "1, 2", - "cuda_compute_capability": "7.5", - "power_consumption": 230, - "die_size": null, - "sources": [ - "https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/quadro-product-literature/quadro-rtx-5000-data-sheet-us-nvidia-704120-r4-web.pdf", - "https://www.techpowerup.com/gpu-specs/quadro-rtx-5000.c3308" - ] - }, - "v100_pcie": { - "name": "V100 PCIe", - "fp64": 7.1, - "fp64_tensor_core": 0, - "fp32": 14.1, - "tf32_tensor_core": 112, - "tf32_tensor_core_sparsity": 0, - "fp16": 28.3, - "fp16_tensor_core": 112, - "fp16_tensor_core_sparsity": null, - "bf16": 0, - "bf16_tensor_core": 0, - "bf16_tensor_core_sparsity": 0, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 0, - "int8_tensor_core_sparsity": 0, - "int4": 0, - "int4_tensor_core": 0, - "int4_tensor_core_sparsity": 0, - "manufacturer": "NVIDIA", - "architecture": "Volta", - "process": null, - "nvidia_rt_cores": 0, - "nvidia_tensor_cores": 640, - "nvidia_tensor_cores_generation": 1, - 
"nvidia_cuda_cores": 5120, - "gpu_memory": "16/32", - "memory_bandwidth": 900, - "interconnect": "PCIe Gen3", - "encoders_decoders": "3, 1", - "cuda_compute_capability": "7", - "power_consumption": 250, - "die_size": null, - "sources": [ - "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", - "https://www.techpowerup.com/gpu-specs/tesla-v100-pcie-32-gb.c3184", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - }, - "v100_sxm2": { - "name": "V100 SXM2", - "fp64": 7.8, - "fp64_tensor_core": 0, - "fp32": 15.7, - "tf32_tensor_core": 125, - "tf32_tensor_core_sparsity": 0, - "fp16": 31.3, - "fp16_tensor_core": 125, - "fp16_tensor_core_sparsity": null, - "bf16": 0, - "bf16_tensor_core": 0, - "bf16_tensor_core_sparsity": 0, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 0, - "int8_tensor_core_sparsity": 0, - "int4": 0, - "int4_tensor_core": 0, - "int4_tensor_core_sparsity": 0, - "manufacturer": "NVIDIA", - "architecture": "Volta", - "process": null, - "nvidia_rt_cores": 0, - "nvidia_tensor_cores": 640, - "nvidia_tensor_cores_generation": 1, - "nvidia_cuda_cores": 5120, - "gpu_memory": "16/32", - "memory_bandwidth": 900, - "interconnect": "NVLink", - "encoders_decoders": "3, 1", - "cuda_compute_capability": "7", - "power_consumption": 300, - "die_size": null, - "sources": [ - "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", - "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-16-gb.c3018", - "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-32-gb.c3183", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - }, - "v100s_pcie": { - "name": "V100S PCIe", - "fp64": 8.2, - "fp64_tensor_core": 0, - "fp32": 16.4, - "tf32_tensor_core": 0, - "tf32_tensor_core_sparsity": 0, 
- "fp16": 32.8, - "fp16_tensor_core": 130, - "fp16_tensor_core_sparsity": null, - "bf16": 0, - "bf16_tensor_core": 0, - "bf16_tensor_core_sparsity": 0, - "fp8": 0, - "fp8_tensor_core": 0, - "fp8_tensor_core_sparsity": 0, - "fp4": 0, - "fp4_tensor_core": 0, - "fp4_tensor_core_sparsity": 0, - "int8": null, - "int8_tensor_core": 0, - "int8_tensor_core_sparsity": 0, - "int4": 0, - "int4_tensor_core": 0, - "int4_tensor_core_sparsity": 0, - "manufacturer": "NVIDIA", - "architecture": "Volta", - "process": null, - "nvidia_rt_cores": 0, - "nvidia_tensor_cores": 640, - "nvidia_tensor_cores_generation": 1, - "nvidia_cuda_cores": 5120, - "gpu_memory": 32, - "memory_bandwidth": 1134, - "interconnect": "PCIe Gen3", - "encoders_decoders": "3, 1", - "cuda_compute_capability": "7", - "power_consumption": 250, - "die_size": null, - "sources": [ - "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", - "https://www.techpowerup.com/gpu-specs/tesla-v100s-pcie-32-gb.c3584", - "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" - ] - } -} + "_header": { + "name": { "full_name": "GPU Name" }, + "fp64": { "full_name": "FP64", "unit": "TFLOPS" }, + "fp64_tensor_core": { "full_name": "FP64 Tensor Core", "unit": "TFLOPS" }, + "fp32": { "full_name": "FP32", "unit": "TFLOPS" }, + "tf32_tensor_core": { "full_name": "TF32 Tensor Core", "unit": "TFLOPS" }, + "tf32_tensor_core_sparsity": { "full_name": "TF32 Tensor Core with Sparsity", "unit": "TFLOPS" }, + "fp16": { "full_name": "FP16", "unit": "TFLOPS" }, + "fp16_tensor_core": { "full_name": "FP16 Tensor Core", "unit": "TFLOPS" }, + "fp16_tensor_core_sparsity": { "full_name": "FP16 Tensor Core with Sparsity", "unit": "TFLOPS" }, + "bf16_tensor_core": { "full_name": "BF16 Tensor Core", "unit": "TFLOPS" }, + "bf16_tensor_core_sparsity": { "full_name": "BF16 Tensor Core with Sparsity", "unit": "TFLOPS" }, + "fp8_tensor_core": { "full_name": "FP8 Tensor Core", "unit": 
"TFLOPS" }, + "fp8_tensor_core_sparsity": { "full_name": "FP8 Tensor Core with Sparsity", "unit": "TFLOPS" }, + "fp4_tensor_core": { "full_name": "FP4 Tensor Core", "unit": "TFLOPS" }, + "fp4_tensor_core_sparsity": { "full_name": "FP4 Tensor Core with Sparsity", "unit": "TFLOPS" }, + "int8_tensor_core": { "full_name": "INT8 Tensor Core", "unit": "TOPS" }, + "int8_tensor_core_sparsity": { "full_name": "INT8 Tensor Core with Sparsity", "unit": "TOPS" }, + "int4_tensor_core": { "full_name": "INT4 Tensor Core", "unit": "TOPS" }, + "int4_tensor_core_sparsity": { "full_name": "INT4 Tensor Core with Sparsity", "unit": "TOPS" }, + "manufacturer": { "full_name": "Manufacturer" }, + "architecture": { "full_name": "Architecture" }, + "process": { "full_name": "Manufacturing Process" }, + "nvidia_rt_cores": { "full_name": "NVIDIA RT Cores"}, + "nvidia_rt_cores_generation": { "full_name": "NVIDIA RT Cores Generation"}, + "nvidia_tensor_cores": { "full_name": "NVIDIA Tensor Cores" }, + "nvidia_tensor_cores_generation": { "full_name": "NVIDIA Tensor Cores Generation" }, + "nvidia_cuda_cores": { "full_name": "NVIDIA CUDA Cores" }, + "gpu_memory": { "full_name": "GPU Memory", "unit": "GB" }, + "memory_type": { "full_name": "Memory Type" }, + "memory_bandwidth": { "full_name": "Memory Bandwidth", "unit": "GB/s" }, + "interconnect": { "full_name": "Interconnect Type" }, + "encoders_decoders": { "full_name": "Encoders and Decoders" }, + "cuda_compute_capability": { "full_name": "CUDA Compute Capability" }, + "power_consumption": { "full_name": "Power Consumption", "unit": "W" }, + "die_size": { "full_name": "Die Size", "unit": "mm2" } + }, + "h100": { + "name": "H100", + "fp64": 25.6, + "fp64_tensor_core": 51, + "fp32": 51.2, + "tf32_tensor_core": null, + "tf32_tensor_core_sparsity": 756, + "fp16": 204.9, + "fp16_tensor_core": null, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": null, + "bf16_tensor_core_sparsity": 1513, + "fp8_tensor_core": null, + 
"fp8_tensor_core_sparsity": 3026, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": null, + "int8_tensor_core_sparsity": 3026, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Hopper", + "process": null, + "nvidia_rt_cores": null, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 456, + "nvidia_tensor_cores_generation": 4, + "nvidia_cuda_cores": 14592, + "gpu_memory": 80, + "memory_type": "HBM2e", + "memory_bandwidth": 2048, + "interconnect": "PCIe Gen5", + "encoders_decoders": "0, 7", + "cuda_compute_capability": "9", + "power_consumption": 350, + "die_size": null, + "sources": [ + "https://getdeploying.com/reference/cloud-gpu/nvidia-h100", + "https://www.techpowerup.com/gpu-specs/h100-pcie-80-gb.c3899", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + }, + "l40s": { + "name": "L40S", + "fp64": 1.4, + "fp64_tensor_core": 1.4, + "fp32": 91.6, + "tf32_tensor_core": 183, + "tf32_tensor_core_sparsity": 366, + "fp16": 91.6, + "fp16_tensor_core": 362, + "fp16_tensor_core_sparsity": 733, + "bf16_tensor_core": 362, + "bf16_tensor_core_sparsity": 733, + "fp8_tensor_core": 733, + "fp8_tensor_core_sparsity": 1466, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 733, + "int8_tensor_core_sparsity": 1466, + "int4_tensor_core": 733, + "int4_tensor_core_sparsity": 1466, + "manufacturer": "NVIDIA", + "architecture": "Ada Lovelace", + "process": null, + "nvidia_rt_cores": 142, + "nvidia_rt_cores_generation": 3, + "nvidia_tensor_cores": 568, + "nvidia_tensor_cores_generation": 4, + "nvidia_cuda_cores": 18176, + "gpu_memory": 48, + "memory_type": "GDDR6", + "memory_bandwidth": 864, + "interconnect": "PCIe Gen4", + "encoders_decoders": "3, 3", + "cuda_compute_capability": "8.9", + "power_consumption": 300, + "die_size": null, + "sources": [ + "https://resources.nvidia.com/en-us-l40s/l40s-datasheet-28413", + 
"https://images.nvidia.com/aem-dam/Solutions/geforce/ada/nvidia-ada-gpu-architecture.pdf" + ] + }, + "l4": { + "name": "L4", + "fp64": 0.5, + "fp64_tensor_core": 0.5, + "fp32": 30.3, + "tf32_tensor_core": 60, + "tf32_tensor_core_sparsity": 120, + "fp16": 30.3, + "fp16_tensor_core": 121, + "fp16_tensor_core_sparsity": 242, + "bf16_tensor_core": 121, + "bf16_tensor_core_sparsity": 242, + "fp8_tensor_core": 242, + "fp8_tensor_core_sparsity": 485, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 242, + "int8_tensor_core_sparsity": 485, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Ada Lovelace", + "process": null, + "nvidia_rt_cores": 60, + "nvidia_rt_cores_generation": 3, + "nvidia_tensor_cores": 240, + "nvidia_tensor_cores_generation": 4, + "nvidia_cuda_cores": 7424, + "gpu_memory": 24, + "memory_type": "GDDR6", + "memory_bandwidth": 300, + "interconnect": "PCIe Gen4", + "encoders_decoders": "2, 4", + "cuda_compute_capability": "8.9", + "power_consumption": 72, + "die_size": null, + "sources": [ + "https://resources.nvidia.com/en-us-data-center-overview/l4-gpu-datasheet", + "https://images.nvidia.com/aem-dam/Solutions/geforce/ada/nvidia-ada-gpu-architecture.pdf" + ] + }, + "rtx4090": { + "name": "GeForce RTX 4090", + "fp64": 1.3, + "fp64_tensor_core": 1.3, + "fp32": 82.6, + "tf32_tensor_core": 82.6, + "tf32_tensor_core_sparsity": 165.2, + "fp16": 82.6, + "fp16_tensor_core": 330.3, + "fp16_tensor_core_sparsity": 660.6, + "bf16_tensor_core": 165.2, + "bf16_tensor_core_sparsity": 330.4, + "fp8_tensor_core": 660.6, + "fp8_tensor_core_sparsity": 1321.2, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 660.6, + "int8_tensor_core_sparsity": 1321.2, + "int4_tensor_core": 1321.2, + "int4_tensor_core_sparsity": 2642.4, + "manufacturer": "NVIDIA", + "architecture": "Ada Lovelace", + "process": "TSMC 4N", + "nvidia_rt_cores": 128, + 
"nvidia_rt_cores_generation": 3, + "nvidia_tensor_cores": 512, + "nvidia_tensor_cores_generation": 4, + "nvidia_cuda_cores": 16384, + "gpu_memory": 24, + "memory_type": "GDDR6X", + "memory_bandwidth": 1008, + "interconnect": "PCIe Gen4", + "encoders_decoders": "2, 1", + "cuda_compute_capability": "8.9", + "power_consumption": 450, + "die_size": 608.5, + "sources": [ + "https://www.techpowerup.com/gpu-specs/geforce-rtx-4090.c3889", + "https://images.nvidia.com/aem-dam/Solutions/geforce/ada/nvidia-ada-gpu-architecture.pdf", + "https://developer.nvidia.com/cuda-gpus" + ] + }, + "a100_pcie_40gb": { + "name": "A100 PCIe 40GB", + "fp64": 9.7, + "fp64_tensor_core": 19.5, + "fp32": 19.5, + "tf32_tensor_core": 156, + "tf32_tensor_core_sparsity": 312, + "fp16": 78, + "fp16_tensor_core": 312, + "fp16_tensor_core_sparsity": 624, + "bf16_tensor_core": 312, + "bf16_tensor_core_sparsity": 624, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 624, + "int8_tensor_core_sparsity": 1248, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Ampere", + "process": null, + "nvidia_rt_cores": null, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 432, + "nvidia_tensor_cores_generation": 3, + "nvidia_cuda_cores": 6912, + "gpu_memory": 40, + "memory_type": "HBM2e", + "memory_bandwidth": 1555, + "interconnect": "PCIe Gen4", + "encoders_decoders": "0, 5", + "cuda_compute_capability": "8.0", + "power_consumption": 250, + "die_size": null, + "sources": [ + "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf" + ] + }, + "a100_pcie_80gb": { + "name": "A100 PCIe 80GB", + "fp64": 9.7, + "fp64_tensor_core": 19.5, + "fp32": 19.5, + "tf32_tensor_core": 156, + "tf32_tensor_core_sparsity": 312, + "fp16": 78, + "fp16_tensor_core": 312, + "fp16_tensor_core_sparsity": 624, + 
"bf16_tensor_core": 312, + "bf16_tensor_core_sparsity": 624, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 624, + "int8_tensor_core_sparsity": 1248, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Ampere", + "process": null, + "nvidia_rt_cores": null, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 432, + "nvidia_tensor_cores_generation": 3, + "nvidia_cuda_cores": 6912, + "gpu_memory": 80, + "memory_type": "HBM2e", + "memory_bandwidth": 1935, + "interconnect": "PCIe Gen4", + "encoders_decoders": "0, 5", + "cuda_compute_capability": "8", + "power_consumption": 300, + "die_size": null, + "sources": [ + "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + }, + "a100_sxm4_40gb": { + "name": "A100 SXM4 40GB", + "fp64": 9.7, + "fp64_tensor_core": 19.5, + "fp32": 19.5, + "tf32_tensor_core": 156, + "tf32_tensor_core_sparsity": 312, + "fp16": 78, + "fp16_tensor_core": 312, + "fp16_tensor_core_sparsity": 624, + "bf16_tensor_core": 312, + "bf16_tensor_core_sparsity": 624, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 624, + "int8_tensor_core_sparsity": 1248, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Ampere", + "process": null, + "nvidia_rt_cores": null, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 432, + "nvidia_tensor_cores_generation": 3, + "nvidia_cuda_cores": 6912, + "gpu_memory": 40, + "memory_type": "HBM2e", + "memory_bandwidth": 1555, + "interconnect": "NVLink", + "encoders_decoders": "0, 5", + "cuda_compute_capability": "8.0", + "power_consumption": 400, + "die_size": 
null, + "sources": [ + "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf" + ] + }, + "a100_sxm4_80gb": { + "name": "A100 SXM4 80GB", + "fp64": 9.7, + "fp64_tensor_core": 19.5, + "fp32": 19.5, + "tf32_tensor_core": 156, + "tf32_tensor_core_sparsity": 312, + "fp16": 78, + "fp16_tensor_core": 312, + "fp16_tensor_core_sparsity": 624, + "bf16_tensor_core": 312, + "bf16_tensor_core_sparsity": 624, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 624, + "int8_tensor_core_sparsity": 1248, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Ampere", + "process": null, + "nvidia_rt_cores": null, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 432, + "nvidia_tensor_cores_generation": 3, + "nvidia_cuda_cores": 6912, + "gpu_memory": 80, + "memory_type": "HBM2e", + "memory_bandwidth": 2039, + "interconnect": "NVLink", + "encoders_decoders": "0, 5", + "cuda_compute_capability": "8", + "power_consumption": 400, + "die_size": null, + "sources": [ + "https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + }, + "a10": { + "name": "A10", + "fp64": 0.5, + "fp64_tensor_core": 0.5, + "fp32": 31.2, + "tf32_tensor_core": 62.5, + "tf32_tensor_core_sparsity": 125, + "fp16": 31.2, + "fp16_tensor_core": null, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": 125, + "bf16_tensor_core_sparsity": 250, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 250, + "int8_tensor_core_sparsity": 500, + "int4_tensor_core": 500, + "int4_tensor_core_sparsity": 1000, + "manufacturer": "NVIDIA", + "architecture": "Ampere", + 
"process": null, + "nvidia_rt_cores": 72, + "nvidia_rt_cores_generation": 2, + "nvidia_tensor_cores": 288, + "nvidia_tensor_cores_generation": 3, + "nvidia_cuda_cores": 9216, + "gpu_memory": 24, + "memory_type": "GDDR6", + "memory_bandwidth": 600, + "interconnect": "PCIe Gen4", + "encoders_decoders": "1, 2", + "cuda_compute_capability": "8.6", + "power_consumption": 150, + "die_size": null, + "sources": [ + "https://resources.nvidia.com/en-us-gpu/a10-datasheet-nvidia", + "https://www.nvidia.com/content/PDF/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.1.pdf", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + }, + "t4": { + "name": "T4", + "fp64": 0.2, + "fp64_tensor_core": 0, + "fp32": 8.1, + "tf32_tensor_core": 0, + "tf32_tensor_core_sparsity": 0, + "fp16": 65, + "fp16_tensor_core": null, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": 0, + "bf16_tensor_core_sparsity": 0, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 130, + "int8_tensor_core_sparsity": null, + "int4_tensor_core": 260, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Turing", + "process": null, + "nvidia_rt_cores": null, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 320, + "nvidia_tensor_cores_generation": 2, + "nvidia_cuda_cores": 2560, + "gpu_memory": 16, + "memory_type": "GDDR6", + "memory_bandwidth": 300, + "interconnect": "PCIe Gen3", + "encoders_decoders": "1, 2", + "cuda_compute_capability": "7.5", + "power_consumption": 70, + "die_size": null, + "sources": [ + "https://getdeploying.com/reference/cloud-gpu/nvidia-t4", + "https://images.nvidia.com/aem-dam/en-zz/Solutions/design-visualization/technologies/turing-architecture/NVIDIA-Turing-Architecture-Whitepaper.pdf", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new", + 
"https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/solutions/resources/documents1/Datasheet_NVIDIA_T4_Virtualization.pdf" + ] + }, + "quadro_rtx_5000": { + "name": "Quadro RTX 5000", + "fp64": 0.3, + "fp64_tensor_core": 0, + "fp32": 11.2, + "tf32_tensor_core": 0, + "tf32_tensor_core_sparsity": 0, + "fp16": 22.3, + "fp16_tensor_core": null, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": 0, + "bf16_tensor_core_sparsity": 0, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": null, + "int8_tensor_core_sparsity": null, + "int4_tensor_core": null, + "int4_tensor_core_sparsity": null, + "manufacturer": "NVIDIA", + "architecture": "Turing", + "process": null, + "nvidia_rt_cores": 48, + "nvidia_rt_cores_generation": null, + "nvidia_tensor_cores": 384, + "nvidia_tensor_cores_generation": 2, + "nvidia_cuda_cores": 3072, + "gpu_memory": 16, + "memory_type": "GDDR6", + "memory_bandwidth": 448, + "interconnect": "PCIe Gen3", + "encoders_decoders": "1, 2", + "cuda_compute_capability": "7.5", + "power_consumption": 230, + "die_size": null, + "sources": [ + "https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/quadro-product-literature/quadro-rtx-5000-data-sheet-us-nvidia-704120-r4-web.pdf", + "https://images.nvidia.com/aem-dam/en-zz/Solutions/design-visualization/technologies/turing-architecture/NVIDIA-Turing-Architecture-Whitepaper.pdf" + + ] + }, + "v100_pcie": { + "name": "V100 PCIe", + "fp64": 7.1, + "fp64_tensor_core": 0, + "fp32": 14.1, + "tf32_tensor_core": 112, + "tf32_tensor_core_sparsity": 0, + "fp16": 28.3, + "fp16_tensor_core": 112, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": 0, + "bf16_tensor_core_sparsity": 0, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 0, + "int8_tensor_core_sparsity": 0, + "int4_tensor_core": 0, + 
"int4_tensor_core_sparsity": 0, + "manufacturer": "NVIDIA", + "architecture": "Volta", + "process": null, + "nvidia_rt_cores": 0, + "nvidia_tensor_cores": 640, + "nvidia_tensor_cores_generation": 1, + "nvidia_cuda_cores": 5120, + "gpu_memory": "16/32", + "memory_type": "HBM2", + "memory_bandwidth": 900, + "interconnect": "PCIe Gen3", + "encoders_decoders": "3, 1", + "cuda_compute_capability": "7", + "power_consumption": 250, + "die_size": null, + "sources": [ + "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", + "https://www.techpowerup.com/gpu-specs/tesla-v100-pcie-32-gb.c3184", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + }, + "v100_sxm2": { + "name": "V100 SXM2", + "fp64": 7.8, + "fp64_tensor_core": 0, + "fp32": 15.7, + "tf32_tensor_core": 125, + "tf32_tensor_core_sparsity": 0, + "fp16": 31.3, + "fp16_tensor_core": 125, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": 0, + "bf16_tensor_core_sparsity": 0, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 0, + "int8_tensor_core_sparsity": 0, + "int4_tensor_core": 0, + "int4_tensor_core_sparsity": 0, + "manufacturer": "NVIDIA", + "architecture": "Volta", + "process": null, + "nvidia_rt_cores": 0, + "nvidia_tensor_cores": 640, + "nvidia_tensor_cores_generation": 1, + "nvidia_cuda_cores": 5120, + "gpu_memory": "16/32", + "memory_type": "HBM2", + "memory_bandwidth": 900, + "interconnect": "NVLink", + "encoders_decoders": "3, 1", + "cuda_compute_capability": "7", + "power_consumption": 300, + "die_size": null, + "sources": [ + "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", + "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-16-gb.c3018", + "https://www.techpowerup.com/gpu-specs/tesla-v100-sxm2-32-gb.c3183", + 
"https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + }, + "v100s_pcie": { + "name": "V100S PCIe", + "fp64": 8.2, + "fp64_tensor_core": 0, + "fp32": 16.4, + "tf32_tensor_core": 0, + "tf32_tensor_core_sparsity": 0, + "fp16": 32.8, + "fp16_tensor_core": 130, + "fp16_tensor_core_sparsity": null, + "bf16_tensor_core": 0, + "bf16_tensor_core_sparsity": 0, + "fp8_tensor_core": 0, + "fp8_tensor_core_sparsity": 0, + "fp4_tensor_core": 0, + "fp4_tensor_core_sparsity": 0, + "int8_tensor_core": 0, + "int8_tensor_core_sparsity": 0, + "int4_tensor_core": 0, + "int4_tensor_core_sparsity": 0, + "manufacturer": "NVIDIA", + "architecture": "Volta", + "process": null, + "nvidia_rt_cores": 0, + "nvidia_tensor_cores": 640, + "nvidia_tensor_cores_generation": 1, + "nvidia_cuda_cores": 5120, + "gpu_memory": 32, + "memory_type": "HBM2", + "memory_bandwidth": 1134, + "interconnect": "PCIe Gen3", + "encoders_decoders": "3, 1", + "cuda_compute_capability": "7", + "power_consumption": 250, + "die_size": null, + "sources": [ + "https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf", + "https://www.techpowerup.com/gpu-specs/tesla-v100s-pcie-32-gb.c3584", + "https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new" + ] + } +} \ No newline at end of file diff --git a/site/index.html b/site/index.html index 9dec78e..45aa948 100644 --- a/site/index.html +++ b/site/index.html @@ -55,7 +55,7 @@ .colHeader { font-weight: bold; } - .precision-FP32, .precision-TF32, .precision-FP8, .precision-INT4 { + .precision-FP32, .precision-TF32, .precision-FP8, .precision-INT8 { background-color: #f0f0f0 !important; /* alternate background color */ } .architecture-details { @@ -64,7 +64,7 @@ .architecture-details-border { border-top: 1px solid #00acee !important; } - .precision-FP32, .precision-TF32, .precision-FP8, .precision-INT4 { + .precision-FP32, .precision-TF32, .precision-FP8, .precision-INT8 { 
background-color: #f0f0f0 !important; /* alternate background color */ } diff --git a/specs.md b/specs.md index c863676..13d2947 100644 --- a/specs.md +++ b/specs.md @@ -2,27 +2,22 @@ Attribute (Unit) | H100 | L40S | L4 | GeForce RTX 4090 | A100 PCIe 40GB | A100 PCIe 80GB | A100 SXM4 40GB | A100 SXM4 80GB | A10 | T4 | Quadro RTX 5000 | V100 PCIe | V100 SXM2 | V100S PCIe --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- -FP64 (TFLOPS) | 25.6 | 1.4 | 0.5 | 1.3 | 9.7 | 9.7 | 9.7 | 9.7 | 1 | ? | 0.3 | 7.1 | 7.8 | 8.2 -FP64 Tensor Core (TFLOPS) | 51 | ? | ? | ? | 19.5 | 19.5 | 19.5 | 19.5 | ? | N/A | N/A | N/A | N/A | N/A +FP64 (TFLOPS) | 25.6 | 1.4 | 0.5 | 1.3 | 9.7 | 9.7 | 9.7 | 9.7 | 0.5 | 0.2 | 0.3 | 7.1 | 7.8 | 8.2 +FP64 Tensor Core (TFLOPS) | 51 | 1.4 | 0.5 | 1.3 | 19.5 | 19.5 | 19.5 | 19.5 | 0.5 | N/A | N/A | N/A | N/A | N/A FP32 (TFLOPS) | 51.2 | 91.6 | 30.3 | 82.6 | 19.5 | 19.5 | 19.5 | 19.5 | 31.2 | 8.1 | 11.2 | 14.1 | 15.7 | 16.4 TF32 Tensor Core (TFLOPS) | ? | 183 | 60 | 82.6 | 156 | 156 | 156 | 156 | 62.5 | N/A | N/A | 112 | 125 | N/A TF32 Tensor Core with Sparsity (TFLOPS) | 756 | 366 | 120 | 165.2 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A FP16 (TFLOPS) | 204.9 | 91.6 | 30.3 | 82.6 | 78 | 78 | 78 | 78 | 31.2 | 65 | 22.3 | 28.3 | 31.3 | 32.8 FP16 Tensor Core (TFLOPS) | ? | 362 | 121 | 330.3 | 312 | 312 | 312 | 312 | ? | ? | ? | 112 | 125 | 130 FP16 Tensor Core with Sparsity (TFLOPS) | ? | 733 | 242 | 660.6 | 624 | 624 | 624 | 624 | ? | ? | ? | ? | ? | ? -BF16 (TFLOPS) | ? | ? | ? | ? | ? | ? | ? | ? | ? | N/A | N/A | N/A | N/A | N/A BF16 Tensor Core (TFLOPS) | ? | 362 | 121 | 165.2 | 312 | 312 | 312 | 312 | 125 | N/A | N/A | N/A | N/A | N/A BF16 Tensor Core with Sparsity (TFLOPS) | 1513 | 733 | 242 | 330.4 | 624 | 624 | 624 | 624 | 250 | N/A | N/A | N/A | N/A | N/A -FP8 (TFLOPS) | N/A | N/A | N/A | ? | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A FP8 Tensor Core (TFLOPS) | ? 
| 733 | 242 | 660.6 | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A FP8 Tensor Core with Sparsity (TFLOPS) | 3026 | 1466 | 485 | 1321.2 | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A -FP4 (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A FP4 Tensor Core (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A FP4 Tensor Core with Sparsity (TFLOPS) | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A -INT8 (TOPS) | ? | ? | ? | 292.8 | ? | ? | ? | ? | ? | 130 | ? | ? | ? | ? -INT8 Tensor Core (TOPS) | ? | 733 | 242 | 660.6 | 624 | 624 | 624 | 624 | 250 | ? | ? | N/A | N/A | N/A +INT8 Tensor Core (TOPS) | ? | 733 | 242 | 660.6 | 624 | 624 | 624 | 624 | 250 | 130 | ? | N/A | N/A | N/A INT8 Tensor Core with Sparsity (TOPS) | 3026 | 1466 | 485 | 1321.2 | 1248 | 1248 | 1248 | 1248 | 500 | ? | ? | N/A | N/A | N/A -INT4 (TOPS) | N/A | N/A | N/A | ? | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A INT4 Tensor Core (TOPS) | ? | 733 | ? | 1321.2 | ? | ? | ? | ? | 500 | 260 | ? | N/A | N/A | N/A INT4 Tensor Core with Sparsity (TOPS) | ? | 1466 | ? | 2642.4 | ? | ? | ? | ? | 1000 | ? | ? | N/A | N/A | N/A **Architecture Details** | | | | | | | | | | | | | | | @@ -34,6 +29,7 @@ NVIDIA RT Cores | ? | 142 (3rd gen) | 60 (3rd gen) | 128 (3rd gen) | ? | ? | ? 
| NVIDIA Tensor Cores | 456 (4th gen) | 568 (4th gen) | 240 (4th gen) | 512 (4th gen) | 432 (3rd gen) | 432 (3rd gen) | 432 (3rd gen) | 432 (3rd gen) | 288 (3rd gen) | 320 (2nd gen) | 384 (2nd gen) | 640 (1st gen) | 640 (1st gen) | 640 (1st gen) NVIDIA CUDA Cores | 14592 | 18176 | 7424 | 16384 | 6912 | 6912 | 6912 | 6912 | 9216 | 2560 | 3072 | 5120 | 5120 | 5120 GPU Memory (GB) | 80 | 48 | 24 | 24 | 40 | 80 | 40 | 80 | 24 | 16 | 16 | 16/32 | 16/32 | 32 +Memory Type | HBM2e | GDDR6 | GDDR6 | GDDR6X | HBM2e | HBM2e | HBM2e | HBM2e | GDDR6 | GDDR6 | GDDR6 | HBM2 | HBM2 | HBM2 Memory Bandwidth (GB/s) | 2048 | 864 | 300 | 1008 | 1555 | 1935 | 1555 | 2039 | 600 | 300 | 448 | 900 | 900 | 1134 Interconnect Type | PCIe Gen5 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | PCIe Gen4 | NVLink | NVLink | PCIe Gen4 | PCIe Gen3 | PCIe Gen3 | PCIe Gen3 | NVLink | PCIe Gen3 Encoders and Decoders | 0, 7 | 3, 3 | 2, 4 | 2, 1 | 0, 5 | 0, 5 | 0, 5 | 0, 5 | 1, 2 | 1, 2 | 1, 2 | 3, 1 | 3, 1 | 3, 1