diff --git a/data_dashboard/app.js b/data_dashboard/app.js
new file mode 100644
index 0000000..9fb46fb
--- /dev/null
+++ b/data_dashboard/app.js
@@ -0,0 +1,218 @@
+const kernelSelect = document.getElementById('kernelSelect');
+const metricSelect = document.getElementById('metricSelect');
+const tableHead = document.getElementById('tableHead');
+const tableBody = document.getElementById('tableBody');
+const avgSpeedup = document.getElementById('avgSpeedup');
+const maxSpeedup = document.getElementById('maxSpeedup');
+const darkModeToggle = document.getElementById('darkModeToggle');
+const modeIcon = document.getElementById('modeIcon');
+const modeText = document.getElementById('modeText');
+const downloadBtn = document.getElementById('downloadBtn');
+
+// Dark mode functionality
+function initDarkMode() {
+ const savedMode = localStorage.getItem('darkMode');
+ if (savedMode === 'true') {
+ document.body.classList.add('dark-mode');
+ modeIcon.textContent = '☀️';
+ modeText.textContent = 'Light';
+ }
+}
+
+darkModeToggle.addEventListener('click', () => {
+ document.body.classList.toggle('dark-mode');
+ const isDark = document.body.classList.contains('dark-mode');
+ localStorage.setItem('darkMode', isDark);
+ modeIcon.textContent = isDark ? '☀️' : '🌙';
+ modeText.textContent = isDark ? 'Light' : 'Dark';
+});
+
+// CSV Download functionality
+downloadBtn.addEventListener('click', () => {
+ let csvContent = "Kernel,Metric,Configuration,CUDA,Triton,Helion\n";
+
+ for (const [kernelKey, kernelData] of Object.entries(DATA)) {
+ const kernelName = kernelKey.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
+
+ for (const [metricKey, metricData] of Object.entries(kernelData.metrics)) {
+ const metricName = metricData.name;
+
+ kernelData.configs.forEach((config, idx) => {
+ let configName = config.name;
+ if (config.detail) {
+ configName += ` (${config.detail})`;
+ }
+
+ const cudaVal = metricData.cuda[idx];
+ const tritonVal = metricData.triton[idx];
+ const helionVal = metricData.helion[idx];
+
+ csvContent += `"${kernelName}","${metricName}","${configName}",${cudaVal},${tritonVal},${helionVal}\n`;
+ });
+ }
+ }
+
+ const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
+ const link = document.createElement('a');
+ link.href = URL.createObjectURL(blob);
+ link.download = 'gpu_benchmark_complete_data.csv';
+ link.click();
+});
+
+function populateMetrics() {
+ const kernel = kernelSelect.value;
+ const metrics = DATA[kernel].metrics;
+
+ metricSelect.innerHTML = '';
+ Object.keys(metrics).forEach(metricKey => {
+ const option = document.createElement('option');
+ option.value = metricKey;
+ option.textContent = metrics[metricKey].name;
+ metricSelect.appendChild(option);
+ });
+}
+
+function calculateCellClass(value, values, lowerIsBetter) {
+ const sorted = [...values].sort((a, b) => lowerIsBetter ? a - b : b - a);
+ const best = sorted[0];
+ const second = sorted[1];
+ if (value === best) return 'best';
+ if (value === second) return 'second';
+ return 'worst';
+}
+
+function formatValue(value) {
+ if (value >= 1e6) return value.toExponential(2);
+ if (value >= 1000) return value.toFixed(0);
+ if (value >= 10) return value.toFixed(1);
+ if (value >= 1) return value.toFixed(2);
+ return value.toFixed(4);
+}
+
+function calculateSpeedup(values, lowerIsBetter) {
+ const sorted = [...values].sort((a, b) => lowerIsBetter ? a - b : b - a);
+ return lowerIsBetter ? sorted[2] / sorted[0] : sorted[0] / sorted[2];
+}
+
+function getWinner(values, lowerIsBetter) {
+ const sorted = [...values].sort((a, b) => lowerIsBetter ? a - b : b - a);
+ const best = sorted[0];
+ if (values[0] === best) return 'CUDA';
+ if (values[1] === best) return 'Triton';
+ return 'Helion';
+}
+
+function renderTable() { // Redraws the table header, one body row per configuration, and the speedup stats for the selected kernel and metric.
+ const kernel = kernelSelect.value;
+ const metric = metricSelect.value;
+ const kernelData = DATA[kernel];
+ const metricData = kernelData.metrics[metric]; // Provides name, lower_is_better and the cuda/triton/helion value arrays read below.
+
+ // Extract the unit from the metric name: the text inside the first pair of parentheses, e.g. "ms" from "Duration (ms)".
+ const metricName = metricData.name;
+ let unit = '';
+ const unitMatch = metricName.match(/\(([^)]+)\)/);
+ if (unitMatch) {
+ unit = unitMatch[1];
+ } else {
+ // Metric names without a parenthesised part get a hard-coded unit for these three keys; any other metric keeps an empty unit.
+ if (metric === 'registers_per_thread') {
+ unit = 'registers';
+ } else if (metric === 'register_limited_blocks') {
+ unit = 'blocks';
+ } else if (metric === 'gflops') {
+ unit = 'GFLOPs';
+ }
+ }
+
+ // The speedup column and the speedup stats are shown only when the metric key is exactly "duration".
+ const isDuration = metric === 'duration';
+
+ let speedups = []; // One ratio per configuration row; filled only when isDuration is true.
+ let winners = { CUDA: 0, Triton: 0, Helion: 0 }; // Rows won per backend; incremented below but not read again inside this function.
+
+ // Build the header row with a conditional speedup column and units. NOTE(review): the markup inside the template literals of this function looks stripped in this view (empty nested templates, bare pipes) - confirm against the real file before editing them.
+ tableHead.innerHTML = `
+
+ | Configuration |
+
+
+ ${unit ? `` : ''}
+ |
+
+
+ ${unit ? `` : ''}
+ |
+
+
+ ${unit ? `` : ''}
+ |
+ ${isDuration ? ' | ' : ''}
+
+ `;
+
+ tableBody.innerHTML = ''; // Drop the rows left over from the previous render.
+ kernelData.configs.forEach((config, idx) => {
+ const cuda = metricData.cuda[idx]; // idx pairs each configuration with the value at the same position in every backend array.
+ const triton = metricData.triton[idx];
+ const helion = metricData.helion[idx];
+ const values = [cuda, triton, helion]; // Order matters: getWinner maps index 0 to CUDA, 1 to Triton and anything else to Helion.
+
+ const cudaClass = calculateCellClass(cuda, values, metricData.lower_is_better); // best / second / worst; presumably the cell CSS class in the row markup - not visible in this view, confirm.
+ const tritonClass = calculateCellClass(triton, values, metricData.lower_is_better);
+ const helionClass = calculateCellClass(helion, values, metricData.lower_is_better);
+
+ let speedupCell = ''; // Stays empty for non-duration metrics so the row gets no speedup cell.
+ if (isDuration) {
+ const speedup = calculateSpeedup(values, metricData.lower_is_better);
+ speedups.push(speedup);
+
+ let speedupClass = 'speedup-medium'; // Default tier; replaced below for ratios above 50, above 10, or below 1.
+ if (speedup > 50) speedupClass = 'speedup-extreme';
+ else if (speedup > 10) speedupClass = 'speedup-high';
+ else if (speedup < 1) speedupClass = 'speedup-regression'; // NOTE(review): calculateSpeedup divides the worst value by the best, so for positive inputs this branch looks unreachable - confirm.
+
+ speedupCell = `${speedup.toFixed(1)}x | `; // speedupClass does not appear in this string as shown - presumably lost with the stripped markup, confirm.
+ }
+
+ const winner = getWinner(values, metricData.lower_is_better);
+ winners[winner]++;
+
+ const row = document.createElement('tr');
+ row.innerHTML = `
+
+ ${config.name}
+ ${config.detail ? `${config.detail}` : ''}
+ |
+ ${formatValue(cuda)} |
+ ${formatValue(triton)} |
+ ${formatValue(helion)} |
+ ${speedupCell}
+ `;
+ tableBody.appendChild(row);
+ });
+
+ // Update the stats: average and maximum speedup are shown only for the duration metric with at least one row; otherwise both stat containers are hidden.
+ if (isDuration && speedups.length > 0) {
+ const avgSpeed = speedups.reduce((a, b) => a + b, 0) / speedups.length;
+ const maxSpeed = Math.max(...speedups);
+ avgSpeedup.textContent = avgSpeed.toFixed(1) + 'x';
+ maxSpeedup.textContent = maxSpeed.toFixed(1) + 'x';
+ avgSpeedup.parentElement.style.display = 'block'; // Visibility is toggled on the parent of each stat element, not on the element itself.
+ maxSpeedup.parentElement.style.display = 'block';
+ } else {
+ avgSpeedup.parentElement.style.display = 'none';
+ maxSpeedup.parentElement.style.display = 'none';
+ }
+}
+
+kernelSelect.addEventListener('change', () => {
+ populateMetrics();
+ renderTable();
+});
+
+metricSelect.addEventListener('change', renderTable);
+
+initDarkMode();
+populateMetrics();
+renderTable();
diff --git a/data_dashboard/data.js b/data_dashboard/data.js
new file mode 100644
index 0000000..dc1702d
--- /dev/null
+++ b/data_dashboard/data.js
@@ -0,0 +1,2123 @@
+const DATA = {
+ "vector_addition": {
+ "configs": [
+ {
+ "name": "Config A",
+ "detail": "B=32, S=4K, H=256"
+ },
+ {
+ "name": "Config B",
+ "detail": "B=64, S=8K, H=512"
+ },
+ {
+ "name": "BF16",
+ "detail": "B=64, S=8K, H=1024"
+ },
+ {
+ "name": "FP32",
+ "detail": "B=64, S=8K, H=1024"
+ },
+ {
+ "name": "FP16",
+ "detail": "B=64, S=8K, H=1024"
+ },
+ {
+ "name": "Config G",
+ "detail": "B=48, S=12K, H=1536"
+ },
+ {
+ "name": "Config C",
+ "detail": "B=64, S=16K, H=1024"
+ },
+ {
+ "name": "FP16 Large",
+ "detail": "B=64, S=16K, H=2048"
+ },
+ {
+ "name": "Config H",
+ "detail": "B=24, S=48K, H=2048"
+ },
+ {
+ "name": "Config D",
+ "detail": "B=32, S=32K, H=2048"
+ },
+ {
+ "name": "Config E",
+ "detail": "B=16, S=65K, H=4096"
+ },
+ {
+ "name": "Config F",
+ "detail": "B=8, S=131K, H=8192"
+ }
+ ],
+ "metrics": {
+ "duration": {
+ "name": "Duration (ms)",
+ "lower_is_better": true,
+ "cuda": [
+ 0.10573,
+ 0.83632,
+ 1.67,
+ 1.95,
+ 2.33,
+ 2.64,
+ 4.58,
+ 5.1,
+ 7.21,
+ 8.78,
+ 15.6,
+ 31.2
+ ],
+ "triton": [
+ 0.08842,
+ 0.71251,
+ 0.734,
+ 1.42,
+ 1.97,
+ 2.41,
+ 3.98,
+ 2.85,
+ 6.42,
+ 6.71,
+ 11.71,
+ 23.47
+ ],
+ "helion": [
+ 0.088,
+ 0.70326,
+ 0.71,
+ 1.4,
+ 1.75,
+ 2.41,
+ 4.09,
+ 2.85,
+ 6.42,
+ 6.98,
+ 11.94,
+ 23.9
+ ]
+ },
+ "dram_throughput": {
+ "name": "DRAM Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 36.67,
+ 38.92,
+ 40.06,
+ 66.99,
+ 28.6,
+ 81.78,
+ 58.42,
+ 50.15,
+ 79.83,
+ 60.96,
+ 68.66,
+ 68.66
+ ],
+ "triton": [
+ 86.64,
+ 89.38,
+ 90.8,
+ 89.72,
+ 33.9,
+ 89.67,
+ 67.23,
+ 89.86,
+ 89.72,
+ 79.73,
+ 91.44,
+ 91.26
+ ],
+ "helion": [
+ 88.35,
+ 92.36,
+ 92.48,
+ 92.85,
+ 38.12,
+ 89.58,
+ 65.44,
+ 89.8,
+ 89.7,
+ 76.63,
+ 89.65,
+ 89.61
+ ]
+ },
+ "compute_throughput": {
+ "name": "Compute Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 38.09,
+ 38.01,
+ 38.21,
+ 29.39,
+ 23.7,
+ 18.52,
+ 23.63,
+ 35.75,
+ 16.64,
+ 26.17,
+ 29.5,
+ 29.34
+ ],
+ "triton": [
+ 14.94,
+ 14.66,
+ 22.51,
+ 14.55,
+ 11.02,
+ 7.43,
+ 11.1,
+ 12.73,
+ 8.88,
+ 11.85,
+ 12.41,
+ 12.33
+ ],
+ "helion": [
+ 15.2,
+ 14.86,
+ 14.88,
+ 14.78,
+ 11.43,
+ 7.27,
+ 10.68,
+ 14.55,
+ 6.16,
+ 8.26,
+ 5.72,
+ 5.71
+ ]
+ },
+ "no_eligible_warps": {
+ "name": "No Eligible Warps (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 61.27,
+ 61.82,
+ 61.72,
+ 70.68,
+ 63.18,
+ 81.46,
+ 71.18,
+ 67.2,
+ 83.34,
+ 71.19,
+ 70.5,
+ 70.65
+ ],
+ "triton": [
+ 89.18,
+ 89.18,
+ 77.34,
+ 89.76,
+ 82.78,
+ 92.55,
+ 89.78,
+ 87.25,
+ 91.12,
+ 89.07,
+ 87.67,
+ 87.68
+ ],
+ "helion": [
+ 89.68,
+ 90.2,
+ 90.21,
+ 90.27,
+ 86.15,
+ 94.91,
+ 92.32,
+ 92.35,
+ 93.83,
+ 90.17,
+ 95.92,
+ 95.93
+ ]
+ }
+ }
+ },
+ "batched_matmul": {
+ "configs": [
+ {
+ "name": "Config A",
+ "detail": "B=32, S=4K, H=256"
+ },
+ {
+ "name": "Config B",
+ "detail": "B=64, S=8K, H=512"
+ },
+ {
+ "name": "FP32",
+ "detail": "B=64, S=8K, H=1024"
+ },
+ {
+ "name": "FP16",
+ "detail": "B=64, S=8K, H=1024"
+ },
+ {
+ "name": "Config G",
+ "detail": "B=48, S=12K, H=1536"
+ },
+ {
+ "name": "Config C",
+ "detail": "B=64, S=16K, H=1024"
+ },
+ {
+ "name": "FP16 Large",
+ "detail": "B=64, S=16K, H=2048"
+ },
+ {
+ "name": "Config D",
+ "detail": "B=32, S=32K, H=2048"
+ },
+ {
+ "name": "Config H",
+ "detail": "B=24, S=48K, H=2048"
+ },
+ {
+ "name": "Config E",
+ "detail": "B=16, S=65K, H=4096"
+ },
+ {
+ "name": "Config F",
+ "detail": "B=8, S=131K, H=8192"
+ }
+ ],
+ "metrics": {
+ "duration": {
+ "name": "Duration (ms)",
+ "lower_is_better": true,
+ "cuda": [
+ 2.78,
+ 44.19,
+ 176.13,
+ 174.52,
+ 520.74,
+ 352.45,
+ 1830,
+ 1410,
+ 2160,
+ 5590,
+ 22370
+ ],
+ "triton": [
+ 0.238,
+ 7.95,
+ 32.35,
+ 2.8,
+ 72.66,
+ 65.47,
+ 26.95,
+ 236.11,
+ 249.6,
+ 916.01,
+ 3640
+ ],
+ "helion": [
+ 0.198,
+ 2.44,
+ 101.14,
+ 2.27,
+ 47.65,
+ 17.83,
+ 16.37,
+ 69.17,
+ 168.9,
+ 605.19,
+ 2420
+ ]
+ },
+ "aggregate_memory": {
+ "name": "Aggregate Memory Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 94.42,
+ 94.57,
+ 94.6,
+ 95.31,
+ 95.63,
+ 94.6,
+ 95.34,
+ 94.66,
+ 95.56,
+ 95.28,
+ 95.23
+ ],
+ "triton": [
+ 68.77,
+ 88.1,
+ 92.42,
+ 70.87,
+ 88.91,
+ 92.43,
+ 93.9,
+ 87.76,
+ 88.53,
+ 87.33,
+ 87.53
+ ],
+ "helion": [
+ 74.1,
+ 58.96,
+ 78.72,
+ 68.59,
+ 79.98,
+ 60.35,
+ 79.39,
+ 59.81,
+ 79.38,
+ 79.34,
+ 80.39
+ ]
+ },
+ "l1_tex_throughput": {
+ "name": "L1/TEX Cache Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 94.54,
+ 94.58,
+ 94.6,
+ 95.32,
+ 95.65,
+ 94.6,
+ 95.34,
+ 94.66,
+ 95.58,
+ 95.28,
+ 95.23
+ ],
+ "triton": [
+ 71.85,
+ 88.21,
+ 92.43,
+ 45.1,
+ 88.57,
+ 92.43,
+ 68.56,
+ 87.76,
+ 88.57,
+ 87.32,
+ 87.53
+ ],
+ "helion": [
+ 76.21,
+ 60.06,
+ 78.74,
+ 66.82,
+ 80.4,
+ 60.7,
+ 64.83,
+ 59.94,
+ 79.44,
+ 79.22,
+ 80.42
+ ]
+ },
+ "l2_throughput": {
+ "name": "L2 Cache Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 14.22,
+ 14.81,
+ 14.65,
+ 15.12,
+ 13.84,
+ 14.63,
+ 12.79,
+ 14.26,
+ 11.9,
+ 27.56,
+ 27.56
+ ],
+ "triton": [
+ 72.5,
+ 38.87,
+ 50.06,
+ 88.88,
+ 20.12,
+ 49.75,
+ 93.89,
+ 47.4,
+ 20.12,
+ 41.65,
+ 37.98
+ ],
+ "helion": [
+ 55.41,
+ 50.75,
+ 46.94,
+ 68.87,
+ 19.98,
+ 45.22,
+ 78.66,
+ 43.01,
+ 21.1,
+ 22.15,
+ 26.1
+ ]
+ },
+ "dram_throughput": {
+ "name": "DRAM Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 1.96,
+ 1.04,
+ 0.54,
+ 0.27,
+ 0.35,
+ 0.52,
+ 0.11,
+ 0.26,
+ 0.2,
+ 16.47,
+ 16.41
+ ],
+ "triton": [
+ 59.42,
+ 27.08,
+ 32.89,
+ 70.87,
+ 2.09,
+ 32.65,
+ 6.73,
+ 30.85,
+ 2.09,
+ 28.39,
+ 26.93
+ ],
+ "helion": [
+ 27.12,
+ 26.64,
+ 29.88,
+ 20.96,
+ 13.07,
+ 25.37,
+ 17.03,
+ 23.69,
+ 13.49,
+ 14.72,
+ 17.13
+ ]
+ },
+ "compute_throughput": {
+ "name": "Compute Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 73.61,
+ 73.92,
+ 74.05,
+ 74.75,
+ 74.85,
+ 74.06,
+ 74.79,
+ 74.16,
+ 74.83,
+ 74.63,
+ 74.61
+ ],
+ "triton": [
+ 19.8,
+ 15.91,
+ 14.26,
+ 56.34,
+ 11.36,
+ 14.26,
+ 62.92,
+ 15.05,
+ 11.5,
+ 14.8,
+ 14.71
+ ],
+ "helion": [
+ 36.37,
+ 30.38,
+ 53.38,
+ 67.16,
+ 15.98,
+ 33.82,
+ 72.98,
+ 34.87,
+ 15.94,
+ 16.03,
+ 16.3
+ ]
+ },
+ "no_eligible_warps": {
+ "name": "No Eligible Warps (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 47.7,
+ 48.75,
+ 49.26,
+ 47.21,
+ 48.03,
+ 49.26,
+ 46.65,
+ 49.49,
+ 48.3,
+ 45.34,
+ 45.48
+ ],
+ "triton": [
+ 80.97,
+ 84.07,
+ 85.74,
+ 76.02,
+ 86.63,
+ 85.74,
+ 37.19,
+ 84.95,
+ 88.5,
+ 85.18,
+ 85.29
+ ],
+ "helion": [
+ 76.1,
+ 87.45,
+ 80.06,
+ 77.41,
+ 99.02,
+ 89.44,
+ 79.01,
+ 89.3,
+ 96.24,
+ 96.41,
+ 96.43
+ ]
+ }
+ }
+ },
+ "softmax_forward": {
+ "configs": [
+ {
+ "name": "Batch (B=1)",
+ "detail": ""
+ },
+ {
+ "name": "Batch (B=8)",
+ "detail": ""
+ },
+ {
+ "name": "Batch (B=32)",
+ "detail": ""
+ },
+ {
+ "name": "Batch (B=128)",
+ "detail": ""
+ },
+ {
+ "name": "Data (32, 16K, 1024)",
+ "detail": ""
+ },
+ {
+ "name": "Data (32, 4K, 256)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 1K)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 1K, BF16)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 1K, FP16)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 512)",
+ "detail": ""
+ },
+ {
+ "name": "Hidden (32, 2K, 1K)",
+ "detail": ""
+ },
+ {
+ "name": "Hidden (32, 2K, 4K)",
+ "detail": ""
+ },
+ {
+ "name": "Hidden (32, 2K, 8K)",
+ "detail": ""
+ },
+ {
+ "name": "Seq (32, 4K, 4K)",
+ "detail": ""
+ },
+ {
+ "name": "Seq (32, 8K, 4K)",
+ "detail": ""
+ },
+ {
+ "name": "Seq (32, 16K, 4K)",
+ "detail": ""
+ }
+ ],
+ "metrics": {
+ "duration": {
+ "name": "Duration (ms)",
+ "lower_is_better": true,
+ "cuda": [
+ 0.0494,
+ 0.3755,
+ 1.49,
+ 5.93,
+ 14.23,
+ 0.3867,
+ 7.12,
+ 7.17,
+ 7.16,
+ 3.14,
+ 0.892,
+ 1.49,
+ 2.29,
+ 2.97,
+ 5.93,
+ 11.88
+ ],
+ "triton": [
+ 0.0177,
+ 0.1322,
+ 0.5309,
+ 2.15,
+ 2.02,
+ 0.1249,
+ 1.02,
+ 0.8622,
+ 0.7968,
+ 0.6206,
+ 0.131,
+ 0.5249,
+ 1.07,
+ 1.05,
+ 2.14,
+ 4.31
+ ],
+ "helion": [
+ 0.0174,
+ 0.1318,
+ 0.5076,
+ 1.98,
+ 1.99,
+ 0.0622,
+ 0.9964,
+ 0.5413,
+ 0.5054,
+ 0.4941,
+ 0.1919,
+ 0.5159,
+ 0.9963,
+ 0.9951,
+ 1.98,
+ 4.2
+ ]
+ },
+ "compute_throughput": {
+ "name": "Compute Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 57.7,
+ 60.52,
+ 60.94,
+ 61.04,
+ 69.95,
+ 84.42,
+ 69.94,
+ 70.0,
+ 69.83,
+ 80.32,
+ 69.75,
+ 61.07,
+ 56.19,
+ 61.12,
+ 61.05,
+ 61.16
+ ],
+ "triton": [
+ 54.46,
+ 62.29,
+ 58.58,
+ 57.46,
+ 76.41,
+ 83.98,
+ 76.34,
+ 89.51,
+ 93.53,
+ 82.72,
+ 77.92,
+ 59.01,
+ 58.83,
+ 57.59,
+ 57.35,
+ 57.24
+ ],
+ "helion": [
+ 34.42,
+ 58.0,
+ 63.68,
+ 64.16,
+ 78.01,
+ 62.4,
+ 78.23,
+ 75.23,
+ 78.99,
+ 67.72,
+ 59.61,
+ 57.55,
+ 60.78,
+ 64.49,
+ 64.7,
+ 69.77
+ ]
+ },
+ "memory_throughput": {
+ "name": "Memory Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 19.67,
+ 28.05,
+ 29.77,
+ 30.01,
+ 22.38,
+ 28.05,
+ 22.38,
+ 22.11,
+ 22.21,
+ 26.16,
+ 22.32,
+ 29.09,
+ 38.76,
+ 29.26,
+ 30.01,
+ 29.38
+ ],
+ "triton": [
+ 52.09,
+ 79.5,
+ 83.71,
+ 83.53,
+ 86.1,
+ 70.65,
+ 85.11,
+ 50.22,
+ 55.5,
+ 71.24,
+ 80.07,
+ 82.71,
+ 83.03,
+ 82.94,
+ 83.7,
+ 81.76
+ ],
+ "helion": [
+ 47.6,
+ 79.2,
+ 87.76,
+ 89.81,
+ 87.45,
+ 79.86,
+ 87.18,
+ 79.86,
+ 87.31,
+ 89.22,
+ 77.96,
+ 83.76,
+ 89.15,
+ 87.4,
+ 89.78,
+ 83.19
+ ]
+ },
+ "l1_tex_throughput": {
+ "name": "L1/TEX Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 21.2,
+ 20.6,
+ 20.69,
+ 20.68,
+ 22.39,
+ 28.23,
+ 22.39,
+ 22.12,
+ 22.22,
+ 26.18,
+ 22.39,
+ 20.66,
+ 19.77,
+ 20.65,
+ 20.68,
+ 20.65
+ ],
+ "triton": [
+ 46.5,
+ 45.56,
+ 42.64,
+ 42.22,
+ 58.08,
+ 71.91,
+ 57.84,
+ 48.69,
+ 53.0,
+ 69.61,
+ 60.57,
+ 42.17,
+ 41.81,
+ 41.95,
+ 41.8,
+ 41.18
+ ],
+ "helion": [
+ 40.8,
+ 49.76,
+ 41.25,
+ 40.43,
+ 57.55,
+ 67.05,
+ 57.72,
+ 35.87,
+ 38.78,
+ 64.92,
+ 47.98,
+ 48.54,
+ 39.26,
+ 40.52,
+ 40.45,
+ 50.66
+ ]
+ },
+ "l2_throughput": {
+ "name": "L2 Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 33.35,
+ 36.51,
+ 37.21,
+ 37.17,
+ 15.9,
+ 18.07,
+ 15.95,
+ 8.65,
+ 8.66,
+ 18.03,
+ 15.73,
+ 37.12,
+ 47.62,
+ 37.12,
+ 37.17,
+ 37.09
+ ],
+ "triton": [
+ 80.32,
+ 88.71,
+ 90.25,
+ 90.1,
+ 92.91,
+ 53.62,
+ 92.15,
+ 61.93,
+ 66.63,
+ 82.63,
+ 90.88,
+ 89.85,
+ 88.17,
+ 90.4,
+ 90.39,
+ 90.18
+ ],
+ "helion": [
+ 81.11,
+ 90.52,
+ 93.69,
+ 94.08,
+ 93.69,
+ 92.15,
+ 93.7,
+ 89.55,
+ 92.94,
+ 93.65,
+ 89.42,
+ 92.21,
+ 93.63,
+ 93.78,
+ 94.07,
+ 89.76
+ ]
+ },
+ "sm_busy": {
+ "name": "SM Busy (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 62.19,
+ 61.08,
+ 60.94,
+ 61.04,
+ 69.96,
+ 84.96,
+ 69.96,
+ 70.03,
+ 69.83,
+ 80.32,
+ 69.97,
+ 61.21,
+ 56.19,
+ 61.19,
+ 61.05,
+ 61.18
+ ],
+ "triton": [
+ 64.78,
+ 62.93,
+ 58.58,
+ 57.46,
+ 76.67,
+ 85.49,
+ 76.31,
+ 89.72,
+ 93.53,
+ 82.72,
+ 79.86,
+ 58.49,
+ 58.83,
+ 58.3,
+ 57.35,
+ 57.36
+ ],
+ "helion": [
+ 41.93,
+ 59.0,
+ 63.68,
+ 64.16,
+ 78.17,
+ 67.19,
+ 78.38,
+ 75.73,
+ 78.99,
+ 67.72,
+ 59.91,
+ 57.66,
+ 60.78,
+ 64.7,
+ 64.7,
+ 69.84
+ ]
+ },
+ "no_eligible_warps": {
+ "name": "No Eligible Warps (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 37.84,
+ 38.86,
+ 38.92,
+ 38.91,
+ 30.04,
+ 15.08,
+ 30.04,
+ 29.98,
+ 30.15,
+ 19.63,
+ 30.04,
+ 38.79,
+ 43.73,
+ 38.82,
+ 38.92,
+ 38.82
+ ],
+ "triton": [
+ 35.38,
+ 35.91,
+ 41.01,
+ 42.7,
+ 23.35,
+ 14.68,
+ 23.77,
+ 10.14,
+ 6.18,
+ 17.07,
+ 20.65,
+ 41.24,
+ 40.97,
+ 40.69,
+ 42.91,
+ 42.73
+ ],
+ "helion": [
+ 57.97,
+ 40.41,
+ 35.66,
+ 35.26,
+ 21.84,
+ 32.87,
+ 21.77,
+ 24.23,
+ 20.86,
+ 31.58,
+ 39.9,
+ 42.15,
+ 39.53,
+ 35.38,
+ 35.5,
+ 30.25
+ ]
+ },
+ "gflops": {
+ "name": "GFLOPs",
+ "lower_is_better": false,
+ "cuda": [
+ 0.00472,
+ 0.0378,
+ 0.151,
+ 0.605,
+ 1.51,
+ 0.0507,
+ 0.757,
+ 0.757,
+ 0.757,
+ 0.387,
+ 0.0946,
+ 0.151,
+ 0.227,
+ 0.302,
+ 0.605,
+ 1.21
+ ],
+ "triton": [
+ 0.00339,
+ 0.0271,
+ 0.109,
+ 0.434,
+ 0.461,
+ 0.0199,
+ 0.231,
+ 0.231,
+ 0.231,
+ 0.13,
+ 0.0288,
+ 0.109,
+ 0.218,
+ 0.217,
+ 0.434,
+ 0.868
+ ],
+ "helion": [
+ 0.00463,
+ 0.0417,
+ 0.246,
+ 0.985,
+ 1.03,
+ 0.0229,
+ 0.516,
+ 0.295,
+ 0.295,
+ 0.185,
+ 0.0685,
+ 0.167,
+ 0.482,
+ 0.492,
+ 0.985,
+ 2.06
+ ]
+ },
+ "registers_per_thread": {
+ "name": "Registers per Thread",
+ "lower_is_better": true,
+ "cuda": [
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18,
+ 18
+ ],
+ "triton": [
+ 80,
+ 32,
+ 64,
+ 64,
+ 32,
+ 32,
+ 32,
+ 64,
+ 64,
+ 32,
+ 47,
+ 32,
+ 57,
+ 64,
+ 64,
+ 40
+ ],
+ "helion": [
+ 36,
+ 36,
+ 34,
+ 34,
+ 25,
+ 21,
+ 25,
+ 31,
+ 31,
+ 22,
+ 25,
+ 36,
+ 39,
+ 36,
+ 34,
+ 36
+ ]
+ },
+ "register_limited_blocks": {
+ "name": "Register-Limited Blocks",
+ "lower_is_better": true,
+ "cuda": [
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 10,
+ 2,
+ 2,
+ 2,
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "triton": [
+ 6,
+ 4,
+ 4,
+ 4,
+ 16,
+ 32,
+ 16,
+ 32,
+ 32,
+ 16,
+ 40,
+ 4,
+ 4,
+ 4,
+ 4,
+ 3
+ ],
+ "helion": [
+ 6,
+ 6,
+ 6,
+ 6,
+ 16,
+ 21,
+ 16,
+ 16,
+ 16,
+ 21,
+ 16,
+ 6,
+ 3,
+ 6,
+ 6,
+ 6
+ ]
+ },
+ "memory_bandwidth": {
+ "name": "Memory Bandwidth (GB/s)",
+ "lower_is_better": false,
+ "cuda": [
+ 954.6,
+ 1380,
+ 1430,
+ 1440,
+ 602.39,
+ 646.03,
+ 600.86,
+ 297.02,
+ 297.64,
+ 678.67,
+ 580.95,
+ 1430,
+ 1870,
+ 1440,
+ 1440,
+ 1440
+ ],
+ "triton": [
+ 2560,
+ 3910,
+ 4030,
+ 4010,
+ 4230,
+ 1990,
+ 4190,
+ 2470,
+ 2670,
+ 3430,
+ 3930,
+ 4060,
+ 4000,
+ 4080,
+ 4030,
+ 4020
+ ],
+ "helion": [
+ 2330,
+ 3890,
+ 4220,
+ 4320,
+ 4300,
+ 3920,
+ 4280,
+ 3920,
+ 4200,
+ 4290,
+ 3830,
+ 4120,
+ 4290,
+ 4300,
+ 4320,
+ 4090
+ ]
+ }
+ }
+ },
+ "softmax_backward": {
+ "configs": [
+ {
+ "name": "Batch (B=1)",
+ "detail": ""
+ },
+ {
+ "name": "Batch (B=8)",
+ "detail": ""
+ },
+ {
+ "name": "Batch (B=32)",
+ "detail": ""
+ },
+ {
+ "name": "Batch (B=128)",
+ "detail": ""
+ },
+ {
+ "name": "Data (32, 16K, 1024)",
+ "detail": ""
+ },
+ {
+ "name": "Data (32, 4K, 256)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 1K)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 1K, BF16)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 1K, FP16)",
+ "detail": ""
+ },
+ {
+ "name": "Data (64, 8K, 512)",
+ "detail": ""
+ },
+ {
+ "name": "Hidden (32, 2K, 1K)",
+ "detail": ""
+ },
+ {
+ "name": "Hidden (32, 2K, 4K)",
+ "detail": ""
+ },
+ {
+ "name": "Hidden (32, 2K, 8K)",
+ "detail": ""
+ },
+ {
+ "name": "Seq (32, 4K, 4K)",
+ "detail": ""
+ },
+ {
+ "name": "Seq (32, 8K, 4K)",
+ "detail": ""
+ },
+ {
+ "name": "Seq (32, 16K, 4K)",
+ "detail": ""
+ }
+ ],
+ "metrics": {
+ "duration": {
+ "name": "Duration (ms)",
+ "lower_is_better": true,
+ "cuda": [
+ 0.0442,
+ 0.33,
+ 1.31,
+ 5.2,
+ 9.22,
+ 0.2556,
+ 4.6,
+ 4.51,
+ 4.45,
+ 2.12,
+ 0.5773,
+ 1.3,
+ 2.3,
+ 2.58,
+ 5.21,
+ 10.43
+ ],
+ "triton": [
+ 0.0289,
+ 0.2394,
+ 0.9745,
+ 4.22,
+ 3.13,
+ 0.1129,
+ 1.55,
+ 0.7338,
+ 0.7295,
+ 0.7284,
+ 0.1865,
+ 0.9757,
+ 1.59,
+ 1.97,
+ 4.2,
+ 7.96
+ ],
+ "helion": [
+ 0.0233,
+ 0.1876,
+ 0.7271,
+ 2.88,
+ 2.87,
+ 0.0881,
+ 1.43,
+ 0.7287,
+ 0.7174,
+ 0.7222,
+ 0.3209,
+ 0.7596,
+ 1.45,
+ 1.5,
+ 2.88,
+ 13.88
+ ]
+ },
+ "compute_throughput": {
+ "name": "Compute Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 45.74,
+ 48.03,
+ 47.91,
+ 48.4,
+ 62.96,
+ 74.27,
+ 63.07,
+ 66.8,
+ 67.01,
+ 68.76,
+ 62.84,
+ 48.77,
+ 43.21,
+ 49.18,
+ 48.4,
+ 48.57
+ ],
+ "triton": [
+ 17.54,
+ 17.32,
+ 15.63,
+ 14.34,
+ 27.78,
+ 64.47,
+ 28.02,
+ 64.21,
+ 57.92,
+ 41.77,
+ 30.52,
+ 15.83,
+ 19.38,
+ 15.68,
+ 14.4,
+ 15.47
+ ],
+ "helion": [
+ 7.45,
+ 21.36,
+ 15.12,
+ 15.37,
+ 30.08,
+ 24.63,
+ 28.24,
+ 52.2,
+ 41.83,
+ 31.32,
+ 54.2,
+ 26.69,
+ 18.47,
+ 25.45,
+ 15.2,
+ 9.79
+ ]
+ },
+ "memory_throughput": {
+ "name": "Memory Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 40.78,
+ 48.89,
+ 50.99,
+ 51.4,
+ 28.41,
+ 31.11,
+ 28.42,
+ 25.57,
+ 25.87,
+ 31.44,
+ 27.98,
+ 50.13,
+ 58.23,
+ 50.77,
+ 51.37,
+ 50.25
+ ],
+ "triton": [
+ 66.8,
+ 79.61,
+ 81.79,
+ 76.22,
+ 84.55,
+ 70.45,
+ 84.87,
+ 88.94,
+ 91.36,
+ 91.5,
+ 86.56,
+ 79.97,
+ 88.19,
+ 79.13,
+ 76.42,
+ 78.6
+ ],
+ "helion": [
+ 72.11,
+ 85.8,
+ 91.54,
+ 92.74,
+ 91.35,
+ 87.42,
+ 91.18,
+ 89.51,
+ 92.67,
+ 92.24,
+ 84.31,
+ 85.91,
+ 92.33,
+ 86.95,
+ 92.75,
+ 62.96
+ ]
+ },
+ "l1_tex_throughput": {
+ "name": "L1/TEX Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 25.9,
+ 25.41,
+ 25.12,
+ 25.44,
+ 25.57,
+ 31.18,
+ 25.61,
+ 25.58,
+ 25.88,
+ 28.45,
+ 25.68,
+ 25.5,
+ 26.01,
+ 25.79,
+ 25.48,
+ 25.51
+ ],
+ "triton": [
+ 36.24,
+ 30.71,
+ 28.54,
+ 26.43,
+ 42.05,
+ 62.81,
+ 42.3,
+ 54.68,
+ 54.85,
+ 55.17,
+ 46.75,
+ 28.84,
+ 35.73,
+ 28.55,
+ 28.1,
+ 27.75
+ ],
+ "helion": [
+ 31.56,
+ 44.21,
+ 32.99,
+ 32.97,
+ 46.3,
+ 43.74,
+ 46.48,
+ 52.76,
+ 38.99,
+ 53.96,
+ 45.64,
+ 45.32,
+ 42.17,
+ 39.69,
+ 33.02,
+ 29.19
+ ]
+ },
+ "l2_throughput": {
+ "name": "L2 Throughput (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 56.3,
+ 61.89,
+ 62.47,
+ 62.71,
+ 36.78,
+ 41.09,
+ 36.93,
+ 21.18,
+ 21.4,
+ 39.97,
+ 36.77,
+ 62.74,
+ 70.49,
+ 63.45,
+ 62.67,
+ 62.56
+ ],
+ "triton": [
+ 82.27,
+ 89.71,
+ 92.07,
+ 85.44,
+ 92.43,
+ 84.67,
+ 92.97,
+ 94.31,
+ 94.55,
+ 94.59,
+ 93.27,
+ 91.87,
+ 94.35,
+ 91.4,
+ 88.1,
+ 91.4
+ ],
+ "helion": [
+ 88.48,
+ 92.37,
+ 94.09,
+ 94.61,
+ 95.07,
+ 93.86,
+ 95.04,
+ 94.43,
+ 94.94,
+ 94.67,
+ 89.94,
+ 92.17,
+ 94.25,
+ 92.22,
+ 94.62,
+ 80.49
+ ]
+ },
+ "sm_busy": {
+ "name": "SM Busy (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 49.4,
+ 48.56,
+ 47.91,
+ 48.4,
+ 62.98,
+ 74.97,
+ 63.1,
+ 66.83,
+ 67.01,
+ 68.76,
+ 63.19,
+ 48.87,
+ 43.21,
+ 49.21,
+ 48.4,
+ 48.6
+ ],
+ "triton": [
+ 19.9,
+ 16.99,
+ 15.63,
+ 14.34,
+ 27.87,
+ 65.96,
+ 28.06,
+ 64.19,
+ 57.92,
+ 41.77,
+ 30.9,
+ 15.99,
+ 19.38,
+ 15.85,
+ 14.4,
+ 15.43
+ ],
+ "helion": [
+ 8.39,
+ 21.41,
+ 15.12,
+ 15.37,
+ 30.04,
+ 25.89,
+ 28.33,
+ 52.44,
+ 41.83,
+ 31.32,
+ 55.76,
+ 26.49,
+ 17.17,
+ 24.45,
+ 15.2,
+ 5.8
+ ]
+ },
+ "no_eligible_warps": {
+ "name": "No Eligible Warps (%)",
+ "lower_is_better": false,
+ "cuda": [
+ 50.33,
+ 51.1,
+ 51.87,
+ 51.52,
+ 37.01,
+ 24.89,
+ 36.9,
+ 33.18,
+ 32.97,
+ 31.15,
+ 36.88,
+ 51.0,
+ 56.69,
+ 50.75,
+ 51.53,
+ 51.4
+ ],
+ "triton": [
+ 79.93,
+ 83.37,
+ 84.3,
+ 84.62,
+ 71.93,
+ 33.77,
+ 71.85,
+ 35.74,
+ 41.96,
+ 58.19,
+ 69.12,
+ 84.08,
+ 80.66,
+ 84.37,
+ 85.49,
+ 84.52
+ ],
+ "helion": [
+ 91.55,
+ 78.51,
+ 84.81,
+ 84.69,
+ 70.02,
+ 73.96,
+ 71.62,
+ 47.58,
+ 57.88,
+ 68.51,
+ 43.86,
+ 73.52,
+ 82.77,
+ 75.5,
+ 84.62,
+ 94.2
+ ]
+ },
+ "fused_operations": {
+ "name": "Fused Operations (instruction-mix)",
+ "lower_is_better": false,
+ "cuda": [
+ 262000.0,
+ 2100000.0,
+ 8390000.0,
+ 33600000.0,
+ 33600000.0,
+ 1050000.0,
+ 16800000.0,
+ 16800000.0,
+ 16800000.0,
+ 8390000.0,
+ 2100000.0,
+ 8390000.0,
+ 16800000.0,
+ 16800000.0,
+ 33600000.0,
+ 67100000.0
+ ],
+ "triton": [
+ 246000.0,
+ 1970000.0,
+ 7860000.0,
+ 31500000.0,
+ 29400000.0,
+ 524000.0,
+ 14700000.0,
+ 14700000.0,
+ 14700000.0,
+ 6290000.0,
+ 1840000.0,
+ 7860000.0,
+ 15700000.0,
+ 15700000.0,
+ 31500000.0,
+ 62900000.0
+ ],
+ "helion": [
+ 254000.0,
+ 1840000.0,
+ 7340000.0,
+ 29400000.0,
+ 25200000.0,
+ 786000.0,
+ 12600000.0,
+ 0,
+ 0,
+ 6290000.0,
+ 1570000.0,
+ 6290000.0,
+ 6290000.0,
+ 12600000.0,
+ 29400000.0,
+ 33600000.0
+ ]
+ },
+ "non_fused_operations": {
+ "name": "Non-Fused Operations (instruction-mix)",
+ "lower_is_better": false,
+ "cuda": [
+ 862000.0,
+ 6900000.0,
+ 27600000.0,
+ 110000000.0,
+ 240000000.0,
+ 8000000.0,
+ 120000000.0,
+ 120000000.0,
+ 120000000.0,
+ 61300000.0,
+ 15000000.0,
+ 27600000.0,
+ 44400000.0,
+ 55200000.0,
+ 110000000.0,
+ 221000000.0
+ ],
+ "triton": [
+ 688000.0,
+ 5510000.0,
+ 22000000.0,
+ 88100000.0,
+ 105000000.0,
+ 6820000.0,
+ 52400000.0,
+ 52400000.0,
+ 52400000.0,
+ 35700000.0,
+ 6550000.0,
+ 22000000.0,
+ 45100000.0,
+ 44000000.0,
+ 88100000.0,
+ 176000000.0
+ ],
+ "helion": [
+ 598000.0,
+ 7080000.0,
+ 26200000.0,
+ 105000000.0,
+ 151000000.0,
+ 4190000.0,
+ 75500000.0,
+ 35700000.0,
+ 35700000.0,
+ 35700000.0,
+ 7860000.0,
+ 41900000.0,
+ 58700000.0,
+ 83900000.0,
+ 105000000.0,
+ 302000000.0
+ ]
+ },
+ "gflops": {
+ "name": "GFLOPs",
+ "lower_is_better": false,
+ "cuda": [
+ 0.00139,
+ 0.0111,
+ 0.0444,
+ 0.177,
+ 0.307,
+ 0.0101,
+ 0.154,
+ 0.154,
+ 0.154,
+ 0.0781,
+ 0.0192,
+ 0.0444,
+ 0.0779,
+ 0.0887,
+ 0.177,
+ 0.355
+ ],
+ "triton": [
+ 0.00118,
+ 0.00944,
+ 0.0377,
+ 0.151,
+ 0.164,
+ 0.00786,
+ 0.0818,
+ 0.0818,
+ 0.0818,
+ 0.0482,
+ 0.0102,
+ 0.0371,
+ 0.0765,
+ 0.0755,
+ 0.151,
+ 0.302
+ ],
+ "helion": [
+ 0.00111,
+ 0.0107,
+ 0.0409,
+ 0.164,
+ 0.201,
+ 0.00577,
+ 0.101,
+ 0.0357,
+ 0.0357,
+ 0.0482,
+ 0.011,
+ 0.0545,
+ 0.0881,
+ 0.109,
+ 0.164,
+ 0.369
+ ]
+ },
+ "registers_per_thread": {
+ "name": "Registers per Thread",
+ "lower_is_better": true,
+ "cuda": [
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16
+ ],
+ "triton": [
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 32,
+ 30,
+ 32,
+ 32,
+ 32,
+ 32,
+ 40,
+ 32,
+ 32,
+ 32
+ ],
+ "helion": [
+ 78,
+ 46,
+ 54,
+ 54,
+ 22,
+ 22,
+ 22,
+ 22,
+ 30,
+ 21,
+ 22,
+ 21,
+ 32,
+ 22,
+ 54,
+ 48
+ ]
+ },
+ "register_limited_blocks": {
+ "name": "Register-Limited Blocks",
+ "lower_is_better": true,
+ "cuda": [
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 16,
+ 4,
+ 4,
+ 4,
+ 8,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4,
+ 4
+ ],
+ "triton": [
+ 8,
+ 8,
+ 8,
+ 8,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 16,
+ 8,
+ 3,
+ 8,
+ 8,
+ 8
+ ],
+ "helion": [
+ 6,
+ 2,
+ 2,
+ 2,
+ 5,
+ 42,
+ 5,
+ 5,
+ 4,
+ 10,
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 10
+ ]
+ },
+ "memory_bandwidth": {
+ "name": "Memory Bandwidth (GB/s)",
+ "lower_is_better": false,
+ "cuda": [
+ 2000,
+ 2470,
+ 2450,
+ 2400,
+ 1400,
+ 1530,
+ 1400,
+ 712.17,
+ 720.66,
+ 1510,
+ 1370,
+ 1970,
+ 4220,
+ 4440,
+ 3090,
+ 4270
+ ],
+ "triton": [
+ 3540,
+ 4460,
+ 4400,
+ 4220,
+ 4490,
+ 4300,
+ 4480,
+ 4400,
+ 4460,
+ 4440,
+ 4140,
+ 4130,
+ 3930,
+ 4240,
+ 3860,
+ 3890
+ ],
+ "helion": [
+ 3280,
+ 3670,
+ 3940,
+ 3910,
+ 4160,
+ 3460,
+ 4170,
+ 4370,
+ 4400,
+ 4400,
+ 4250,
+ 2460,
+ 2800,
+ 2470,
+ 2500,
+ 2470
+ ]
+ }
+ }
+ }
+};
\ No newline at end of file
diff --git a/data_dashboard/index.html b/data_dashboard/index.html
new file mode 100644
index 0000000..9d947fb
--- /dev/null
+++ b/data_dashboard/index.html
@@ -0,0 +1,530 @@
+
+
+
+
+
+ GPU Kernel Performance Dashboard
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+