-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark.py
More file actions
114 lines (96 loc) · 3.56 KB
/
benchmark.py
File metadata and controls
114 lines (96 loc) · 3.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import dtxt
try:
import dtxt_rs
except ImportError:
dtxt_rs = None
import json
import time
import random
import os
def generate_large_data(count, *, seed=None):
    """Build a synthetic benchmark document containing ``count`` entries.

    The document uses only JSON-native types (dicts, lists, strings,
    ints, floats, booleans and ``None``).

    Args:
        count: Number of items to place in the ``"entries"`` list. Zero
            or a negative value yields an empty list.
        seed: Optional seed for the ``"score"`` values. When given, a
            private ``random.Random(seed)`` generator is used, so the
            same seed always produces the same document and the global
            ``random`` state is left untouched. When ``None`` (the
            default) the module-level ``random`` generator is used.

    Returns:
        A dict with ``"title"``, ``"description"`` and ``"entries"`` keys.
    """
    # Both the ``random`` module and a ``random.Random`` instance expose
    # ``random()``, so either one can serve as the score source.
    rng = random if seed is None else random.Random(seed)

    # Every entry gets its own freshly built list and dicts; no container
    # is shared between entries.
    entries = [
        {
            "id": i,
            "uid": f"user-{i}",
            "isActive": i % 2 == 0,
            "score": rng.random() * 1000,
            "tags": ["data", "benchmark", "storage", "json", "dtxt"],
            "meta": {
                "level": i % 10,
                "verified": i % 3 == 0,
                "note": None,
                "nested": {
                    "a": 1,
                    "b": False,
                    "c": "nested string",
                },
            },
        }
        for i in range(count)
    ]

    return {
        "title": "DTXT vs JSON (JSON-native types only)",
        "description": "Benchmark for base format overhead (unquoted keys, short literals)",
        "entries": entries,
    }
# Number of entries generated for the benchmark dataset.
DATASET_SIZE = 30000
def _average_ms(func, payload, iterations):
    """Return the mean wall-clock time of ``func(payload)`` in milliseconds.

    ``func`` is called ``iterations`` times; only the call itself is timed.
    """
    total_ms = 0.0
    for _ in range(iterations):
        start = time.perf_counter()
        func(payload)
        total_ms += (time.perf_counter() - start) * 1000
    return total_ms / iterations


def run_benchmark():
    """Compare JSON and DTXT on payload size, parse time and dump time.

    Generates a dataset of ``DATASET_SIZE`` entries, serializes it with
    both ``json`` and ``dtxt``, writes both payloads under
    ``../../benchmarks/python`` (relative to the current working
    directory) and prints the file sizes followed by the averaged
    parsing and serialization timings. When ``dtxt.dtxt_rs`` is set,
    ``dtxt.loads`` is timed twice: once with that attribute temporarily
    cleared and once with it restored.
    """
    print(f"Generating dataset with {DATASET_SIZE} entries (JSON-native types only)...")
    raw_data = generate_large_data(DATASET_SIZE)

    # 1. Payload Size Comparison
    json_str = json.dumps(raw_data)
    dtxt_str = dtxt.dumps(raw_data)

    base_path = "../../benchmarks/python"
    # Create the output directory up front so a fresh checkout does not
    # fail with FileNotFoundError on the first write.
    os.makedirs(base_path, exist_ok=True)
    json_path = os.path.join(base_path, "bench_v2.json")
    dtxt_path = os.path.join(base_path, "bench_v2.dtxt")

    # Pin the encoding and disable newline translation so the on-disk
    # byte counts compared below do not depend on the platform defaults.
    with open(json_path, "w", encoding="utf-8", newline="") as f:
        f.write(json_str)
    with open(dtxt_path, "w", encoding="utf-8", newline="") as f:
        f.write(dtxt_str)

    json_size = os.path.getsize(json_path)
    dtxt_size = os.path.getsize(dtxt_path)

    print("\n--- Payload Size ---")
    print(f"JSON: {json_size / 1024 / 1024:.2f} MB")
    print(f"DTXT: {dtxt_size / 1024 / 1024:.2f} MB")
    print(f"Reduction: {(1 - dtxt_size / json_size) * 100:.1f}%")

    # 2. Performance Comparison (Time)
    iterations = 5

    print(f"\n--- Parsing Performance (Average of {iterations} runs) ---")
    json_parse_avg = _average_ms(json.loads, json_str, iterations)
    print(f"json.loads: {json_parse_avg:.2f} ms")

    # Force pure Python for comparison. The original attribute is restored
    # in ``finally`` so a parse failure cannot leave ``dtxt`` patched.
    original_rs = dtxt.dtxt_rs
    dtxt.dtxt_rs = None
    try:
        pure_python_parse_avg = _average_ms(dtxt.loads, dtxt_str, iterations)
    finally:
        dtxt.dtxt_rs = original_rs
    print(f"dtxt.loads (Pure Python): {pure_python_parse_avg:.2f} ms")

    if dtxt.dtxt_rs:
        rust_ext_parse_avg = _average_ms(dtxt.loads, dtxt_str, iterations)
        print(f"dtxt.loads (Rust Ext): {rust_ext_parse_avg:.2f} ms")
        print(f"Speedup: {pure_python_parse_avg / rust_ext_parse_avg:.1f}x")

    print(f"\n--- Serialization Performance (Average of {iterations} runs) ---")
    json_stringify_avg = _average_ms(json.dumps, raw_data, iterations)
    print(f"json.dumps: {json_stringify_avg:.2f} ms")

    dtxt_stringify_avg = _average_ms(dtxt.dumps, raw_data, iterations)
    print(f"dtxt.dumps: {dtxt_stringify_avg:.2f} ms")
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    run_benchmark()