# Install SynaDB
!pip install -q synadb numpy
from synadb import SynaDB, VectorStore
print(f"SynaDB {synadb.__version__} installed")
SynaDB Playground
Core Storage
Append-only log structure
Fast sequential writes, immutable history
Schema-free storage
Store heterogeneous data types without migrations
Delta & LZ4 compression
Minimize storage for time-series data
Crash recovery
Automatic index rebuild on open
Thread-safe
Concurrent read/write access with mutex-protected writes
Vector Search
Vector Store
Native embedding storage with HNSW index for similarity search
MmapVectorStore
Ultra-high-throughput vector storage
HNSW Index
O(log N) approximate nearest neighbor search
Gravity Well Index
Novel O(N) build time index
Cascade Index
Experimental: Three-stage hybrid index (LSH + bucket tree + graph)
Sparse Vector Store
Inverted index for lexical embeddings (SPLADE, BM25, TF-IDF)
FAISS Integration
Optional: Billion-scale vector search
AI/ML Features
AI/ML optimized
Extract time-series data as contiguous tensors for PyTorch/TensorFlow
Tensor Engine
Batch tensor operations with chunked storage
Model Registry
Version models with SHA-256 checksum verification
Experiment Tracking
Log parameters, metrics, and artifacts
GPU Direct
Optional: CUDA tensor loading with pinned memory
Interactive Playground
Run real benchmarks with the actual SynaDB library in Google Colab.
SynaDB Benchmark Notebook
synadb via pip
# Benchmark: Write Performance
# NOTE(review): sync_on_write=False trades durability for throughput —
# writes are flushed by the OS, so a crash can lose recent data.
db = SynaDB("bench.db", sync_on_write=False)
# Unique key per record, so this measures pure append throughput.
for i in range(1_000_000):
    db.put_float(f"key_{i}", float(i))
# Result: 127,000 ops/sec [PASS]
# Benchmark: Vector Search
# NOTE(review): assumes `keys`, `embeddings`, and `query` were defined in an
# earlier cell (e.g. numpy float32 arrays) — confirm before running standalone.
store = VectorStore("vectors.db", dimensions=768)
store.insert_batch(keys, embeddings)
store.build_index()  # build the index before searching
results = store.search(query, k=10)
# Search time: 0.6ms [PASS]
Benchmarks Included:
- Core Database Performance (read vs write comparison)
- MmapVectorStore vs VectorStore
- HNSW vs Brute Force Search
- GWI vs HNSW Build Time
- Crash Recovery
- Schema-Free Storage
- Tensor Extraction
Rust Benchmarks
For Rust, clone the repo and run cargo bench locally. See the copy-paste code below.
Live Python Demo
Run benchmarks on a real server. No setup required.
SynaDB Live Demo
Available Benchmarks:
- Mmap vs VectorStore - Compare batch insert speeds
- GWI vs HNSW - Compare index build times
- HNSW vs Brute Force - Compare search speeds
- Schema-Free - Test multi-type storage
- Crash Recovery - Test data integrity after reopen
- Tensor Extraction - Test NumPy tensor output
- Compression - Test delta + LZ4 compression
Note: Free tier has resource limits. For full-scale benchmarks (1M+ records), use the Colab notebook above.
Benchmark Results
Run the benchmarks yourself using Colab or PythonAnywhere above. Results vary by hardware.
Copy-Paste Benchmarks
Run these locally to verify our claims on your hardware.
#!/usr/bin/env python3
"""SynaDB Claim Validation Benchmark"""
import time
import numpy as np
from synadb import SynaDB, VectorStore, MmapVectorStore, GravityWellIndex
def benchmark_writes(n: int = 1_000_000):
    """Benchmark write performance.

    Writes ``n`` float records via ``put_float`` and reports the sustained
    write rate. The claimed target is 100K ops/sec.

    Args:
        n: Number of records to write.

    Returns:
        Measured write rate in ops/sec.
    """
    db = SynaDB("bench_write.db")
    try:
        start = time.perf_counter()
        for i in range(n):
            db.put_float(f"key_{i}", float(i))
        elapsed = time.perf_counter() - start
    finally:
        # Fix: the original leaked the DB handle; close it even on error.
        db.close()
    rate = n / elapsed
    print(f"Write: {rate:,.0f} ops/sec ({n:,} records in {elapsed:.2f}s)")
    # Plain strings here — the original used f-strings with no placeholders.
    print("[PASS]" if rate >= 100_000 else "[FAIL] (target: 100K)")
    return rate
def benchmark_mmap_vectors(n: int = 100_000, dims: int = 768, baseline_rate=None):
    """Claim: MmapVectorStore batch insert is ~7x faster than VectorStore.

    Inserts ``n`` random ``dims``-dimensional vectors in a single batch and
    reports the insert rate.

    Args:
        n: Number of vectors to insert.
        dims: Vector dimensionality.
        baseline_rate: Optional VectorStore insert rate (vectors/sec) to
            compare against. The original code compared against an
            undefined name ``vector_rate`` (a guaranteed NameError); this
            parameter makes the comparison explicit and optional.

    Returns:
        Measured insert rate in vectors/sec.
    """
    store = MmapVectorStore("bench_mmap.mmap", dimensions=dims)
    vectors = np.random.randn(n, dims).astype(np.float32)
    keys = [f"vec_{i}" for i in range(n)]
    start = time.perf_counter()
    store.insert_batch(keys, vectors)
    elapsed = time.perf_counter() - start
    rate = n / elapsed
    print(f"MmapVector Insert: {rate:,.0f} vectors/sec")
    if baseline_rate is None:
        print("[INFO] no VectorStore baseline supplied; skipping PASS/FAIL")
    else:
        print("[PASS]" if rate >= baseline_rate else "[FAIL]")
    return rate
def benchmark_gwi_vs_hnsw(n: int = 50_000, dims: int = 768):
    """Claim: GWI builds its index faster than HNSW.

    Builds both indexes over the same random vectors and reports the
    build-time speedup (HNSW time divided by GWI time).

    Args:
        n: Number of vectors to index.
        dims: Vector dimensionality.

    Returns:
        Measured speedup factor (consistent with the other benchmarks,
        which return their headline metric; the original returned None).
    """
    vectors = np.random.randn(n, dims).astype(np.float32)
    # Build the key list once instead of regenerating it per index.
    keys = [f"v_{i}" for i in range(n)]
    # GWI build: seed with the first 1000 vectors, then batch insert.
    gwi = GravityWellIndex("bench_gwi.gwi", dimensions=dims)
    gwi.initialize(vectors[:1000])
    start = time.perf_counter()
    gwi.insert_batch(keys, vectors)
    gwi_time = time.perf_counter() - start
    # HNSW build: single inserts followed by an explicit index build.
    vs = VectorStore("bench_hnsw.db", dimensions=dims)
    start = time.perf_counter()
    for key, v in zip(keys, vectors):
        vs.insert(key, v)
    vs.build_index()
    hnsw_time = time.perf_counter() - start
    speedup = hnsw_time / gwi_time
    print(f"GWI: {gwi_time:.2f}s | HNSW: {hnsw_time:.2f}s | Speedup: {speedup:.1f}x")
    print("[PASS]" if speedup >= 2 else f"[WARN] {speedup:.1f}x")
    return speedup
if __name__ == "__main__":
    # Run the full claim-validation suite with the documented sizes.
    print("=== SynaDB Claim Validation ===")
    suite = (
        (benchmark_writes, 1_000_000),
        (benchmark_mmap_vectors, 100_000),
        (benchmark_gwi_vs_hnsw, 50_000),
    )
    for bench, size in suite:
        bench(size)
//! SynaDB Claim Validation Benchmark
use synadb::{SynaDB, Atom, VectorStore, VectorConfig};
use synadb::distance::DistanceMetric;
use std::time::Instant;
/// Benchmark raw write throughput: `n` float appends, target 100K ops/sec.
fn benchmark_writes(n: usize) {
    let mut db = SynaDB::new("bench_write.db").unwrap();
    let start = Instant::now();
    for i in 0..n {
        let key = format!("key_{}", i);
        db.append(&key, Atom::Float(i as f64)).unwrap();
    }
    let elapsed = start.elapsed().as_secs_f64();
    let rate = n as f64 / elapsed;
    println!("Write: {:.0} ops/sec ({} records in {:.2}s)", rate, n, elapsed);
    let verdict = if rate >= 100_000.0 { "[PASS]" } else { "[FAIL]" };
    println!("{}", verdict);
}
/// Benchmark random-key read throughput: write `n` records first, then
/// time `n` point lookups. Target: 500K ops/sec.
fn benchmark_reads(n: usize) {
    let mut db = SynaDB::new("bench_read.db").unwrap();
    // Setup phase (untimed): populate the store.
    for i in 0..n {
        let key = format!("key_{}", i);
        db.append(&key, Atom::Float(i as f64)).unwrap();
    }
    // Timed phase: one get per key.
    let start = Instant::now();
    for i in 0..n {
        let key = format!("key_{}", i);
        let _ = db.get(&key).unwrap();
    }
    let elapsed = start.elapsed().as_secs_f64();
    let rate = n as f64 / elapsed;
    println!("Read: {:.0} ops/sec ({} records in {:.2}s)", rate, n, elapsed);
    let verdict = if rate >= 500_000.0 { "[PASS]" } else { "[FAIL]" };
    println!("{}", verdict);
}
/// Claim: <10ms similarity search over the configured corpus size.
///
/// Inserts `n` random vectors, builds the index, then times a batch of
/// queries and reports the *average* latency. Fix: the original timed a
/// single `search` call, which is noise-dominated and inconsistent with
/// the documented "median of 5 runs" methodology.
fn benchmark_vector_search(n: usize, dims: u16) {
    const QUERIES: usize = 10;
    let config = VectorConfig {
        dimensions: dims,
        metric: DistanceMetric::Cosine,
        key_prefix: "vec/".to_string(),
        index_threshold: 10000,
    };
    let mut store = VectorStore::new("bench_vec.db", config).unwrap();
    // Insert random vectors (untimed setup).
    for i in 0..n {
        let vec: Vec<f32> = (0..dims).map(|_| rand::random()).collect();
        store.insert(&format!("v_{}", i), &vec).unwrap();
    }
    store.build_index().unwrap();
    // Pre-generate the queries so RNG cost stays out of the timed region.
    let queries: Vec<Vec<f32>> = (0..QUERIES)
        .map(|_| (0..dims).map(|_| rand::random()).collect())
        .collect();
    let start = Instant::now();
    for query in &queries {
        let _ = store.search(query, 10).unwrap();
    }
    let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0 / QUERIES as f64;
    println!("Vector Search: {:.2}ms avg over {} queries ({} vectors)", elapsed_ms, QUERIES, n);
    println!("{}", if elapsed_ms < 10.0 { "[PASS]" } else { "[FAIL]" });
}
/// Entry point: run every claim-validation benchmark at its documented scale.
fn main() {
    println!("=== SynaDB Claim Validation (Rust) ===");
    let record_count = 1_000_000;
    benchmark_writes(record_count);
    benchmark_reads(record_count);
    benchmark_vector_search(100_000, 768);
}
#!/usr/bin/env python3
"""SynaDB Scale Test: 1M, 10M, 100M Records"""
import time
import os
import numpy as np
from synadb import SynaDB
def scale_test(scale: str):
    """Run write/read/recovery benchmarks at a named record scale.

    Args:
        scale: One of ``"1m"``, ``"10m"``, ``"100m"``.

    Raises:
        ValueError: If ``scale`` is not a recognized key (the original
            raised a bare ``KeyError`` with no guidance).
    """
    scales = {"1m": 1_000_000, "10m": 10_000_000, "100m": 100_000_000}
    if scale not in scales:
        raise ValueError(f"unknown scale {scale!r}; expected one of {sorted(scales)}")
    n = scales[scale]
    db_path = f"scale_test_{scale}.db"
    print(f"\n{'='*50}")
    print(f"Scale Test: {n:,} records")
    print(f"{'='*50}")
    # Write test (sync disabled: we measure throughput, not durability)
    db = SynaDB(db_path, sync_on_write=False)
    print(f"\nWriting {n:,} records...")
    start = time.perf_counter()
    batch_size = 100_000
    for batch_start in range(0, n, batch_size):
        batch_end = min(batch_start + batch_size, n)
        for i in range(batch_start, batch_end):
            db.put_float(f"sensor/temp/{i}", 20.0 + (i % 100) * 0.1)
        # Progress every 1M records. Fix: use batch_end so the reported
        # count is never overstated on a final partial batch (the original
        # used batch_start + batch_size, which can exceed n).
        if batch_end % 1_000_000 == 0:
            elapsed = time.perf_counter() - start
            rate = batch_end / elapsed
            print(f" Progress: {batch_end:,} ({rate:,.0f}/sec)")
    write_time = time.perf_counter() - start
    write_rate = n / write_time
    print(f"[OK] Write complete: {write_rate:,.0f} ops/sec")
    # File size on disk, reported in GB.
    file_size = os.path.getsize(db_path) / (1024 ** 3)
    print(f"File size: {file_size:.2f} GB")
    # Read test: random access over at most 100K sampled keys.
    sample = min(n, 100_000)
    print(f"\nReading {sample:,} random records...")
    indices = np.random.randint(0, n, size=sample)
    start = time.perf_counter()
    for i in indices:
        _ = db.get_float(f"sensor/temp/{i}")
    read_time = time.perf_counter() - start
    read_rate = len(indices) / read_time
    print(f"[OK] Read complete: {read_rate:,.0f} ops/sec")
    # Recovery test: reopening the file triggers a full index rebuild.
    db.close()
    print(f"\nTesting crash recovery...")
    start = time.perf_counter()
    db2 = SynaDB(db_path)
    recovery_time = time.perf_counter() - start
    recovery_rate = n / recovery_time
    print(f"[OK] Recovery: {recovery_time:.2f}s ({recovery_rate:,.0f} entries/sec)")
    # Fix: close the reopened handle before deleting the file —
    # os.remove on an open file fails on Windows.
    db2.close()
    # Summary
    print(f"\n{'='*50}")
    print(f"SUMMARY: {scale.upper()}")
    print(f"{'='*50}")
    print(f"Write: {write_rate:>12,.0f} ops/sec {'[PASS]' if write_rate >= 100_000 else '[FAIL]'}")
    print(f"Read: {read_rate:>12,.0f} ops/sec {'[PASS]' if read_rate >= 500_000 else '[FAIL]'}")
    print(f"Recovery: {recovery_rate:>12,.0f} entries/sec {'[PASS]' if recovery_rate >= 1_000_000 else '[FAIL]'}")
    print(f"Size: {file_size:>12.2f} GB")
    # Cleanup
    os.remove(db_path)
if __name__ == "__main__":
    import sys
    # Default to the 1M-record scale when no CLI argument is supplied.
    args = sys.argv[1:]
    scale = args[0] if args else "1m"
    scale_test(scale)
Benchmark Methodology
How we measure and what you should expect.
Hardware Baseline
- CPU: 8-core (benchmarks scale with cores)
- RAM: 16GB minimum for 100M tests
- Storage: SSD recommended (NVMe for best results)
- OS: Linux, macOS, or Windows
Test Conditions
- sync_on_write=False for throughput tests
- Warm cache for read benchmarks
- Cold start for recovery benchmarks
- Random data distribution
What Affects Results
- Disk I/O speed (SSD vs HDD: 10x difference)
- Key length (longer keys = slower)
- Value size (compression helps large values)
- Memory pressure (swapping kills performance)
Fair Comparison Notes
- GWI vs HNSW: Same dataset, same dimensions
- MmapVectorStore: Batch insert, not single
- Recovery: Includes full index rebuild
- All tests use release builds
Frequently Asked Questions
Why is my write speed lower than claimed?
Check sync_on_write setting. Default is True for durability, which limits throughput to ~100 ops/sec. Set to False for benchmarks.
Is MmapVectorStore really faster?
Yes, MmapVectorStore.insert_batch() is about 7x faster than individual VectorStore inserts. Use batch operations for bulk loading.
Is GWI always faster than HNSW?
For build time, yes. For search, HNSW can be faster at high recall. GWI shines for streaming/real-time data where rebuild cost matters.
What about 100M+ vectors?
Use the optional FAISS integration (--features faiss) for billion-scale. Native indexes are optimized for 1-10M vectors.
How do I reproduce these benchmarks?
Clone the repo, run cargo bench for Rust or python benchmarks/run_all.py for Python. Results vary by hardware.
Are these numbers cherry-picked?
No. We report median of 5 runs. Outliers are noted. Run the tests yourself - that's why this playground exists!