Performance Optimization
Optimize DuckDB WASM Adapter for maximum performance in browser environments.
WASM Performance Fundamentals
Browser Environment Limitations
DuckDB WASM runs in the browser, which has different performance characteristics than a native database (a runtime feature check is sketched after this list):
- Memory is capped by the browser heap and the 32-bit WASM address space (roughly 1-4 GB in practice)
- Single-threaded by default (use Web Workers for parallelism)
- No direct disk access by default, so data lives in memory
- Every result crosses the JavaScript/WASM bridge, which adds transfer overhead
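A small capability probe along these lines can help decide how aggressively to cache and whether multi-threading is even an option. It only uses standard browser APIs; note that performance.memory is Chromium-only and SharedArrayBuffer requires cross-origin isolation (COOP/COEP headers), so treat both as optional signals rather than guarantees.
javascript
// Rough capability probe for a DuckDB WASM deployment.
// All of these APIs are optional; fall back to conservative defaults.
function probeEnvironment() {
  return {
    // Chromium-only: approximate JS heap limit in bytes
    heapLimit: performance.memory?.jsHeapSizeLimit ?? null,
    // SharedArrayBuffer (and therefore multi-threaded WASM) needs
    // cross-origin isolation via COOP/COEP response headers
    canUseThreads: typeof SharedArrayBuffer !== 'undefined' &&
      globalThis.crossOriginIsolated === true,
    // Upper bound for a sensible Web Worker pool size
    hardwareThreads: navigator.hardwareConcurrency || 2
  };
}

const env = probeEnvironment();
console.log(
  'Estimated heap limit (MB):',
  env.heapLimit ? (env.heapLimit / 1024 / 1024).toFixed(0) : 'unknown'
);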
Memory Optimization
Monitor Memory Usage
javascript
// Create memory-aware connection
class MemoryAwareConnection {
  constructor(connection, maxMemoryMB = 512) {
    this.connection = connection;
    this.maxMemory = maxMemoryMB * 1024 * 1024;
    this.queries = new Map();
  }

  async execute(sql, params) {
    // Check memory before executing
    const memoryUsage = this.getMemoryUsage();
    if (memoryUsage > this.maxMemory * 0.8) {
      console.warn('High memory usage detected, clearing caches');
      await this.clearCaches();
    }

    // Track query memory footprint
    const startMemory = this.getMemoryUsage();
    const result = await this.connection.execute(sql, params);
    const memoryDelta = this.getMemoryUsage() - startMemory;

    this.queries.set(sql, {
      memoryUsage: memoryDelta,
      rowCount: result.length,
      timestamp: Date.now()
    });

    return result;
  }

  getMemoryUsage() {
    // performance.memory is Chromium-only
    if (performance.memory) {
      return performance.memory.usedJSHeapSize;
    }
    // Fallback estimation based on tracked query footprints
    return this.estimateMemory();
  }

  estimateMemory() {
    let totalSize = 0;
    for (const stats of this.queries.values()) {
      totalSize += stats.memoryUsage || 0;
    }
    return totalSize;
  }

  async clearCaches() {
    // Clear query results cache
    this.queries.clear();

    // Drop temporary tables
    const tables = await this.connection.execute(`
      SELECT table_name
      FROM information_schema.tables
      WHERE table_name LIKE 'temp_%'
    `);
    await Promise.all(
      tables.map(t =>
        this.connection.execute(`DROP TABLE IF EXISTS ${t.table_name}`)
      )
    );

    // Force garbage collection if exposed (e.g. Chrome started with --js-flags=--expose-gc)
    if (typeof globalThis.gc === 'function') {
      globalThis.gc();
    }
  }
}
Optimize Data Types
javascript
-- ❌ BAD: Wasteful data types
CREATE TABLE inefficient (
  id BIGINT,              -- Use INTEGER if values fit
  name VARCHAR(1000000),  -- Excessive length
  amount DOUBLE PRECISION -- Use DECIMAL for money
);

-- ✅ GOOD: Appropriate data types
CREATE TABLE efficient (
  id INTEGER,             -- 4 bytes vs 8 bytes
  name VARCHAR(255),      -- Reasonable length
  amount DECIMAL(10,2),   -- Exact decimal for currency
  active BOOLEAN,         -- 1 byte vs VARCHAR
  created_date DATE       -- 4 bytes vs TIMESTAMP (8 bytes)
);

// Use appropriate numeric types
const optimizeNumericColumns = async () => {
  // Analyze column ranges
  const analysis = await connection.execute(`
    SELECT
      MIN(value) AS min_val,
      MAX(value) AS max_val,
      COUNT(DISTINCT value) AS distinct_count
    FROM large_table
  `);

  const { min_val, max_val, distinct_count } = analysis[0];

  // Choose the smallest integer type that fits the observed range
  let dataType;
  if (min_val >= -128 && max_val <= 127) {
    dataType = 'TINYINT';
  } else if (min_val >= -32768 && max_val <= 32767) {
    dataType = 'SMALLINT';
  } else if (min_val >= -2147483648 && max_val <= 2147483647) {
    dataType = 'INTEGER';
  } else {
    dataType = 'BIGINT';
  }

  // Recreate the table, casting the column to the optimal type
  await connection.execute(`
    CREATE TABLE optimized AS
    SELECT * REPLACE (CAST(value AS ${dataType}) AS value)
    FROM large_table
  `);
};
Query Optimization
Use Indexes Strategically
javascript
// Create indexes for frequently queried columns
async function createOptimalIndexes() {
  // Analyze query patterns from an application-maintained query_log table
  const queryLog = await connection.execute(`
    SELECT
      column_name,
      COUNT(*) AS query_count
    FROM query_log
    WHERE operation = 'WHERE'
    GROUP BY column_name
    ORDER BY query_count DESC
    LIMIT 10
  `);

  // Create indexes for high-frequency columns
  for (const { column_name, query_count } of queryLog) {
    if (query_count > 100) {
      await connection.execute(`
        CREATE INDEX IF NOT EXISTS idx_${column_name}
        ON main_table(${column_name})
      `);
    }
  }

  // Composite indexes for common column combinations
  await connection.execute(`
    CREATE INDEX idx_composite ON orders(customer_id, order_date)
  `);
}

// Inspect existing indexes (available columns depend on the DuckDB version)
const indexStats = await connection.execute(`
  SELECT index_name, table_name, is_unique
  FROM duckdb_indexes()
`);
Optimize JOIN Operations
javascript
-- ❌ BAD: Cartesian product
SELECT * FROM large_table1, large_table2;

-- ❌ BAD: Function in JOIN condition prevents index use
SELECT * FROM t1
JOIN t2 ON UPPER(t1.name) = UPPER(t2.name);

-- ✅ GOOD: Indexed JOIN with a simple condition
SELECT * FROM t1
JOIN t2 ON t1.id = t2.foreign_id
WHERE t1.active = true;

// Filter the smaller table first, then join it to the large one
const optimizedJoin = `
  WITH small_filtered AS (
    SELECT * FROM small_table WHERE condition
  )
  SELECT * FROM small_filtered
  JOIN large_table ON small_filtered.id = large_table.foreign_id
`;

-- Prefer EXISTS over IN for correlated subqueries
-- ❌ Often slower
SELECT * FROM orders
WHERE customer_id IN (
  SELECT id FROM customers WHERE country = 'USA'
);

-- ✅ Often faster
SELECT * FROM orders o
WHERE EXISTS (
  SELECT 1 FROM customers c
  WHERE c.id = o.customer_id AND c.country = 'USA'
);
Batch Operations
javascript
// ❌ BAD: Individual inserts
for (const row of data) {
  await connection.execute(
    'INSERT INTO my_table VALUES (?, ?, ?)',
    [row.a, row.b, row.c]
  );
}

// ✅ GOOD: Batch insert
const batchInsert = async (data, batchSize = 1000) => {
  for (let i = 0; i < data.length; i += batchSize) {
    const batch = data.slice(i, i + batchSize);
    const values = batch.map(() => '(?, ?, ?)').join(', ');
    const params = batch.flatMap(row => [row.a, row.b, row.c]);

    await connection.execute(
      `INSERT INTO my_table VALUES ${values}`,
      params
    );
  }
};

// ✅ BETTER: Use COPY for large datasets
// (in plain @duckdb/duckdb-wasm, registerFileText lives on the AsyncDuckDB
// database handle; adjust the receiver if your adapter does not proxy it)
const bulkLoad = async (csvData) => {
  await connection.registerFileText('data.csv', csvData);
  await connection.execute(`
    COPY my_table FROM 'data.csv' (FORMAT CSV, HEADER TRUE)
  `);
};
Web Worker Optimization
Offload Heavy Queries
javascript
// worker-pool.js
class WorkerPool {
  constructor(workerScript, poolSize = 4) {
    this.workers = [];
    this.queue = [];
    this.poolSize = poolSize;

    for (let i = 0; i < poolSize; i++) {
      const worker = new Worker(workerScript);
      worker.busy = false;
      worker.id = i;
      this.workers.push(worker);
    }
  }

  async execute(sql, params) {
    return new Promise((resolve, reject) => {
      const task = { sql, params, resolve, reject };
      this.queue.push(task);
      this.processQueue();
    });
  }

  processQueue() {
    if (this.queue.length === 0) return;

    const worker = this.workers.find(w => !w.busy);
    if (!worker) return;

    const task = this.queue.shift();
    worker.busy = true;

    const handleMessage = (e) => {
      if (e.data.error) {
        task.reject(new Error(e.data.error));
      } else {
        task.resolve(e.data.result);
      }
      worker.busy = false;
      worker.removeEventListener('message', handleMessage);
      this.processQueue();
    };

    worker.addEventListener('message', handleMessage);
    worker.postMessage({ sql: task.sql, params: task.params });
  }

  terminate() {
    this.workers.forEach(w => w.terminate());
  }
}
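
// duckdb-worker.js — a minimal sketch of the worker side of this protocol.
// It assumes a hypothetical initDuckDBConnection() helper that opens a DuckDB
// WASM connection inside the worker; wire it to your adapter's setup code.
let connectionPromise = null;

self.addEventListener('message', async (e) => {
  const { sql, params } = e.data;
  try {
    // Lazily initialise DuckDB on the first query
    connectionPromise = connectionPromise || initDuckDBConnection();
    const connection = await connectionPromise;

    const result = await connection.execute(sql, params);
    // Post plain data back; structured clone handles arrays and objects
    self.postMessage({ result });
  } catch (err) {
    self.postMessage({ error: err.message });
  }
});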
// Usage
const pool = new WorkerPool('duckdb-worker.js', navigator.hardwareConcurrency || 4);

// Execute queries in parallel
const results = await Promise.all([
  pool.execute('SELECT COUNT(*) FROM table1'),
  pool.execute('SELECT SUM(amount) FROM table2'),
  pool.execute('SELECT AVG(value) FROM table3')
]);
Caching Strategy
Multi-Level Cache
javascript
class MultiLevelCache {
  constructor() {
    // L1: Memory cache (fastest, smallest)
    this.memoryCache = new Map();
    this.memoryCacheSize = 50 * 1024 * 1024; // 50MB

    // L2: IndexedDB cache (slower, larger)
    this.initIndexedDB();

    // L3: Query result materialization
    this.materializedViews = new Map();
  }

  async get(key) {
    // Check L1 (memory)
    if (this.memoryCache.has(key)) {
      this.updateLRU(key);
      return this.memoryCache.get(key);
    }

    // Check L2 (IndexedDB)
    const fromDB = await this.getFromIndexedDB(key);
    if (fromDB) {
      // Promote to L1
      this.setMemoryCache(key, fromDB);
      return fromDB;
    }

    // Check L3 (materialized view)
    if (this.materializedViews.has(key)) {
      const viewName = this.materializedViews.get(key);
      const result = await connection.execute(`SELECT * FROM ${viewName}`);
      this.setMemoryCache(key, result);
      return result;
    }

    return null;
  }

  async set(key, value, options = {}) {
    const size = this.estimateSize(value);

    if (size < this.memoryCacheSize * 0.1) {
      // Small result - keep in memory
      this.setMemoryCache(key, value);
    } else if (size < 10 * 1024 * 1024) {
      // Medium result - store in IndexedDB
      await this.setIndexedDB(key, value);
    } else {
      // Large result - create materialized view
      await this.createMaterializedView(key, value);
    }
  }

  async createMaterializedView(key, data) {
    const viewName = `cache_${key.replace(/[^a-z0-9]/gi, '_')}`;

    // Create table from data (all columns stored as VARCHAR for simplicity)
    const columns = Object.keys(data[0]);
    const columnDefs = columns.map(col => `${col} VARCHAR`).join(', ');

    await connection.execute(`
      CREATE TABLE ${viewName} (${columnDefs})
    `);

    // Insert data (escape single quotes; prefer parameterized inserts for untrusted data)
    const values = data.map(row =>
      `(${columns.map(col => `'${String(row[col]).replace(/'/g, "''")}'`).join(', ')})`
    ).join(', ');

    await connection.execute(`
      INSERT INTO ${viewName} VALUES ${values}
    `);

    this.materializedViews.set(key, viewName);
  }
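
  // --- Helper sketches (assumptions, not part of the original class) ---
  // Minimal memory-cache and IndexedDB plumbing so the methods above are runnable.

  setMemoryCache(key, value) {
    this.memoryCache.set(key, value);
  }

  updateLRU(key) {
    // Re-insert the entry so it moves to the end of the Map's iteration order
    const value = this.memoryCache.get(key);
    this.memoryCache.delete(key);
    this.memoryCache.set(key, value);
  }

  estimateSize(value) {
    // Rough estimate: serialized JSON length in bytes
    return JSON.stringify(value).length;
  }

  initIndexedDB() {
    this.dbPromise = new Promise((resolve, reject) => {
      const request = indexedDB.open('query-cache', 1);
      request.onupgradeneeded = () => request.result.createObjectStore('results');
      request.onsuccess = () => resolve(request.result);
      request.onerror = () => reject(request.error);
    });
  }

  async getFromIndexedDB(key) {
    const db = await this.dbPromise;
    return new Promise((resolve, reject) => {
      const request = db.transaction('results').objectStore('results').get(key);
      request.onsuccess = () => resolve(request.result ?? null);
      request.onerror = () => reject(request.error);
    });
  }

  async setIndexedDB(key, value) {
    const db = await this.dbPromise;
    return new Promise((resolve, reject) => {
      const request = db
        .transaction('results', 'readwrite')
        .objectStore('results')
        .put(value, key);
      request.onsuccess = () => resolve();
      request.onerror = () => reject(request.error);
    });
  }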
}
Lazy Loading and Virtualization
Virtual Scrolling for Large Datasets
javascript
// React component with virtual scrolling
import { useState, useEffect } from 'react';
import { FixedSizeList } from 'react-window';

function VirtualTable({ query }) {
  const [totalRows, setTotalRows] = useState(0);
  const [cache, setCache] = useState(new Map());
  const rowHeight = 35;
  const pageSize = 100;

  useEffect(() => {
    // Get total count
    connection.execute(`SELECT COUNT(*) as count FROM (${query})`).then(result => {
      setTotalRows(result[0].count);
    });
  }, [query]);

  const loadPage = async (startIndex) => {
    const pageIndex = Math.floor(startIndex / pageSize);

    if (!cache.has(pageIndex)) {
      const offset = pageIndex * pageSize;
      const result = await connection.execute(`
        ${query}
        LIMIT ${pageSize}
        OFFSET ${offset}
      `);
      cache.set(pageIndex, result);
      setCache(new Map(cache));
    }

    return cache.get(pageIndex);
  };

  const Row = ({ index, style }) => {
    const [data, setData] = useState(null);

    useEffect(() => {
      loadPage(index).then(pageData => {
        const rowIndex = index % pageSize;
        setData(pageData[rowIndex]);
      });
    }, [index]);

    if (!data) {
      return <div style={style}>Loading...</div>;
    }

    return (
      <div style={style}>
        {Object.values(data).join(' | ')}
      </div>
    );
  };

  return (
    <FixedSizeList
      height={600}
      itemCount={totalRows}
      itemSize={rowHeight}
      width="100%"
    >
      {Row}
    </FixedSizeList>
  );
}
Query Result Streaming
javascript
// Stream large query results
class QueryStreamer {
  constructor(connection, chunkSize = 1000) {
    this.connection = connection;
    this.chunkSize = chunkSize;
  }

  async *stream(query) {
    // Get total count
    const countResult = await this.connection.execute(`
      SELECT COUNT(*) as total FROM (${query})
    `);
    const total = countResult[0].total;

    // Stream in chunks (the query should include a stable ORDER BY
    // so LIMIT/OFFSET pages do not overlap or skip rows)
    for (let offset = 0; offset < total; offset += this.chunkSize) {
      const chunk = await this.connection.execute(`
        ${query}
        LIMIT ${this.chunkSize}
        OFFSET ${offset}
      `);

      yield {
        data: chunk,
        progress: Math.min(100, (offset + chunk.length) / total * 100),
        isLast: offset + chunk.length >= total
      };

      // Allow the browser to breathe between chunks
      await new Promise(resolve => setTimeout(resolve, 0));
    }
  }

  async process(query, processor) {
    let processedCount = 0;

    for await (const { data, progress, isLast } of this.stream(query)) {
      await processor(data);
      processedCount += data.length;

      console.log(`Processed ${processedCount} rows (${progress.toFixed(1)}%)`);

      if (isLast) {
        console.log('Processing complete');
      }
    }

    return processedCount;
  }
}

// Usage
const streamer = new QueryStreamer(connection);

await streamer.process(
  'SELECT * FROM large_table',
  async (chunk) => {
    // Process chunk
    for (const row of chunk) {
      await processRow(row);
    }
  }
);
Monitoring and Profiling
Performance Monitoring Dashboard
javascript
class PerformanceMonitor {
  constructor() {
    this.metrics = {
      queries: [],
      memory: [],
      timing: []
    };
    this.startMonitoring();
  }

  startMonitoring() {
    // Sample memory every second (performance.memory is Chromium-only)
    setInterval(() => {
      if (performance.memory) {
        this.metrics.memory.push({
          timestamp: Date.now(),
          used: performance.memory.usedJSHeapSize,
          total: performance.memory.totalJSHeapSize,
          limit: performance.memory.jsHeapSizeLimit
        });

        // Keep only the last 60 seconds of samples
        const cutoff = Date.now() - 60000;
        this.metrics.memory = this.metrics.memory.filter(
          m => m.timestamp > cutoff
        );
      }
    }, 1000);
  }

  async measureQuery(sql, params) {
    const startTime = performance.now();
    const startMemory = performance.memory?.usedJSHeapSize || 0;

    try {
      const result = await connection.execute(sql, params);

      const endTime = performance.now();
      const endMemory = performance.memory?.usedJSHeapSize || 0;

      const metric = {
        sql: sql.substring(0, 100),
        duration: endTime - startTime,
        memoryDelta: endMemory - startMemory,
        rowCount: result.length,
        timestamp: Date.now()
      };

      this.metrics.queries.push(metric);

      // Alert on slow queries
      if (metric.duration > 1000) {
        console.warn('Slow query detected:', metric);
      }

      return result;
    } catch (error) {
      console.error('Query failed:', error);
      throw error;
    }
  }

  getReport() {
    const queryCount = this.metrics.queries.length;
    const avgQueryTime = queryCount
      ? this.metrics.queries.reduce((sum, q) => sum + q.duration, 0) / queryCount
      : 0;

    const slowQueries = this.metrics.queries
      .filter(q => q.duration > avgQueryTime * 2)
      .sort((a, b) => b.duration - a.duration)
      .slice(0, 10);

    const currentMemory = this.metrics.memory[this.metrics.memory.length - 1];

    return {
      summary: {
        totalQueries: queryCount,
        avgQueryTime: avgQueryTime.toFixed(2),
        memoryUsage: currentMemory
          ? (currentMemory.used / 1024 / 1024).toFixed(2) + ' MB'
          : 'N/A'
      },
      slowQueries,
      memoryTrend: this.metrics.memory.slice(-10)
    };
  }
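
  // Sketch: ask DuckDB for a profiled plan of a single query.
  // EXPLAIN ANALYZE executes the statement and returns the plan with timings;
  // the exact shape of the returned rows depends on the adapter and DuckDB version.
  async explainQuery(sql) {
    const plan = await connection.execute(`EXPLAIN ANALYZE ${sql}`);
    console.table(plan);
    return plan;
  }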
}
Best Practices
- Monitor memory constantly - WASM has strict memory limits
- Use Web Workers for heavy computations
- Stream large results instead of loading all at once
- Cache aggressively but manage cache size
- Choose appropriate data types to minimize memory usage
- Create indexes for frequently queried columns
- Batch operations to reduce overhead
- Profile queries to identify bottlenecks
- Use virtual scrolling for large result sets
- Clean up resources when no longer needed (a teardown sketch follows this list)
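A cleanup sketch along these lines frees memory when a view or page is torn down. It assumes the adapter exposes the underlying @duckdb/duckdb-wasm connection (close()) and database (terminate()) handles; adjust the names to your setup.
javascript
// Hypothetical teardown helper: drop scratch tables, then release handles
async function cleanup(connection, db, workerPool) {
  // Remove temporary tables created during the session
  const temps = await connection.execute(`
    SELECT table_name FROM information_schema.tables
    WHERE table_name LIKE 'temp_%'
  `);
  for (const { table_name } of temps) {
    await connection.execute(`DROP TABLE IF EXISTS ${table_name}`);
  }

  // Release DuckDB WASM resources (method names per @duckdb/duckdb-wasm)
  await connection.close();
  await db.terminate();

  // Stop any query workers still running
  workerPool?.terminate();
}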
WASM-Specific Optimizations
- Minimize data transfers between WASM and JavaScript
- Use typed arrays for numeric data (see the Arrow sketch after this list)
- Avoid string operations in SQL when possible
- Prefer numeric comparisons over string comparisons
- Keep result sets small with LIMIT and pagination
- Use appropriate precision for floating-point numbers
- Leverage browser caching for static data
- Implement progressive loading for better UX
- Consider using SharedArrayBuffer when available
- Test on target devices to ensure performance
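One way to cut JavaScript bridge overhead is to keep numeric columns in the typed arrays Arrow already uses instead of converting every row to a JS object. The sketch below assumes the adapter exposes the raw Arrow table returned by @duckdb/duckdb-wasm's conn.query(); exact accessor names vary slightly across apache-arrow versions.
javascript
// Hypothetical: read a numeric column as a typed array straight from Arrow
const table = await conn.query('SELECT amount FROM orders');

// getChild() returns the Arrow column vector; toArray() yields a typed array
// (e.g. Float64Array) for numeric types, avoiding per-row object creation
const amounts = table.getChild('amount').toArray();

let total = 0;
for (let i = 0; i < amounts.length; i++) {
  total += amounts[i];
}
console.log('Sum computed without materializing row objects:', total);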
Next Steps
- WASM Considerations - Detailed WASM limitations
- Caching Guide - Advanced caching strategies
- Error Handling - Handle performance issues