# Data Import/Export
Complete guide for importing and exporting data with DuckDB WASM Adapter.
## Overview
DuckDB WASM Adapter supports importing and exporting data in various formats including CSV, JSON, and Parquet. All operations happen entirely in the browser, making it perfect for data processing without server dependencies.
## Supported Formats
- **CSV** - Comma-separated values
- **JSON** - JavaScript Object Notation
- **Parquet** - Columnar storage format
- **Arrow** - Apache Arrow format
## CSV Import
### Basic CSV Import
```javascript
// React
import { useImportCSV } from '@northprint/duckdb-wasm-adapter-react';

function CSVImporter() {
  const { importCSV, loading, error } = useImportCSV({
    onSuccess: (result) => {
      console.log('Import successful:', result);
    },
    onError: (error) => {
      console.error('Import failed:', error);
    }
  });

  const handleFileUpload = async (event) => {
    const file = event.target.files[0];
    if (!file) return;

    await importCSV(file, 'users', {
      header: true,       // First row contains headers
      delimiter: ',',     // Column delimiter
      nullString: 'NULL'  // String representing NULL values
    });
  };

  return (
    <div>
      <input
        type="file"
        accept=".csv"
        onChange={handleFileUpload}
        disabled={loading}
      />
      {loading && <div>Importing...</div>}
      {error && <div>Error: {error.message}</div>}
    </div>
  );
}
```

### Advanced CSV Options
```javascript
const options = {
  // Column detection
  header: true,                      // First row contains headers
  columns: ['id', 'name', 'email'],  // Explicit column names

  // Parsing options
  delimiter: ',',          // Column delimiter
  quote: '"',              // Quote character
  escape: '"',             // Escape character
  nullString: 'NULL',      // String representing NULL
  dateFormat: '%Y-%m-%d',  // Date format
  timestampFormat: '%Y-%m-%d %H:%M:%S',

  // Type inference
  autoDetect: true,  // Auto-detect column types
  sampleSize: 100,   // Rows to sample for type detection

  // Performance
  parallel: true,      // Use parallel parsing
  compression: 'auto'  // Auto-detect compression (gzip, etc.)
};

await importCSV(file, 'table_name', options);
```

### CSV Import from URL
```javascript
async function importCSVFromURL(url, tableName) {
  try {
    const response = await fetch(url);
    const blob = await response.blob();
    const file = new File([blob], 'data.csv', { type: 'text/csv' });

    await importCSV(file, tableName, {
      header: true,
      delimiter: ','
    });

    console.log('CSV imported from URL successfully');
  } catch (error) {
    console.error('Failed to import CSV from URL:', error);
  }
}

// Usage
await importCSVFromURL(
  'https://example.com/data.csv',
  'remote_data'
);
```
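When the remote server allows it, you can also skip the manual fetch and let DuckDB read the URL itself, as the Parquet section below does. A minimal sketch, assuming a raw `connection` handle like the one used in the Parquet examples and DuckDB's built-in `read_csv_auto` function:

```javascript
// Alternative: let DuckDB fetch and parse the CSV directly.
// Assumes `connection` is a raw DuckDB connection.
await connection.execute(`
  CREATE TABLE remote_data AS
  SELECT * FROM read_csv_auto('https://example.com/data.csv')
`);
```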
## JSON Import

### Basic JSON Import
```javascript
// React
import { useImportJSON } from '@northprint/duckdb-wasm-adapter-react';

function JSONImporter() {
  const { importJSON, loading, error } = useImportJSON();

  const handleImport = async () => {
    const data = [
      { id: 1, name: 'Alice', email: 'alice@example.com' },
      { id: 2, name: 'Bob', email: 'bob@example.com' },
      { id: 3, name: 'Charlie', email: 'charlie@example.com' }
    ];

    await importJSON(data, 'users');
  };

  return (
    <button onClick={handleImport} disabled={loading}>
      Import JSON Data
    </button>
  );
}
```

### JSON File Import
```javascript
async function importJSONFile(file, tableName) {
  const text = await file.text();
  const data = JSON.parse(text);

  // Handle different JSON structures
  if (Array.isArray(data)) {
    // Array of objects
    await importJSON(data, tableName);
  } else if (data.records) {
    // Nested structure
    await importJSON(data.records, tableName);
  } else {
    // Single object - wrap in array
    await importJSON([data], tableName);
  }
}
```

### Streaming JSON Import
```javascript
async function importLargeJSON(file, tableName, batchSize = 1000) {
  // Note: this still parses the whole file into memory; batching
  // limits the size of each individual insert, not the parse.
  const text = await file.text();
  const data = JSON.parse(text);

  // Import in batches for large datasets
  for (let i = 0; i < data.length; i += batchSize) {
    const batch = data.slice(i, i + batchSize);

    if (i === 0) {
      // Create table with first batch
      await importJSON(batch, tableName);
    } else {
      // Append subsequent batches into the existing table
      await appendJSON(batch, tableName);
    }

    // Update progress based on rows imported so far
    const progress = Math.min(100, ((i + batch.length) / data.length) * 100);
    console.log(`Import progress: ${progress.toFixed(1)}%`);
  }
}
```

## Parquet Import
### Basic Parquet Import
```javascript
// React
import { useImportParquet } from '@northprint/duckdb-wasm-adapter-react';

function ParquetImporter() {
  const { importParquet, loading, error } = useImportParquet();

  const handleFileUpload = async (event) => {
    const file = event.target.files[0];
    if (!file) return;

    await importParquet(file, 'analytics_data');
  };

  return (
    <input
      type="file"
      accept=".parquet"
      onChange={handleFileUpload}
      disabled={loading}
    />
  );
}
```

### Parquet from URL
```javascript
async function importParquetFromURL(url, tableName) {
  // DuckDB can read Parquet files directly from URLs.
  // Note: url and tableName are interpolated into SQL here,
  // so only pass trusted values.
  await connection.execute(`
    CREATE TABLE ${tableName} AS
    SELECT * FROM read_parquet('${url}')
  `);
}

// Import from S3 (the bucket must be reachable from the browser,
// e.g. public or configured with credentials)
await connection.execute(`
  CREATE TABLE sales_data AS
  SELECT * FROM read_parquet('s3://bucket/path/to/file.parquet')
`);
```

## CSV Export
### Basic CSV Export
```javascript
// React
import { useExportCSV } from '@northprint/duckdb-wasm-adapter-react';

function CSVExporter() {
  const { exportCSV } = useExportCSV();

  const handleExport = async () => {
    const csv = await exportCSV('SELECT * FROM users ORDER BY name');

    // Download the CSV file
    const blob = new Blob([csv], { type: 'text/csv' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'users.csv';
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
  };

  return (
    <button onClick={handleExport}>
      Export to CSV
    </button>
  );
}
```
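The download boilerplate above (create a Blob, click a temporary link, revoke the URL) recurs in the JSON export example below, so it can be worth extracting. A minimal sketch; `downloadBlob` is our own name, not an adapter export:

```javascript
// Hypothetical convenience helper - not part of the adapter API.
function downloadBlob(content, filename, mimeType) {
  const blob = new Blob([content], { type: mimeType });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
}

// Usage
downloadBlob(csv, 'users.csv', 'text/csv');
```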
### Advanced CSV Export Options

```javascript
const csv = await exportCSV(
  'SELECT * FROM users',
  {
    header: true,            // Include column headers
    delimiter: ',',          // Column delimiter
    quote: '"',              // Quote character
    escape: '"',             // Escape character
    nullString: '',          // How to represent NULL
    dateFormat: '%Y-%m-%d',  // Date formatting
    bom: true                // Include BOM for Excel compatibility
  }
);
```

## JSON Export
### Basic JSON Export
```javascript
// React
import { useExportJSON } from '@northprint/duckdb-wasm-adapter-react';

function JSONExporter() {
  const { exportJSON } = useExportJSON();

  const handleExport = async () => {
    const data = await exportJSON('SELECT * FROM users');

    // Download as JSON file
    const json = JSON.stringify(data, null, 2);
    const blob = new Blob([json], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'users.json';
    a.click();
    URL.revokeObjectURL(url);
  };

  return (
    <button onClick={handleExport}>
      Export to JSON
    </button>
  );
}
```

### Formatted JSON Export
```javascript
async function exportFormattedJSON(query) {
  const data = await exportJSON(query);

  // Custom formatting: wrap records with export metadata
  const formatted = {
    metadata: {
      exportDate: new Date().toISOString(),
      recordCount: data.length,
      query: query
    },
    records: data
  };

  return JSON.stringify(formatted, null, 2);
}
```

## Parquet Export
### Basic Parquet Export
```javascript
async function exportToParquet(query, filename) {
  // Export query results to Parquet. In the browser, COPY writes to
  // DuckDB's virtual filesystem, not to the user's disk.
  await connection.execute(`
    COPY (${query})
    TO '${filename}.parquet'
    (FORMAT PARQUET, COMPRESSION 'SNAPPY')
  `);

  // Read the file back as a blob for download
  const result = await connection.execute(
    `SELECT * FROM read_blob('${filename}.parquet')`
  );

  return result;
}
```
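To hand those bytes to the user, combine this with a download helper like the `downloadBlob` sketch from the CSV Export section. How rows are materialized depends on the adapter, so treat the `rows[0].content` access below as an assumption to verify:

```javascript
// Assumes the adapter materializes rows as plain objects and exposes
// read_blob's `content` column as a byte array - adjust to your
// adapter's actual result shape.
const rows = await exportToParquet('SELECT * FROM sales_data', 'sales');
downloadBlob(rows[0].content, 'sales.parquet', 'application/octet-stream');
```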
## Bulk Import Operations

### Import Multiple Files
```javascript
import { useState } from 'react';
import {
  useImportCSV,
  useImportJSON,
  useImportParquet
} from '@northprint/duckdb-wasm-adapter-react';

function MultiFileImporter() {
  // Import functions from the hooks shown in the sections above
  const { importCSV } = useImportCSV();
  const { importJSON } = useImportJSON();
  const { importParquet } = useImportParquet();

  const [files, setFiles] = useState([]);
  const [importing, setImporting] = useState(false);
  const [progress, setProgress] = useState(0);

  const handleFilesSelect = (event) => {
    setFiles(Array.from(event.target.files));
  };

  const importAllFiles = async () => {
    setImporting(true);

    for (let i = 0; i < files.length; i++) {
      const file = files[i];
      const tableName = file.name.replace(/\.[^/.]+$/, ''); // Remove extension

      try {
        if (file.name.endsWith('.csv')) {
          await importCSV(file, tableName, { header: true });
        } else if (file.name.endsWith('.json')) {
          const text = await file.text();
          const data = JSON.parse(text);
          await importJSON(data, tableName);
        } else if (file.name.endsWith('.parquet')) {
          await importParquet(file, tableName);
        }

        setProgress(((i + 1) / files.length) * 100);
      } catch (error) {
        console.error(`Failed to import ${file.name}:`, error);
      }
    }

    setImporting(false);
    setProgress(0);
  };

  return (
    <div>
      <input
        type="file"
        multiple
        accept=".csv,.json,.parquet"
        onChange={handleFilesSelect}
      />
      {files.length > 0 && (
        <div>
          <p>Selected {files.length} files</p>
          <button onClick={importAllFiles} disabled={importing}>
            Import All Files
          </button>
          {importing && (
            <div className="progress-bar">
              <div
                className="progress-fill"
                style={{ width: `${progress}%` }}
              />
              <span>{progress.toFixed(0)}%</span>
            </div>
          )}
        </div>
      )}
    </div>
  );
}
```

## Data Transformation
### Transform During Import
```javascript
async function importWithTransformation(file, tableName) {
  // First, import to a temporary table
  const tempTable = `temp_${Date.now()}`;
  await importCSV(file, tempTable, { header: true });

  // Transform and create the final table
  await connection.execute(`
    CREATE TABLE ${tableName} AS
    SELECT
      id,
      UPPER(name) as name,
      LOWER(email) as email,
      CAST(salary AS DECIMAL(10,2)) as salary,
      strptime(hire_date, '%m/%d/%Y') as hire_date
    FROM ${tempTable}
    WHERE salary > 0
  `);

  // Clean up the temp table
  await connection.execute(`DROP TABLE ${tempTable}`);
}
```

### Data Validation
```javascript
async function importWithValidation(data, tableName) {
  // Validate data before import
  const errors = [];
  const validRecords = [];

  data.forEach((record, index) => {
    const recordErrors = [];

    // Email validation
    if (!record.email || !record.email.match(/^[^\s@]+@[^\s@]+\.[^\s@]+$/)) {
      recordErrors.push('Invalid email');
    }

    // Required fields
    if (!record.name) {
      recordErrors.push('Name is required');
    }

    // Numeric validation
    if (record.salary && isNaN(record.salary)) {
      recordErrors.push('Salary must be numeric');
    }

    if (recordErrors.length > 0) {
      errors.push({ row: index + 1, errors: recordErrors });
    } else {
      validRecords.push(record);
    }
  });

  if (errors.length > 0) {
    console.warn('Validation errors:', errors);

    // Optionally import only valid records
    if (validRecords.length > 0) {
      await importJSON(validRecords, tableName);
      console.log(`Imported ${validRecords.length} valid records`);
    }
  } else {
    await importJSON(data, tableName);
  }

  return { validRecords: validRecords.length, errors };
}
```

## Performance Optimization
### Batch Import
```javascript
async function optimizedImport(largeDataset, tableName) {
  const batchSize = 10000;
  const totalBatches = Math.ceil(largeDataset.length / batchSize);

  // Create table with first batch
  const firstBatch = largeDataset.slice(0, batchSize);
  await importJSON(firstBatch, tableName);

  // Import remaining batches
  for (let i = 1; i < totalBatches; i++) {
    const start = i * batchSize;
    const end = Math.min(start + batchSize, largeDataset.length);
    const batch = largeDataset.slice(start, end);

    // Use a parameterized INSERT for subsequent batches
    const columns = Object.keys(batch[0]);
    const placeholders = batch.map(() =>
      `(${columns.map(() => '?').join(',')})`
    ).join(',');
    const values = batch.flatMap(record =>
      columns.map(col => record[col])
    );

    await connection.execute(
      `INSERT INTO ${tableName} (${columns.join(',')}) VALUES ${placeholders}`,
      values
    );

    console.log(`Imported batch ${i + 1} of ${totalBatches}`);
  }
}
```

### Memory-Efficient Export
```javascript
async function* streamExport(query, batchSize = 1000) {
  let offset = 0;
  let hasMore = true;

  // Page through the results. Assumes `query` has no LIMIT/OFFSET of
  // its own (and ideally includes an ORDER BY, for stable paging).
  while (hasMore) {
    const batchQuery = `${query} LIMIT ${batchSize} OFFSET ${offset}`;
    const batch = await connection.execute(batchQuery);

    if (batch.length < batchSize) {
      hasMore = false;
    }

    yield batch;
    offset += batchSize;
  }
}

// Usage
async function exportLargeDataset() {
  const csvLines = [];

  for await (const batch of streamExport('SELECT * FROM large_table')) {
    const csv = convertToCSV(batch);
    csvLines.push(csv);
  }

  return csvLines.join('\n');
}
```
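`convertToCSV` above is not provided by the adapter. A minimal sketch for rows shaped as plain objects, quoting every field so embedded commas and quotes survive (pass `includeHeader: true` for the first batch only):

```javascript
// Minimal CSV serializer for an array of plain objects - an assumption
// about the row shape, not an adapter API.
function convertToCSV(rows, includeHeader = false) {
  if (rows.length === 0) return '';
  const columns = Object.keys(rows[0]);
  // Quote every field and double any embedded quotes
  const escape = (value) =>
    `"${String(value ?? '').replace(/"/g, '""')}"`;
  const lines = rows.map((row) =>
    columns.map((col) => escape(row[col])).join(',')
  );
  if (includeHeader) {
    lines.unshift(columns.map(escape).join(','));
  }
  return lines.join('\n');
}
```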
## Best Practices

- **Validate data** before importing to prevent errors
- **Use appropriate formats** - Parquet for analytics, CSV for compatibility
- **Handle large files** with streaming or batch processing
- **Transform data during import** to optimize storage
- **Use compression** for large exports
- **Provide progress feedback** for long operations
- **Clean up temporary tables** after transformations (see the sketch below)
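For that last point, a try/finally wrapper guarantees cleanup even when a transformation throws partway through. A minimal sketch; `withTempTable` is our own helper, not an adapter API:

```javascript
// Hypothetical helper - not part of the adapter API.
// Drops the temp table even if the transformation throws.
async function withTempTable(connection, importFn, fn) {
  const tempTable = `temp_${Date.now()}`;
  await importFn(tempTable);
  try {
    return await fn(tempTable);
  } finally {
    await connection.execute(`DROP TABLE IF EXISTS ${tempTable}`);
  }
}

// Usage: import, transform, and always clean up
await withTempTable(
  connection,
  (name) => importCSV(file, name, { header: true }),
  (name) => connection.execute(`CREATE TABLE users AS SELECT * FROM ${name}`)
);
```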
## Next Steps
- Query Builder - Build complex queries
- Caching - Optimize performance with caching
- Performance - Advanced optimization techniques