Data Import/Export

A complete guide to importing and exporting data with the DuckDB WASM Adapter.

Overview

The DuckDB WASM Adapter imports and exports data in several formats, including CSV, JSON, Parquet, and Arrow. All operations run entirely in the browser, so data can be processed without any server dependencies.

Supported Formats

  • CSV - Comma-separated values
  • JSON - JavaScript Object Notation
  • Parquet - Columnar storage format
  • Arrow - Apache Arrow format
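
Several examples later in this guide call connection.execute() directly for raw SQL. They assume a connection object is already available from the adapter. The sketch below is a hypothetical illustration only: the useDuckDB hook name and the shape of its return value are assumptions, so check your adapter version for the actual connection API.

javascript
// Hypothetical sketch: the hook name and return shape are assumptions, not documented API.
import { useDuckDB } from '@northprint/duckdb-wasm-adapter-react';

function RawSQLExample() {
  const { connection } = useDuckDB(); // assumed to expose an execute() method

  const runQuery = async () => {
    // connection.execute() is what the raw-SQL snippets in this guide rely on
    const result = await connection.execute('SELECT 42 AS answer');
    console.log(result);
  };

  return <button onClick={runQuery}>Run raw SQL</button>;
}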

CSV Import

Basic CSV Import

javascript
// React
import { useImportCSV } from '@northprint/duckdb-wasm-adapter-react';

function CSVImporter() {
  const { importCSV, loading, error } = useImportCSV({
    onSuccess: (result) => {
      console.log('Import successful:', result);
    },
    onError: (error) => {
      console.error('Import failed:', error);
    }
  });

  const handleFileUpload = async (event) => {
    const file = event.target.files[0];
    if (!file) return;

    await importCSV(file, 'users', {
      header: true,        // First row contains headers
      delimiter: ',',      // Column delimiter
      nullString: 'NULL'   // String representing NULL values
    });
  };

  return (
    <div>
      <input 
        type="file" 
        accept=".csv" 
        onChange={handleFileUpload}
        disabled={loading}
      />
      {loading && <div>Importing...</div>}
      {error && <div>Error: {error.message}</div>}
    </div>
  );
}

Advanced CSV Options

javascript
const options = {
  // Column detection
  header: true,              // First row contains headers
  columns: ['id', 'name', 'email'], // Explicit column names
  
  // Parsing options
  delimiter: ',',            // Column delimiter
  quote: '"',               // Quote character
  escape: '"',              // Escape character
  nullString: 'NULL',       // String representing NULL
  dateFormat: '%Y-%m-%d',   // Date format
  timestampFormat: '%Y-%m-%d %H:%M:%S',
  
  // Type inference
  autoDetect: true,         // Auto-detect column types
  sampleSize: 100,          // Rows to sample for type detection
  
  // Performance
  parallel: true,           // Use parallel parsing
  compression: 'auto'       // Auto-detect compression (gzip, etc.)
};

await importCSV(file, 'table_name', options);

CSV Import from URL

javascript
async function importCSVFromURL(url, tableName) {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch ${url}: ${response.status}`);
    }
    const blob = await response.blob();
    const file = new File([blob], 'data.csv', { type: 'text/csv' });
    
    await importCSV(file, tableName, {
      header: true,
      delimiter: ','
    });
    
    console.log('CSV imported from URL successfully');
  } catch (error) {
    console.error('Failed to import CSV from URL:', error);
  }
}

// Usage
await importCSVFromURL(
  'https://example.com/data.csv',
  'remote_data'
);

JSON Import

Basic JSON Import

javascript
// React
import { useImportJSON } from '@northprint/duckdb-wasm-adapter-react';

function JSONImporter() {
  const { importJSON, loading, error } = useImportJSON();

  const handleImport = async () => {
    const data = [
      { id: 1, name: 'Alice', email: 'alice@example.com' },
      { id: 2, name: 'Bob', email: 'bob@example.com' },
      { id: 3, name: 'Charlie', email: 'charlie@example.com' }
    ];

    await importJSON(data, 'users');
  };

  return (
    <button onClick={handleImport} disabled={loading}>
      Import JSON Data
    </button>
  );
}

JSON File Import

javascript
async function importJSONFile(file, tableName) {
  const text = await file.text();
  const data = JSON.parse(text);
  
  // Handle different JSON structures
  if (Array.isArray(data)) {
    // Array of objects
    await importJSON(data, tableName);
  } else if (data.records) {
    // Nested structure
    await importJSON(data.records, tableName);
  } else {
    // Single object - wrap in array
    await importJSON([data], tableName);
  }
}

Streaming JSON Import

javascript
async function importLargeJSON(file, tableName, batchSize = 1000) {
  // Note: the whole file is still parsed in memory; only the table inserts are batched.
  const text = await file.text();
  const data = JSON.parse(text);
  
  // Import in batches for large datasets
  for (let i = 0; i < data.length; i += batchSize) {
    const batch = data.slice(i, i + batchSize);
    
    if (i === 0) {
      // Create table with first batch
      await importJSON(batch, tableName);
    } else {
      // Append subsequent batches (appendJSON is assumed to append to the existing table)
      await appendJSON(batch, tableName);
    }
    
    // Update progress based on rows imported so far
    const progress = Math.min(100, ((i + batch.length) / data.length) * 100);
    console.log(`Import progress: ${progress.toFixed(1)}%`);
  }
}

Parquet Import

Basic Parquet Import

javascript
// React
import { useImportParquet } from '@northprint/duckdb-wasm-adapter-react';

function ParquetImporter() {
  const { importParquet, loading, error } = useImportParquet();

  const handleFileUpload = async (event) => {
    const file = event.target.files[0];
    if (!file) return;

    await importParquet(file, 'analytics_data');
  };

  return (
    <input 
      type="file" 
      accept=".parquet" 
      onChange={handleFileUpload}
      disabled={loading}
    />
  );
}

Parquet from URL

javascript
async function importParquetFromURL(url, tableName) {
  // DuckDB can read Parquet files directly from URLs
  await connection.execute(`
    CREATE TABLE ${tableName} AS 
    SELECT * FROM read_parquet('${url}')
  `);
}

// Import from S3
await connection.execute(`
  CREATE TABLE sales_data AS 
  SELECT * FROM read_parquet('s3://bucket/path/to/file.parquet')
`);
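
For the S3 example to work in the browser, the region and credentials have to be configured first. A minimal sketch using DuckDB's standard S3 settings (the values are placeholders):

javascript
// Configure S3 access before querying s3:// URLs (placeholder values).
await connection.execute(`SET s3_region = 'us-east-1'`);
await connection.execute(`SET s3_access_key_id = 'YOUR_ACCESS_KEY'`);
await connection.execute(`SET s3_secret_access_key = 'YOUR_SECRET_KEY'`);

// After this, the read_parquet('s3://...') call above can resolve the bucket.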

CSV Export

Basic CSV Export

javascript
// React
import { useExportCSV } from '@northprint/duckdb-wasm-adapter-react';

function CSVExporter() {
  const { exportCSV } = useExportCSV();

  const handleExport = async () => {
    const csv = await exportCSV('SELECT * FROM users ORDER BY name');
    
    // Download the CSV file
    const blob = new Blob([csv], { type: 'text/csv' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'users.csv';
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
  };

  return (
    <button onClick={handleExport}>
      Export to CSV
    </button>
  );
}

Advanced CSV Export Options

javascript
const csv = await exportCSV(
  'SELECT * FROM users',
  {
    header: true,           // Include column headers
    delimiter: ',',         // Column delimiter
    quote: '"',            // Quote character
    escape: '"',           // Escape character
    nullString: '',        // How to represent NULL
    dateFormat: '%Y-%m-%d', // Date formatting
    bom: true              // Include BOM for Excel compatibility
  }
);

JSON Export

Basic JSON Export

javascript
// React
import { useExportJSON } from '@northprint/duckdb-wasm-adapter-react';

function JSONExporter() {
  const { exportJSON } = useExportJSON();

  const handleExport = async () => {
    const data = await exportJSON('SELECT * FROM users');
    
    // Download as JSON file
    const json = JSON.stringify(data, null, 2);
    const blob = new Blob([json], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'users.json';
    a.click();
    URL.revokeObjectURL(url);
  };

  return (
    <button onClick={handleExport}>
      Export to JSON
    </button>
  );
}

Formatted JSON Export

javascript
async function exportFormattedJSON(query) {
  const data = await exportJSON(query);
  
  // Custom formatting
  const formatted = {
    metadata: {
      exportDate: new Date().toISOString(),
      recordCount: data.length,
      query: query
    },
    records: data
  };
  
  return JSON.stringify(formatted, null, 2);
}

Parquet Export

Basic Parquet Export

javascript
async function exportToParquet(query, filename) {
  // Export query results to Parquet (the file is written to DuckDB-WASM's in-browser virtual filesystem)
  await connection.execute(`
    COPY (${query}) 
    TO '${filename}.parquet' 
    (FORMAT PARQUET, COMPRESSION 'SNAPPY')
  `);
  
  // Get the file for download
  const result = await connection.execute(
    `SELECT * FROM read_blob('${filename}.parquet')`
  );
  
  return result;
}
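
To hand the exported file to the user, the bytes returned by read_blob can be wrapped in a Blob and downloaded. This sketch assumes the first result row exposes a content field holding the Parquet bytes; the exact result shape depends on the adapter, so adjust accordingly.

javascript
// Sketch: turn the read_blob result into a browser download.
// Assumes rows expose a `content` field containing the Parquet bytes (adapter-dependent).
async function downloadParquet(query, filename) {
  const result = await exportToParquet(query, filename);
  const rows = Array.isArray(result) ? result : result.toArray?.() ?? [];
  const bytes = rows[0]?.content;

  const blob = new Blob([bytes], { type: 'application/octet-stream' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = `${filename}.parquet`;
  a.click();
  URL.revokeObjectURL(url);
}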

Bulk Import Operations

Import Multiple Files

javascript
import { useState } from 'react';
import {
  useImportCSV,
  useImportJSON,
  useImportParquet
} from '@northprint/duckdb-wasm-adapter-react';

function MultiFileImporter() {
  const { importCSV } = useImportCSV();
  const { importJSON } = useImportJSON();
  const { importParquet } = useImportParquet();
  const [files, setFiles] = useState([]);
  const [importing, setImporting] = useState(false);
  const [progress, setProgress] = useState(0);

  const handleFilesSelect = (event) => {
    setFiles(Array.from(event.target.files));
  };

  const importAllFiles = async () => {
    setImporting(true);
    
    for (let i = 0; i < files.length; i++) {
      const file = files[i];
      const tableName = file.name.replace(/\.[^/.]+$/, ''); // Remove extension
      
      try {
        if (file.name.endsWith('.csv')) {
          await importCSV(file, tableName, { header: true });
        } else if (file.name.endsWith('.json')) {
          const text = await file.text();
          const data = JSON.parse(text);
          await importJSON(data, tableName);
        } else if (file.name.endsWith('.parquet')) {
          await importParquet(file, tableName);
        }
        
        setProgress(((i + 1) / files.length) * 100);
      } catch (error) {
        console.error(`Failed to import ${file.name}:`, error);
      }
    }
    
    setImporting(false);
    setProgress(0);
  };

  return (
    <div>
      <input 
        type="file" 
        multiple 
        accept=".csv,.json,.parquet"
        onChange={handleFilesSelect}
      />
      
      {files.length > 0 && (
        <div>
          <p>Selected {files.length} files</p>
          <button onClick={importAllFiles} disabled={importing}>
            Import All Files
          </button>
          
          {importing && (
            <div className="progress-bar">
              <div 
                className="progress-fill" 
                style={{ width: `${progress}%` }}
              />
              <span>{progress.toFixed(0)}%</span>
            </div>
          )}
        </div>
      )}
    </div>
  );
}

Data Transformation

Transform During Import

javascript
async function importWithTransformation(file, tableName) {
  // First, import to temporary table
  const tempTable = `temp_${Date.now()}`;
  await importCSV(file, tempTable, { header: true });
  
  // Transform and create final table
  await connection.execute(`
    CREATE TABLE ${tableName} AS
    SELECT 
      id,
      UPPER(name) as name,
      LOWER(email) as email,
      CAST(salary AS DECIMAL(10,2)) as salary,
      strptime(hire_date, '%m/%d/%Y') as hire_date
    FROM ${tempTable}
    WHERE salary > 0
  `);
  
  // Clean up temp table
  await connection.execute(`DROP TABLE ${tempTable}`);
}

Data Validation

javascript
async function importWithValidation(data, tableName) {
  // Validate data before import
  const errors = [];
  const validRecords = [];
  
  data.forEach((record, index) => {
    const recordErrors = [];
    
    // Email validation
    if (!record.email || !record.email.match(/^[^\s@]+@[^\s@]+\.[^\s@]+$/)) {
      recordErrors.push('Invalid email');
    }
    
    // Required fields
    if (!record.name) {
      recordErrors.push('Name is required');
    }
    
    // Numeric validation
    if (record.salary && isNaN(record.salary)) {
      recordErrors.push('Salary must be numeric');
    }
    
    if (recordErrors.length > 0) {
      errors.push({ row: index + 1, errors: recordErrors });
    } else {
      validRecords.push(record);
    }
  });
  
  if (errors.length > 0) {
    console.warn('Validation errors:', errors);
    
    // Optionally import only valid records
    if (validRecords.length > 0) {
      await importJSON(validRecords, tableName);
      console.log(`Imported ${validRecords.length} valid records`);
    }
  } else {
    await importJSON(data, tableName);
  }
  
  return { validRecords: validRecords.length, errors };
}
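
As a brief usage sketch for the helper above: inspect the returned counts and surface the per-row errors to the user.

javascript
// Sketch: call the validation helper and report the outcome.
const rows = [
  { name: 'Alice', email: 'alice@example.com', salary: 50000 },
  { name: '', email: 'not-an-email', salary: 'abc' }
];

const { validRecords, errors } = await importWithValidation(rows, 'users');
console.log(`Imported ${validRecords} valid records`);
errors.forEach(({ row, errors: messages }) => {
  console.warn(`Row ${row}: ${messages.join(', ')}`);
});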

Performance Optimization

Batch Import

javascript
async function optimizedImport(largeDataset, tableName) {
  const batchSize = 10000;
  const totalBatches = Math.ceil(largeDataset.length / batchSize);
  
  // Create table with first batch
  const firstBatch = largeDataset.slice(0, batchSize);
  await importJSON(firstBatch, tableName);
  
  // Import remaining batches
  for (let i = 1; i < totalBatches; i++) {
    const start = i * batchSize;
    const end = Math.min(start + batchSize, largeDataset.length);
    const batch = largeDataset.slice(start, end);
    
    // Use INSERT for subsequent batches
    const columns = Object.keys(batch[0]);
    const placeholders = batch.map(() => 
      `(${columns.map(() => '?').join(',')})`
    ).join(',');
    
    const values = batch.flatMap(record => 
      columns.map(col => record[col])
    );
    
    await connection.execute(
      `INSERT INTO ${tableName} (${columns.join(',')}) VALUES ${placeholders}`,
      values
    );
    
    console.log(`Imported batch ${i + 1} of ${totalBatches}`);
  }
}

Memory-Efficient Export

javascript
async function* streamExport(query, batchSize = 1000) {
  // Note: include an ORDER BY in the query so LIMIT/OFFSET pages are deterministic.
  let offset = 0;
  let hasMore = true;
  
  while (hasMore) {
    const batchQuery = `${query} LIMIT ${batchSize} OFFSET ${offset}`;
    const batch = await connection.execute(batchQuery);
    
    if (batch.length < batchSize) {
      hasMore = false;
    }
    
    yield batch;
    offset += batchSize;
  }
}

// Usage
async function exportLargeDataset() {
  const csvLines = [];
  
  for await (const batch of streamExport('SELECT * FROM large_table')) {
    const csv = convertToCSV(batch);
    csvLines.push(csv);
  }
  
  return csvLines.join('\n');
}
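
The convertToCSV helper used above is not defined in this guide; a minimal sketch (emitting data rows only, so the header is not repeated for every batch) could look like this:

javascript
// Minimal sketch of the convertToCSV helper referenced above.
// Emits data rows only; write a header line once before the first batch if you need one.
function convertToCSV(rows) {
  if (!rows.length) return '';
  const columns = Object.keys(rows[0]);
  return rows
    .map(row =>
      columns
        .map(col => {
          const value = String(row[col] ?? '');
          // Quote values containing delimiters, quotes, or newlines
          return /[",\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;
        })
        .join(',')
    )
    .join('\n');
}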

Best Practices

  1. Validate data before importing to prevent errors
  2. Use appropriate formats - Parquet for analytics, CSV for compatibility
  3. Handle large files with streaming or batch processing
  4. Transform data during import to optimize storage
  5. Use compression for large exports (see the sketch after this list)
  6. Provide progress feedback for long operations
  7. Clean up temporary tables after transformations
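
For practice 5, compression can be applied directly in the COPY statement when exporting. A sketch with placeholder table and file names:

javascript
// Sketch: compressed exports via COPY (placeholder table and file names).

// Parquet with ZSTD compression
await connection.execute(`
  COPY (SELECT * FROM large_table)
  TO 'export.parquet' (FORMAT PARQUET, COMPRESSION 'ZSTD')
`);

// Gzipped CSV
await connection.execute(`
  COPY (SELECT * FROM large_table)
  TO 'export.csv.gz' (FORMAT CSV, HEADER, COMPRESSION 'GZIP')
`);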
