How it works
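A chunk-and-merge strategy that models how data sets too large to fit in RAM are sorted: the input is split into fixed-size chunks, each chunk is sorted on its own, and the sorted chunks are then combined by repeatedly taking the smallest remaining head element (a k-way merge).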
Implementation
/**
 * Sorts `arr` in place with a chunk-and-merge scheme.
 *
 * The array is cut into consecutive runs of max(8, floor(sqrt(length)))
 * elements. Each run is copied out and insertion-sorted, then the runs are
 * merged back into `arr` by repeatedly selecting the smallest head element.
 * On equal heads the earliest run wins, because only a strictly smaller
 * value replaces the current candidate.
 *
 * After every element is placed, `write(index, value)` is invoked; `write`
 * is expected to be defined outside this function.
 *
 * @param {Array} arr   Array to sort; overwritten with the merged result.
 * @param {*} stats     Accepted as a parameter but not used by this function.
 */
function externalMergeSort(arr, stats) {
  const total = arr.length;
  const runLength = Math.max(8, Math.floor(Math.sqrt(total || 1)));

  // Phase 1: copy out each run and insertion-sort the copy.
  const runs = [];
  for (let start = 0; start < total; start += runLength) {
    const run = arr.slice(start, Math.min(start + runLength, total));
    for (let idx = 1; idx < run.length; idx++) {
      const pending = run[idx];
      let slot = idx;
      // Shift larger elements right until `pending` fits.
      while (slot > 0 && !(run[slot - 1] <= pending)) {
        run[slot] = run[slot - 1];
        slot--;
      }
      run[slot] = pending;
    }
    runs.push(run);
  }

  // Phase 2: k-way merge by linear scan over the current head of every run.
  const cursors = runs.map(() => 0);
  for (let dest = 0; dest < total; dest++) {
    let source = -1;
    let smallest = 0;
    for (let r = 0; r < runs.length; r++) {
      const run = runs[r];
      if (cursors[r] >= run.length) {
        continue; // this run is exhausted
      }
      const head = run[cursors[r]];
      if (source !== -1 && !(head < smallest)) {
        continue; // current candidate is still the best
      }
      source = r;
      smallest = head;
    }
    arr[dest] = smallest;
    cursors[source]++;
    write(dest, arr[dest]);
  }
}
def sort(arr, n, stats):
    """Sort the first ``n`` items of ``arr`` in place using chunk-and-merge.

    The prefix is cut into consecutive chunks of ``max(8, floor(sqrt(count)))``
    items. Each chunk is copied out and insertion-sorted, then the sorted
    chunks are merged back into ``arr`` by repeatedly taking the smallest head
    item. On ties the earliest chunk wins, so equal items keep their order.

    Args:
        arr: Mutable sequence (e.g. a list) to sort. Items past the first
            ``n`` are left untouched.
        n: Number of leading items to sort. Values larger than ``len(arr)``
            are clamped; values <= 0 make the call a no-op.
        stats: Accepted as a parameter but not used by this function.

    Returns:
        None. ``arr`` is modified in place.
    """
    # Clamp so the merge never writes past the end of ``arr``.
    count = min(n, len(arr))
    if count <= 0:
        return

    chunk_size = max(8, int(count ** 0.5))

    # Phase 1: copy out each chunk (slicing copies a list) and insertion-sort it.
    chunks = []
    for start in range(0, count, chunk_size):
        chunk = arr[start:min(start + chunk_size, count)]
        for p in range(1, len(chunk)):
            key = chunk[p]
            j = p - 1
            while j >= 0:
                if chunk[j] <= key:
                    break
                chunk[j + 1] = chunk[j]
                j -= 1
            chunk[j + 1] = key
        chunks.append(chunk)

    # Phase 2: k-way merge by scanning the current head of every chunk.
    ptr = [0] * len(chunks)
    for out in range(count):
        min_chunk = -1
        min_val = 0
        for c, chunk in enumerate(chunks):
            if ptr[c] >= len(chunk):
                continue  # chunk exhausted
            val = chunk[ptr[c]]
            if min_chunk == -1 or val < min_val:
                min_chunk = c
                min_val = val
        arr[out] = min_val
        ptr[min_chunk] += 1
#include <vector>
#include <algorithm>
#include <cstring>
#include <cmath>

/// @brief Sorts the first @p n elements of @p arr ascending, in place, using
///        a chunk-and-merge scheme.
///
/// The prefix is cut into consecutive chunks of max(8, floor(sqrt(count)))
/// elements. Each chunk is copied out and insertion-sorted, then the sorted
/// chunks are merged back into @p arr by repeatedly taking the smallest head
/// element (linear scan over all chunk heads).
///
/// @param arr          Vector to sort. Elements past the first @p n are left
///                     untouched.
/// @param n            Number of leading elements to sort. Values larger than
///                     arr.size() are clamped; values <= 0 make this a no-op.
/// @param comparisons  Neither read nor modified by this function.
/// @param swaps        Neither read nor modified by this function.
void sort(std::vector<int>& arr, int n, int& comparisons, int& swaps) {
    // The counters are not touched here; the casts silence unused warnings.
    static_cast<void>(comparisons);
    static_cast<void>(swaps);

    if (n <= 0 || arr.empty()) {
        return;
    }
    // Clamp so the merge never indexes past the end of the vector.
    const std::size_t count = std::min(static_cast<std::size_t>(n), arr.size());

    const std::size_t root =
        static_cast<std::size_t>(std::sqrt(static_cast<double>(count)));
    const std::size_t chunkSize = std::max<std::size_t>(8, root);

    // Phase 1: copy out each chunk and insertion-sort the copy.
    std::vector<std::vector<int>> chunks;
    chunks.reserve((count + chunkSize - 1) / chunkSize);
    for (std::size_t start = 0; start < count; start += chunkSize) {
        const std::size_t len = std::min(chunkSize, count - start);
        chunks.emplace_back(arr.data() + start, arr.data() + start + len);

        std::vector<int>& chunk = chunks.back();
        for (std::size_t p = 1; p < len; ++p) {
            const int key = chunk[p];
            std::size_t hole = p;
            // Shift larger elements right until `key` fits.
            while (hole > 0 && chunk[hole - 1] > key) {
                chunk[hole] = chunk[hole - 1];
                --hole;
            }
            chunk[hole] = key;
        }
    }

    // Phase 2: k-way merge. ptr[c] is the next unread index within chunk c.
    std::vector<std::size_t> ptr(chunks.size());
    const std::size_t none = chunks.size();  // sentinel: no candidate chosen yet
    for (std::size_t out = 0; out < count; ++out) {
        std::size_t minChunk = none;
        int minVal = 0;
        for (std::size_t c = 0; c < chunks.size(); ++c) {
            if (ptr[c] >= chunks[c].size()) {
                continue;  // chunk exhausted
            }
            const int val = chunks[c][ptr[c]];
            if (minChunk == none || val < minVal) {
                minChunk = c;
                minVal = val;
            }
        }
        // The chunks hold exactly `count` elements in total, so a candidate
        // always exists while out < count.
        arr[out] = minVal;
        ++ptr[minChunk];
    }
}
/// <summary>
/// Sorts the first <paramref name="n"/> elements of <paramref name="arr"/>
/// ascending, in place, using a chunk-and-merge scheme: the prefix is cut
/// into chunks of max(8, floor(sqrt(count))) elements, each chunk is copied
/// out and insertion-sorted, and the sorted chunks are merged back by
/// repeatedly taking the smallest head element.
/// </summary>
/// <param name="arr">Array to sort. Elements past the first
/// <paramref name="n"/> are left untouched.</param>
/// <param name="n">Number of leading elements to sort. Values larger than
/// <c>arr.Length</c> are clamped; values &lt;= 0 make the call a no-op.</param>
/// <param name="stats">Accepted as a parameter but not used by this method.</param>
/// <exception cref="ArgumentNullException"><paramref name="arr"/> is null
/// while <paramref name="n"/> is positive.</exception>
public void Sort(int[] arr, int n, dynamic stats)
{
    if (n <= 0)
    {
        return;
    }
    if (arr == null)
    {
        throw new ArgumentNullException(nameof(arr));
    }

    // Clamp so the merge never indexes past the end of the array.
    int count = Math.Min(n, arr.Length);
    if (count == 0)
    {
        return;
    }

    int chunkSize = Math.Max(8, (int)Math.Sqrt(count));
    int chunkCount = (count - 1) / chunkSize + 1;

    // Phase 1: copy out each chunk and insertion-sort the copy.
    int[][] chunks = new int[chunkCount][];
    for (int k = 0; k < chunkCount; k++)
    {
        int start = k * chunkSize;
        int length = Math.Min(chunkSize, count - start);
        int[] chunk = new int[length];
        Array.Copy(arr, start, chunk, 0, length);

        for (int p = 1; p < length; p++)
        {
            int key = chunk[p];
            int j = p - 1;
            while (j >= 0 && chunk[j] > key)
            {
                chunk[j + 1] = chunk[j];
                j--;
            }
            chunk[j + 1] = key;
        }
        chunks[k] = chunk;
    }

    // Phase 2: k-way merge. ptr[c] is the next unread index within chunk c;
    // a freshly allocated int[] is already zero-filled.
    int[] ptr = new int[chunkCount];
    for (int dest = 0; dest < count; dest++)
    {
        int minChunk = -1;
        int minVal = 0;
        for (int c = 0; c < chunkCount; c++)
        {
            if (ptr[c] >= chunks[c].Length)
            {
                continue; // chunk exhausted
            }
            int val = chunks[c][ptr[c]];
            if (minChunk == -1 || val < minVal)
            {
                minChunk = c;
                minVal = val;
            }
        }
        arr[dest] = minVal;
        ptr[minChunk]++;
    }
}
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>

/* Insertion-sorts a[0..len) ascending, in place. */
static void insertion_sort_range(int *a, int len) {
    for (int p = 1; p < len; p++) {
        int key = a[p];
        int j = p - 1;
        while (j >= 0 && a[j] > key) {
            a[j + 1] = a[j];
            j--;
        }
        a[j + 1] = key;
    }
}

/*
 * Sorts arr[0..n) ascending, in place, using a chunk-and-merge scheme.
 *
 * The input is copied into a scratch buffer, which is treated as consecutive
 * chunks of max(8, floor(sqrt(n))) elements. Each chunk is insertion-sorted,
 * then the chunks are merged back into arr by repeatedly taking the smallest
 * head element.
 *
 * arr          array to sort; a NULL pointer makes the call a no-op
 * n            number of elements in arr; values <= 0 make the call a no-op
 * comparisons  neither read nor written by this function
 * swaps        neither read nor written by this function
 *
 * If scratch memory cannot be allocated, the array is insertion-sorted in
 * place instead, so it still ends up sorted.
 */
void sort(int arr[], int n, int* comparisons, int* swaps) {
    (void)comparisons;
    (void)swaps;

    if (arr == NULL || n <= 0) {
        return;
    }

    /* Integer floor(sqrt(n)): exact, and independent of floating point. */
    int root = 0;
    while ((long long)(root + 1) * (root + 1) <= n) {
        root++;
    }
    int chunkSize = root > 8 ? root : 8;
    int chunkCount = (n - 1) / chunkSize + 1;

    /* Heap storage, because the chunks must outlive the loop that sorts them. */
    int *sorted = (int *)malloc((size_t)n * sizeof *sorted);
    int *ptr = (int *)calloc((size_t)chunkCount, sizeof *ptr);
    if (sorted == NULL || ptr == NULL) {
        free(sorted);
        free(ptr);
        insertion_sort_range(arr, n);
        return;
    }

    /* Phase 1: copy the input and sort each chunk of the copy. */
    memcpy(sorted, arr, (size_t)n * sizeof *sorted);
    for (int c = 0; c < chunkCount; c++) {
        int start = c * chunkSize;
        int len = (n - start < chunkSize) ? (n - start) : chunkSize;
        insertion_sort_range(sorted + start, len);
    }

    /* Phase 2: k-way merge. ptr[c] is the next unread offset within chunk c. */
    for (int out = 0; out < n; out++) {
        int minChunk = -1;
        int minVal = 0;
        for (int c = 0; c < chunkCount; c++) {
            int start = c * chunkSize;
            int len = (n - start < chunkSize) ? (n - start) : chunkSize;
            if (ptr[c] >= len) {
                continue; /* chunk exhausted */
            }
            int val = sorted[start + ptr[c]];
            if (minChunk == -1 || val < minVal) {
                minChunk = c;
                minVal = val;
            }
        }
        /* The chunks hold exactly n elements, so a candidate always exists. */
        arr[out] = minVal;
        ptr[minChunk]++;
    }

    free(ptr);
    free(sorted);
}