- Accurate memory calculation using ggml quantization formulas
- Support for f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1 quantizations
- Asymmetric context support (separate K/V cache quantization)
- Full attention interval support
- Parallel sequences multiplier
- Bilingual interface (Russian/English)
- Retro-style design with tooltips

Signed-off-by: Arseniy Romenskiy <romenskiy@altlinux.org>
Co-authored-by: Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled <qwen@example.com>
217 lines
5.7 KiB
JavaScript
/**
 * LLM Context Memory Calculator - Core Calculation Logic
 *
 * Pure functions with no DOM dependencies.
 */

const QUANT_SIZES = {
|
||
f32: 4.0,
|
||
f16: 2.0,
|
||
bf16: 2.0,
|
||
q8_0: 34/32, // 1.0625
|
||
q4_0: 18/32, // 0.5625
|
||
q4_1: 20/32, // 0.625
|
||
iq4_nl: 18/32, // 0.5625
|
||
q5_0: 22/32, // 0.6875
|
||
q5_1: 24/32 // 0.75
|
||
};
|
||
|
||
/**
|
||
* Normalize quantization type (handle 'K' alias for 'KV')
|
||
* @param {string} type - Quantization type
|
||
* @returns {string} Normalized type
|
||
*/
|
||
function normalizeQuantType(type) {
|
||
if (type === 'K') return 'KV';
|
||
return type;
|
||
}
|
||
|
||
/**
|
||
* Get quantization size in bytes per tensor element
|
||
* @param {string} type - Quantization type
|
||
* @returns {number} Size in bytes
|
||
*/
|
||
function getQuantizationSize(type) {
|
||
const normalizedType = normalizeQuantType(type);
|
||
return QUANT_SIZES[normalizedType] || QUANT_SIZES.f32;
|
||
}
|
||
|
||
/**
|
||
* Format bytes to human-readable string
|
||
* @param {number} bytes - Number of bytes
|
||
* @returns {string} Formatted string (e.g., "1.23 MB")
|
||
*/
|
||
function formatBytes(bytes) {
|
||
if (bytes === 0) return '0 B';
|
||
|
||
const k = 1024;
|
||
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||
|
||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||
}
|
||
|
||
/**
|
||
* Calculate memory breakdown
|
||
* @param {Object} params - Calculation parameters
|
||
* @returns {Object} Memory breakdown object
|
||
*/
|
||
function calculateMemory(params) {
|
||
const {
|
||
contextLength,
|
||
kType,
|
||
vType,
|
||
kvHeads,
|
||
headSize,
|
||
numLayers,
|
||
modelSizeGB,
|
||
parallel,
|
||
fullAttentionInterval
|
||
} = params;
|
||
|
||
// Calculate effective layers (account for full attention interval)
|
||
const effectiveLayers = fullAttentionInterval
|
||
? Math.ceil(numLayers / fullAttentionInterval)
|
||
: numLayers;
|
||
|
||
// Determine parallel multiplier (default 1)
|
||
const parallelMultiplier = parallel || 1;
|
||
|
||
// Get quantization sizes
|
||
const bK = getQuantizationSize(kType);
|
||
const bV = vType ? getQuantizationSize(vType) : bK;
|
||
|
||
// Memory per token for all layers
|
||
// Formula: ctx × layers × kvheads × headdim × (bK + bV)
|
||
const memoryPerToken = contextLength * effectiveLayers * kvHeads * headSize * (bK + bV);
|
||
|
||
// Total KV cache memory
|
||
const totalKVCache = memoryPerToken * parallelMultiplier;
|
||
|
||
// Total memory including model weights (if provided)
|
||
const totalMemory = modelSizeGB
|
||
? totalKVCache + (modelSizeGB * 1024 * 1024 * 1024)
|
||
: null;
|
||
|
||
// Calculate individual cache sizes
|
||
const kCacheSize = contextLength * effectiveLayers * kvHeads * headSize * bK * parallelMultiplier;
|
||
const vCacheSize = contextLength * effectiveLayers * kvHeads * headSize * bV * parallelMultiplier;
|
||
|
||
return {
|
||
kCache: {
|
||
size: kCacheSize,
|
||
formatted: formatBytes(kCacheSize)
|
||
},
|
||
vCache: {
|
||
size: vCacheSize,
|
||
formatted: formatBytes(vCacheSize)
|
||
},
|
||
totalKVCache: {
|
||
size: totalKVCache,
|
||
formatted: formatBytes(totalKVCache)
|
||
},
|
||
totalMemory: totalMemory ? {
|
||
size: totalMemory,
|
||
formatted: formatBytes(totalMemory)
|
||
} : null,
|
||
effectiveLayers,
|
||
parallelMultiplier
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Validate input parameters
|
||
* @param {Object} params - Parameters to validate
|
||
* @param {string} lang - Language code ('ru' or 'en')
|
||
* @returns {Object} Validation result
|
||
*/
|
||
function validateParams(params, lang) {
|
||
lang = lang || 'ru';
|
||
const errors = [];
|
||
|
||
if (!params.contextLength || params.contextLength <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Длина контекста должна быть положительным числом');
|
||
} else {
|
||
errors.push('Context length must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (!params.kvHeads || params.kvHeads <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Количество KV головок должно быть положительным числом');
|
||
} else {
|
||
errors.push('KV heads must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (!params.headSize || params.headSize <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Размер головы должен быть положительным числом');
|
||
} else {
|
||
errors.push('Head size must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (!params.numLayers || params.numLayers <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Количество слоев должно быть положительным числом');
|
||
} else {
|
||
errors.push('Number of layers must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (params.fullAttentionInterval && params.fullAttentionInterval <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Интервал полного внимания должен быть положительным числом');
|
||
} else {
|
||
errors.push('Full attention interval must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (params.modelSizeGB && params.modelSizeGB <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Размер модели должен быть положительным числом');
|
||
} else {
|
||
errors.push('Model size must be a positive number');
|
||
}
|
||
}
|
||
|
||
return {
|
||
valid: errors.length === 0,
|
||
errors
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Generate example parameters
|
||
* @returns {Object} Example parameters object
|
||
*/
|
||
function getExampleParams() {
|
||
return {
|
||
contextLength: 8192,
|
||
kType: 'f16',
|
||
vType: 'f16',
|
||
kvHeads: 32,
|
||
headSize: 128,
|
||
numLayers: 32,
|
||
modelSizeGB: 7,
|
||
parallel: 1,
|
||
fullAttentionInterval: null
|
||
};
|
||
}
|
||
|
||
// Export for use in other files
|
||
if (typeof module !== 'undefined' && module.exports) {
|
||
module.exports = {
|
||
getQuantizationSize,
|
||
formatBytes,
|
||
calculateMemory,
|
||
validateParams,
|
||
getExampleParams,
|
||
QUANT_SIZES
|
||
};
|
||
}
|
||
|
||
// Global export for browser
|
||
window.calculateMemory = calculateMemory;
|
||
window.validateParams = validateParams;
|