llm_calculator/js/calculation.js
romenskiy2012 c471e1d0a9 Create LLM context memory calculator with:
- Accurate memory calculation using ggml quantization formulas
- Support for f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1 quantizations
- Asymmetric context support (separate K/V cache quantization)
- Full attention interval support
- Parallel sequences multiplier
- Bilingual interface (Russian/English)
- Retro-style design with tooltips

Signed-off-by: Arseniy Romenskiy <romenskiy@altlinux.org> - Co-authored-by: Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled <qwen@example.com>
2026-04-12 00:05:56 +03:00

217 lines
5.7 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* LLM Context Memory Calculator - Core Calculation Logic
* Pure functions with no DOM dependencies
*/
// Bytes per tensor element for each supported ggml quantization type.
// Block formats store N quantized values plus per-block scale/offset
// metadata, hence the fractional sizes (e.g. q8_0: 34 bytes per 32 elems).
// Frozen so shared lookup data cannot be mutated at runtime.
// NOTE(review): normalizeQuantType maps the alias 'K' to 'KV', but there
// is no 'KV' entry here, so that alias falls back to f32 — confirm intent.
const QUANT_SIZES = Object.freeze({
f32: 4.0,
f16: 2.0,
bf16: 2.0,
q8_0: 34/32, // 1.0625
q4_0: 18/32, // 0.5625
q4_1: 20/32, // 0.625
iq4_nl: 18/32, // 0.5625
q5_0: 22/32, // 0.6875
q5_1: 24/32 // 0.75
});
/**
 * Map a quantization-type alias to its canonical name.
 * Only 'K' is currently aliased (to 'KV'); any other value is
 * returned unchanged.
 * @param {string} type - Quantization type identifier
 * @returns {string} Canonical type name
 */
function normalizeQuantType(type) {
return type === 'K' ? 'KV' : type;
}
/**
 * Look up the per-element size in bytes for a quantization type.
 * Unknown types silently fall back to f32 (4 bytes).
 * NOTE(review): the 'K' alias normalizes to 'KV', which has no entry
 * in QUANT_SIZES, so 'K' currently resolves to the f32 fallback —
 * confirm this is intended.
 * @param {string} type - Quantization type
 * @returns {number} Size in bytes per tensor element
 */
function getQuantizationSize(type) {
const size = QUANT_SIZES[normalizeQuantType(type)];
return size !== undefined ? size : QUANT_SIZES.f32;
}
/**
 * Format a byte count as a human-readable string with a binary-unit
 * suffix (e.g. "1.23 MB", using 1024-based units).
 * Assumes a non-negative byte count (cache sizes here are always >= 0).
 * @param {number} bytes - Number of bytes
 * @returns {string} Formatted string, e.g. "1.23 MB"
 */
function formatBytes(bytes) {
if (bytes === 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
// Clamp the unit index: without it, 0 < bytes < 1 gives i = -1 and
// bytes >= 1 PB gives i = 5, both of which index past `sizes` and
// print "undefined" as the suffix.
const i = Math.min(
Math.max(Math.floor(Math.log(bytes) / Math.log(k)), 0),
sizes.length - 1
);
// toFixed(2) then parseFloat drops trailing zeros ("1.00" -> "1").
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
/**
 * Compute the KV-cache memory breakdown for an LLM context.
 * @param {Object} params - Calculation parameters
 * @param {number} params.contextLength - Context window in tokens
 * @param {string} params.kType - K-cache quantization type
 * @param {string} [params.vType] - V-cache quantization type (defaults to kType)
 * @param {number} params.kvHeads - Number of KV attention heads
 * @param {number} params.headSize - Dimension of each head
 * @param {number} params.numLayers - Total transformer layers
 * @param {number} [params.modelSizeGB] - Model weight size in GiB (optional)
 * @param {number} [params.parallel] - Parallel sequence count (defaults to 1)
 * @param {number} [params.fullAttentionInterval] - If set, only every
 *   Nth layer is treated as full-attention (others are excluded)
 * @returns {Object} Sizes (raw + formatted) for K cache, V cache,
 *   total KV cache, and optionally total memory incl. weights
 */
function calculateMemory(params) {
const {
contextLength,
kType,
vType,
kvHeads,
headSize,
numLayers,
modelSizeGB,
parallel,
fullAttentionInterval
} = params;
// With a full-attention interval, only ceil(layers / interval) layers
// are counted toward the cache; otherwise every layer counts.
const effectiveLayers = fullAttentionInterval
? Math.ceil(numLayers / fullAttentionInterval)
: numLayers;
// Parallel sequences multiply the whole cache; default is a single sequence.
const parallelMultiplier = parallel || 1;
// Bytes per element for each cache; V falls back to K's type when unset.
const bytesK = getQuantizationSize(kType);
const bytesV = vType ? getQuantizationSize(vType) : bytesK;
// Element count shared by both caches:
// ctx × effective layers × KV heads × head dim.
const elements = contextLength * effectiveLayers * kvHeads * headSize;
const kCacheSize = elements * bytesK * parallelMultiplier;
const vCacheSize = elements * bytesV * parallelMultiplier;
const totalKVCache = elements * (bytesK + bytesV) * parallelMultiplier;
// Add model weights (GiB -> bytes) only when a size was supplied.
const totalMemory = modelSizeGB
? totalKVCache + (modelSizeGB * 1024 * 1024 * 1024)
: null;
// Pair each raw byte count with its human-readable form.
const withFormat = (size) => ({ size, formatted: formatBytes(size) });
return {
kCache: withFormat(kCacheSize),
vCache: withFormat(vCacheSize),
totalKVCache: withFormat(totalKVCache),
totalMemory: totalMemory ? withFormat(totalMemory) : null,
effectiveLayers,
parallelMultiplier
};
}
/**
 * Validate calculator input parameters.
 * Required fields must be present and positive; optional fields
 * (fullAttentionInterval, modelSizeGB) only fail when supplied
 * with a non-positive value.
 * @param {Object} params - Parameters to validate
 * @param {string} [lang] - Language code ('ru' or 'en'); defaults to 'ru'
 * @returns {Object} { valid: boolean, errors: string[] }
 */
function validateParams(params, lang) {
const locale = lang || 'ru';
// [field, required, Russian message, English message]
const rules = [
['contextLength', true,
'Длина контекста должна быть положительным числом',
'Context length must be a positive number'],
['kvHeads', true,
'Количество KV головок должно быть положительным числом',
'KV heads must be a positive number'],
['headSize', true,
'Размер головы должен быть положительным числом',
'Head size must be a positive number'],
['numLayers', true,
'Количество слоев должно быть положительным числом',
'Number of layers must be a positive number'],
['fullAttentionInterval', false,
'Интервал полного внимания должен быть положительным числом',
'Full attention interval must be a positive number'],
['modelSizeGB', false,
'Размер модели должен быть положительным числом',
'Model size must be a positive number']
];
const errors = [];
for (const [field, required, ru, en] of rules) {
const value = params[field];
// Required: missing/zero/negative all fail.
// Optional: only a supplied (truthy) non-positive value fails.
const invalid = required ? (!value || value <= 0) : (value && value <= 0);
if (invalid) {
errors.push(locale === 'ru' ? ru : en);
}
}
return {
valid: errors.length === 0,
errors
};
}
/**
 * Build the default example parameter set shown in the UI.
 * Values are typical of a 7B-class model (32 layers, 32 KV heads,
 * head size 128) with an 8K f16/f16 context.
 * @returns {Object} Example parameters object
 */
function getExampleParams() {
const example = {
contextLength: 8192,
kType: 'f16',
vType: 'f16',
kvHeads: 32,
headSize: 128,
numLayers: 32,
modelSizeGB: 7,
parallel: 1,
fullAttentionInterval: null
};
return example;
}
// Export for use in other files
if (typeof module !== 'undefined' && module.exports) {
module.exports = {
getQuantizationSize,
formatBytes,
calculateMemory,
validateParams,
getExampleParams,
QUANT_SIZES
};
}
// Global export for browser
window.calculateMemory = calculateMemory;
window.validateParams = validateParams;