/** * LLM Context Memory Calculator - Core Calculation Logic * Pure functions with no DOM dependencies */ const QUANT_SIZES = { f32: 4.0, f16: 2.0, bf16: 2.0, q8_0: 34/32, // 1.0625 q4_0: 18/32, // 0.5625 q4_1: 20/32, // 0.625 iq4_nl: 18/32, // 0.5625 q5_0: 22/32, // 0.6875 q5_1: 24/32 // 0.75 }; /** * Normalize quantization type (handle 'K' alias for 'KV') * @param {string} type - Quantization type * @returns {string} Normalized type */ function normalizeQuantType(type) { if (type === 'K') return 'KV'; return type; } /** * Get quantization size in bytes per tensor element * @param {string} type - Quantization type * @returns {number} Size in bytes */ function getQuantizationSize(type) { const normalizedType = normalizeQuantType(type); return QUANT_SIZES[normalizedType] || QUANT_SIZES.f32; } /** * Format bytes to human-readable string * @param {number} bytes - Number of bytes * @returns {string} Formatted string (e.g., "1.23 MB") */ function formatBytes(bytes) { if (bytes === 0) return '0 B'; const k = 1024; const sizes = ['B', 'KB', 'MB', 'GB', 'TB']; const i = Math.floor(Math.log(bytes) / Math.log(k)); return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; } /** * Calculate memory breakdown * @param {Object} params - Calculation parameters * @returns {Object} Memory breakdown object */ function calculateMemory(params) { const { contextLength, kType, vType, kvHeads, headSize, numLayers, modelSizeGB, parallel, fullAttentionInterval } = params; // Calculate effective layers (account for full attention interval) const effectiveLayers = fullAttentionInterval ? Math.ceil(numLayers / fullAttentionInterval) : numLayers; // Determine parallel multiplier (default 1) const parallelMultiplier = parallel || 1; // Get quantization sizes const bK = getQuantizationSize(kType); const bV = vType ? getQuantizationSize(vType) : bK; // Memory per token for all layers // Formula: ctx × layers × kvheads × headdim × (bK + bV) const memoryPerToken = contextLength * effectiveLayers * kvHeads * headSize * (bK + bV); // Total KV cache memory const totalKVCache = memoryPerToken * parallelMultiplier; // Total memory including model weights (if provided) const totalMemory = modelSizeGB ? totalKVCache + (modelSizeGB * 1024 * 1024 * 1024) : null; // Calculate individual cache sizes const kCacheSize = contextLength * effectiveLayers * kvHeads * headSize * bK * parallelMultiplier; const vCacheSize = contextLength * effectiveLayers * kvHeads * headSize * bV * parallelMultiplier; return { kCache: { size: kCacheSize, formatted: formatBytes(kCacheSize) }, vCache: { size: vCacheSize, formatted: formatBytes(vCacheSize) }, totalKVCache: { size: totalKVCache, formatted: formatBytes(totalKVCache) }, totalMemory: totalMemory ? { size: totalMemory, formatted: formatBytes(totalMemory) } : null, effectiveLayers, parallelMultiplier }; } /** * Validate input parameters * @param {Object} params - Parameters to validate * @param {string} lang - Language code ('ru' or 'en') * @returns {Object} Validation result */ function validateParams(params, lang) { lang = lang || 'ru'; const errors = []; if (!params.contextLength || params.contextLength <= 0) { if (lang === 'ru') { errors.push('Длина контекста должна быть положительным числом'); } else { errors.push('Context length must be a positive number'); } } if (!params.kvHeads || params.kvHeads <= 0) { if (lang === 'ru') { errors.push('Количество KV головок должно быть положительным числом'); } else { errors.push('KV heads must be a positive number'); } } if (!params.headSize || params.headSize <= 0) { if (lang === 'ru') { errors.push('Размер головы должен быть положительным числом'); } else { errors.push('Head size must be a positive number'); } } if (!params.numLayers || params.numLayers <= 0) { if (lang === 'ru') { errors.push('Количество слоев должно быть положительным числом'); } else { errors.push('Number of layers must be a positive number'); } } if (params.fullAttentionInterval && params.fullAttentionInterval <= 0) { if (lang === 'ru') { errors.push('Интервал полного внимания должен быть положительным числом'); } else { errors.push('Full attention interval must be a positive number'); } } if (params.modelSizeGB && params.modelSizeGB <= 0) { if (lang === 'ru') { errors.push('Размер модели должен быть положительным числом'); } else { errors.push('Model size must be a positive number'); } } return { valid: errors.length === 0, errors }; } /** * Generate example parameters * @returns {Object} Example parameters object */ function getExampleParams() { return { contextLength: 8192, kType: 'f16', vType: 'f16', kvHeads: 32, headSize: 128, numLayers: 32, modelSizeGB: 7, parallel: 1, fullAttentionInterval: null }; } // Export for use in other files if (typeof module !== 'undefined' && module.exports) { module.exports = { getQuantizationSize, formatBytes, calculateMemory, validateParams, getExampleParams, QUANT_SIZES }; } // Global export for browser window.calculateMemory = calculateMemory; window.validateParams = validateParams;