- Accurate memory calculation using ggml quantization formulas
- Support for f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1 quantizations
- Asymmetric context support (separate K/V cache quantization)
- Full attention interval support
- Parallel sequences multiplier
- Bilingual interface (Russian/English)
- Retro-style design with tooltips

Signed-off-by: Arseniy Romenskiy <romenskiy@altlinux.org>
Co-authored-by: Qwen3.5-35B-A3B-Claude-4.6-Opus-Reasoning-Distilled <qwen@example.com>
217 lines
5.7 KiB
JavaScript
/**
 * LLM Context Memory Calculator - Core Calculation Logic
 *
 * Pure functions with no DOM dependencies.
 */

const QUANT_SIZES = {
|
||
f32: 4.0,
|
||
f16: 2.0,
|
||
bf16: 2.0,
|
||
q8_0: 34/32, // 1.0625
|
||
q4_0: 18/32, // 0.5625
|
||
q4_1: 20/32, // 0.625
|
||
iq4_nl: 18/32, // 0.5625
|
||
q5_0: 22/32, // 0.6875
|
||
q5_1: 24/32 // 0.75
|
||
};
|
||
|
||
/**
|
||
* Normalize quantization type (handle 'K' alias for 'KV')
|
||
* @param {string} type - Quantization type
|
||
* @returns {string} Normalized type
|
||
*/
|
||
function normalizeQuantType(type) {
|
||
if (type === 'K') return 'KV';
|
||
return type;
|
||
}
|
||
|
||
/**
|
||
* Get quantization size in bytes per tensor element
|
||
* @param {string} type - Quantization type
|
||
* @returns {number} Size in bytes
|
||
*/
|
||
function getQuantizationSize(type) {
|
||
const normalizedType = normalizeQuantType(type);
|
||
return QUANT_SIZES[normalizedType] || QUANT_SIZES.f32;
|
||
}
|
||
|
||
/**
|
||
* Format bytes to human-readable string
|
||
* @param {number} bytes - Number of bytes
|
||
* @returns {string} Formatted string (e.g., "1.23 MB")
|
||
*/
|
||
function formatBytes(bytes) {
|
||
if (bytes === 0) return '0 B';
|
||
|
||
const k = 1024;
|
||
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
|
||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||
|
||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||
}
|
||
|
||
/**
|
||
* Calculate memory breakdown
|
||
* @param {Object} params - Calculation parameters
|
||
* @returns {Object} Memory breakdown object
|
||
*/
|
||
function calculateMemory(params) {
|
||
const {
|
||
contextLength,
|
||
kType,
|
||
vType,
|
||
kvHeads,
|
||
headSize,
|
||
numLayers,
|
||
modelSizeGB,
|
||
parallel,
|
||
fullAttentionInterval
|
||
} = params;
|
||
|
||
// Calculate effective layers (account for full attention interval)
|
||
const effectiveLayers = fullAttentionInterval
|
||
? Math.ceil(numLayers / fullAttentionInterval)
|
||
: numLayers;
|
||
|
||
// Determine parallel multiplier (default 1)
|
||
const parallelMultiplier = parallel || 1;
|
||
|
||
// Get quantization sizes
|
||
const bK = getQuantizationSize(kType);
|
||
const bV = vType ? getQuantizationSize(vType) : bK;
|
||
|
||
// Memory per token for all layers
|
||
// Formula: ctx × layers × kvheads × headdim × (bK + bV)
|
||
const memoryPerToken = contextLength * effectiveLayers * kvHeads * headSize * (bK + bV);
|
||
|
||
// Total KV cache memory
|
||
const totalKVCache = memoryPerToken * parallelMultiplier;
|
||
|
||
// Total memory including model weights (if provided)
|
||
const totalMemory = modelSizeGB
|
||
? totalKVCache + (modelSizeGB * 1024 * 1024 * 1024)
|
||
: null;
|
||
|
||
// Calculate individual cache sizes
|
||
const kCacheSize = contextLength * effectiveLayers * kvHeads * headSize * bK * parallelMultiplier;
|
||
const vCacheSize = contextLength * effectiveLayers * kvHeads * headSize * bV * parallelMultiplier;
|
||
|
||
return {
|
||
kCache: {
|
||
size: kCacheSize,
|
||
formatted: formatBytes(kCacheSize)
|
||
},
|
||
vCache: {
|
||
size: vCacheSize,
|
||
formatted: formatBytes(vCacheSize)
|
||
},
|
||
totalKVCache: {
|
||
size: totalKVCache,
|
||
formatted: formatBytes(totalKVCache)
|
||
},
|
||
totalMemory: totalMemory ? {
|
||
size: totalMemory,
|
||
formatted: formatBytes(totalMemory)
|
||
} : null,
|
||
effectiveLayers,
|
||
parallelMultiplier
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Validate input parameters
|
||
* @param {Object} params - Parameters to validate
|
||
* @param {string} lang - Language code ('ru' or 'en')
|
||
* @returns {Object} Validation result
|
||
*/
|
||
function validateParams(params, lang) {
|
||
lang = lang || 'ru';
|
||
const errors = [];
|
||
|
||
if (!params.contextLength || params.contextLength <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Длина контекста должна быть положительным числом');
|
||
} else {
|
||
errors.push('Context length must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (!params.kvHeads || params.kvHeads <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Количество KV головок должно быть положительным числом');
|
||
} else {
|
||
errors.push('KV heads must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (!params.headSize || params.headSize <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Размер головы должен быть положительным числом');
|
||
} else {
|
||
errors.push('Head size must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (!params.numLayers || params.numLayers <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Количество слоев должно быть положительным числом');
|
||
} else {
|
||
errors.push('Number of layers must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (params.fullAttentionInterval && params.fullAttentionInterval <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Интервал полного внимания должен быть положительным числом');
|
||
} else {
|
||
errors.push('Full attention interval must be a positive number');
|
||
}
|
||
}
|
||
|
||
if (params.modelSizeGB && params.modelSizeGB <= 0) {
|
||
if (lang === 'ru') {
|
||
errors.push('Размер модели должен быть положительным числом');
|
||
} else {
|
||
errors.push('Model size must be a positive number');
|
||
}
|
||
}
|
||
|
||
return {
|
||
valid: errors.length === 0,
|
||
errors
|
||
};
|
||
}
|
||
|
||
/**
|
||
* Generate example parameters
|
||
* @returns {Object} Example parameters object
|
||
*/
|
||
function getExampleParams() {
|
||
return {
|
||
contextLength: 8192,
|
||
kType: 'f16',
|
||
vType: 'f16',
|
||
kvHeads: 32,
|
||
headSize: 128,
|
||
numLayers: 32,
|
||
modelSizeGB: 7,
|
||
parallel: 1,
|
||
fullAttentionInterval: null
|
||
};
|
||
}
|
||
|
||
// Export for use in other files
|
||
if (typeof module !== 'undefined' && module.exports) {
|
||
module.exports = {
|
||
getQuantizationSize,
|
||
formatBytes,
|
||
calculateMemory,
|
||
validateParams,
|
||
getExampleParams,
|
||
QUANT_SIZES
|
||
};
|
||
}
|
||
|
||
// Global export for browser
|
||
window.calculateMemory = calculateMemory;
|
||
window.validateParams = validateParams;
|