Local Inference Calculator
Contents:
Installation
User Guide
Glossary / Glossário
API Reference
Examples
Local Inference Calculator
Index
Index
_ | A | B | C | D | E | F | G | I | K | L | M | N | O | P | Q | R | S | T | V | W | Y
_
__init__() (calculator.CalculationBreakdown method)
(calculator.CPUOffloadCalculator method)
(calculator.CPUOffloadResult method)
(calculator.InferenceResult method)
(calculator.LayerOffloadCalculator method)
(calculator.LayerOffloadResult method)
(calculator.PCIeConfig method)
(calculator.VRAMCalculator method)
(gpus.GPU method)
(models.LLMModel method)
A
architecture (gpus.GPU attribute), [1]
(models.LLMModel attribute), [1]
B
bandwidth_gb_s (calculator.PCIeConfig attribute), [1]
BG_BLUE (main.Colors attribute)
BG_GREEN (main.Colors attribute)
BG_RED (main.Colors attribute)
BG_YELLOW (main.Colors attribute)
BLACK (main.Colors attribute)
BLUE (main.Colors attribute)
BOLD (main.Colors attribute)
bold() (main.Colors static method)
bytes_per_param (calculator.Quantization property)
C
calculate_all_combinations() (calculator.VRAMCalculator method)
calculate_inference() (in module calculator)
calculate_kv_cache() (calculator.VRAMCalculator method)
calculate_kv_cache_memory() (calculator.LayerOffloadCalculator method)
calculate_offload() (calculator.CPUOffloadCalculator method)
calculate_optimal_offload() (calculator.LayerOffloadCalculator method)
calculate_overhead() (calculator.VRAMCalculator method)
calculate_params_memory() (calculator.VRAMCalculator method)
calculate_total_model_memory() (calculator.CPUOffloadCalculator method)
calculate_total_vram() (calculator.VRAMCalculator method)
CalculationBreakdown (class in calculator)
CalculationMode (class in calculator)
calculator
module
Colors (class in main)
CONSERVATIVE (calculator.CalculationMode attribute)
CONSUMER (gpus.GPUType attribute)
context_length_max (models.LLMModel attribute), [1]
cpu_ram_used (calculator.LayerOffloadResult attribute), [1]
CPUOffloadCalculator (class in calculator)
CPUOffloadResult (class in calculator)
CYAN (main.Colors attribute)
D
DATACENTER (gpus.GPUType attribute)
DIM (main.Colors attribute)
dim() (main.Colors static method)
E
effective_bandwidth_gb_s (calculator.PCIeConfig property)
error() (main.Colors static method)
estimate_kv_cache() (in module main)
estimate_layer_size_gb() (calculator.LayerOffloadCalculator method)
estimate_offload_performance() (calculator.CPUOffloadCalculator method)
estimated_layers (models.LLMModel property)
estimated_token_speed (calculator.CPUOffloadResult attribute), [1]
evaluate_pair() (calculator.VRAMCalculator method)
export_csv() (in module main)
export_json() (in module main)
F
fits_in_ram (calculator.CPUOffloadResult attribute), [1]
format (models.LLMModel attribute), [1]
FP16 (calculator.Quantization attribute)
FP32 (calculator.Quantization attribute)
G
generation (calculator.PCIeConfig attribute), [1]
get_all_gpus() (in module gpus)
get_all_models() (in module models)
get_consumer_gpus() (in module gpus)
get_datacenter_gpus() (in module gpus)
get_gpu_by_name() (in module gpus)
get_gpus_by_vram_min() (in module gpus)
get_model_by_size() (in module models)
get_models_by_size_range() (in module models)
GPU (class in gpus)
gpu_name (calculator.InferenceResult attribute), [1]
gpu_vram_gb (calculator.InferenceResult attribute), [1]
gpu_vram_used (calculator.LayerOffloadResult attribute), [1]
gpus
module
GPUType (class in gpus)
GREEN (main.Colors attribute)
I
InferenceResult (class in calculator)
info() (main.Colors static method)
INT4 (calculator.Quantization attribute)
INT8 (calculator.Quantization attribute)
K
kv_cache_gb (calculator.CalculationBreakdown attribute), [1]
kv_cache_mb_per_token (models.LLMModel attribute), [1]
kv_cache_multiplier (calculator.Quantization property)
L
lanes (calculator.PCIeConfig attribute), [1]
LayerOffloadCalculator (class in calculator)
LayerOffloadResult (class in calculator)
layers_on_cpu (calculator.LayerOffloadResult attribute), [1]
layers_on_gpu (calculator.LayerOffloadResult attribute), [1]
list_models() (in module main)
LLMModel (class in models)
M
MAGENTA (main.Colors attribute)
main
module
main() (in module main)
memory_bandwidth_gb_s (gpus.GPU attribute), [1]
model_name (calculator.InferenceResult attribute), [1]
model_params_billion (calculator.InferenceResult attribute), [1]
model_with_overhead_gb (calculator.CalculationBreakdown attribute), [1]
models
module
module
calculator
gpus
main
models
N
name (gpus.GPU attribute), [1]
(models.LLMModel attribute), [1]
NOT_RUNS (calculator.Status attribute)
num_layers (models.LLMModel attribute), [1]
O
offload_config (calculator.CPUOffloadResult attribute), [1]
offload_ratio (calculator.LayerOffloadResult attribute), [1]
ok() (main.Colors static method)
overhead_gb (calculator.CalculationBreakdown attribute), [1]
P
params_billion (models.LLMModel attribute), [1]
params_memory_gb (calculator.CalculationBreakdown attribute), [1]
parse_args() (in module main)
pcie_gen (gpus.GPU attribute), [1]
pcie_generation (calculator.CPUOffloadResult attribute), [1]
PCIeConfig (class in calculator)
performance_impact (calculator.LayerOffloadResult attribute), [1]
precision_default (models.LLMModel attribute), [1]
print_cpu_offload_result() (in module main)
print_layer_offload_result() (in module main)
print_model_vram_breakdown() (in module main)
print_multi_gpu_result() (in module main)
print_summary_by_gpu() (in module main)
print_summary_by_model() (in module main)
print_table() (in module main)
PRODUCTION (calculator.CalculationMode attribute)
Q
quantization (calculator.InferenceResult attribute), [1]
Quantization (class in calculator)
R
recommended_gpu_split (calculator.LayerOffloadResult attribute), [1]
RED (main.Colors attribute)
required_vram_gb (calculator.InferenceResult attribute), [1]
RESET (main.Colors attribute)
RUNS (calculator.Status attribute)
S
search_models_by_name() (in module models)
size_label (models.LLMModel property)
speed_vs_full_gpu (calculator.CPUOffloadResult attribute), [1]
status (calculator.InferenceResult attribute), [1]
(calculator.LayerOffloadResult attribute)
Status (class in calculator)
system_ram_available (calculator.CPUOffloadResult attribute), [1]
system_ram_required (calculator.CPUOffloadResult attribute), [1]
T
THEORETICAL (calculator.CalculationMode attribute)
to_dict() (calculator.CalculationBreakdown method)
(calculator.InferenceResult method)
total_layers (calculator.LayerOffloadResult attribute), [1]
total_vram_gb (calculator.CalculationBreakdown attribute), [1]
type (gpus.GPU attribute), [1]
V
vram_free_percent (calculator.InferenceResult attribute), [1]
vram_gb (gpus.GPU attribute), [1]
vram_label (gpus.GPU property)
VRAMCalculator (class in calculator)
W
warn() (main.Colors static method)
warning (calculator.InferenceResult attribute), [1]
warning() (main.Colors static method)
WHITE (main.Colors attribute)
Y
YELLOW (main.Colors attribute)