Local Inference Calculator
Conteúdo:
Instalação
Guia do Usuário
Glossário
API Reference
Exemplos / Examples
Local Inference Calculator
Índice
Índice
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
I
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
V
|
W
|
Y
_
__init__() (método calculator.CalculationBreakdown)
(método calculator.CPUOffloadCalculator)
(método calculator.CPUOffloadResult)
(método calculator.InferenceResult)
(método calculator.LayerOffloadCalculator)
(método calculator.LayerOffloadResult)
(método calculator.PCIeConfig)
(método calculator.VRAMCalculator)
(método gpus.GPU)
(método models.LLMModel)
A
architecture (atributo gpus.GPU)
,
[1]
(atributo models.LLMModel)
,
[1]
B
bandwidth_gb_s (atributo calculator.PCIeConfig)
,
[1]
BG_BLUE (atributo main.Colors)
BG_GREEN (atributo main.Colors)
BG_RED (atributo main.Colors)
BG_YELLOW (atributo main.Colors)
BLACK (atributo main.Colors)
BLUE (atributo main.Colors)
BOLD (atributo main.Colors)
bold() (método estático main.Colors)
bytes_per_param (propriedade calculator.Quantization)
C
calculate_all_combinations() (método calculator.VRAMCalculator)
calculate_inference() (no módulo calculator)
calculate_kv_cache() (método calculator.VRAMCalculator)
calculate_kv_cache_memory() (método calculator.LayerOffloadCalculator)
calculate_offload() (método calculator.CPUOffloadCalculator)
calculate_optimal_offload() (método calculator.LayerOffloadCalculator)
calculate_overhead() (método calculator.VRAMCalculator)
calculate_params_memory() (método calculator.VRAMCalculator)
calculate_total_model_memory() (método calculator.CPUOffloadCalculator)
calculate_total_vram() (método calculator.VRAMCalculator)
CalculationBreakdown (classe em calculator)
CalculationMode (classe em calculator)
calculator
module
Colors (classe em main)
CONSERVATIVE (atributo calculator.CalculationMode)
CONSUMER (atributo gpus.GPUType)
context_length_max (atributo models.LLMModel)
,
[1]
cpu_ram_used (atributo calculator.LayerOffloadResult)
,
[1]
CPUOffloadCalculator (classe em calculator)
CPUOffloadResult (classe em calculator)
CYAN (atributo main.Colors)
D
DATACENTER (atributo gpus.GPUType)
DIM (atributo main.Colors)
dim() (método estático main.Colors)
E
effective_bandwidth_gb_s (propriedade calculator.PCIeConfig)
error() (método estático main.Colors)
estimate_kv_cache() (no módulo main)
estimate_layer_size_gb() (método calculator.LayerOffloadCalculator)
estimate_offload_performance() (método calculator.CPUOffloadCalculator)
estimated_layers (propriedade models.LLMModel)
estimated_token_speed (atributo calculator.CPUOffloadResult)
,
[1]
evaluate_pair() (método calculator.VRAMCalculator)
export_csv() (no módulo main)
export_json() (no módulo main)
F
fits_in_ram (atributo calculator.CPUOffloadResult)
,
[1]
format (atributo models.LLMModel)
,
[1]
FP16 (atributo calculator.Quantization)
FP32 (atributo calculator.Quantization)
G
generation (atributo calculator.PCIeConfig)
,
[1]
get_all_gpus() (no módulo gpus)
get_all_models() (no módulo models)
get_consumer_gpus() (no módulo gpus)
get_datacenter_gpus() (no módulo gpus)
get_gpu_by_name() (no módulo gpus)
get_gpus_by_vram_min() (no módulo gpus)
get_model_by_size() (no módulo models)
get_models_by_size_range() (no módulo models)
GPU (classe em gpus)
gpu_name (atributo calculator.InferenceResult)
,
[1]
gpu_vram_gb (atributo calculator.InferenceResult)
,
[1]
gpu_vram_used (atributo calculator.LayerOffloadResult)
,
[1]
gpus
module
GPUType (classe em gpus)
GREEN (atributo main.Colors)
I
InferenceResult (classe em calculator)
info() (método estático main.Colors)
INT4 (atributo calculator.Quantization)
INT8 (atributo calculator.Quantization)
K
kv_cache_gb (atributo calculator.CalculationBreakdown)
,
[1]
kv_cache_mb_per_token (atributo models.LLMModel)
,
[1]
kv_cache_multiplier (propriedade calculator.Quantization)
L
lanes (atributo calculator.PCIeConfig)
,
[1]
LayerOffloadCalculator (classe em calculator)
LayerOffloadResult (classe em calculator)
layers_on_cpu (atributo calculator.LayerOffloadResult)
,
[1]
layers_on_gpu (atributo calculator.LayerOffloadResult)
,
[1]
list_models() (no módulo main)
LLMModel (classe em models)
M
MAGENTA (atributo main.Colors)
main
module
main() (no módulo main)
memory_bandwidth_gb_s (atributo gpus.GPU)
,
[1]
model_name (atributo calculator.InferenceResult)
,
[1]
model_params_billion (atributo calculator.InferenceResult)
,
[1]
model_with_overhead_gb (atributo calculator.CalculationBreakdown)
,
[1]
models
module
module
calculator
gpus
main
models
N
name (atributo gpus.GPU)
,
[1]
(atributo models.LLMModel)
,
[1]
NOT_RUNS (atributo calculator.Status)
num_layers (atributo models.LLMModel)
,
[1]
O
offload_config (atributo calculator.CPUOffloadResult)
,
[1]
offload_ratio (atributo calculator.LayerOffloadResult)
,
[1]
ok() (método estático main.Colors)
overhead_gb (atributo calculator.CalculationBreakdown)
,
[1]
P
params_billion (atributo models.LLMModel)
,
[1]
params_memory_gb (atributo calculator.CalculationBreakdown)
,
[1]
parse_args() (no módulo main)
pcie_gen (atributo gpus.GPU)
,
[1]
pcie_generation (atributo calculator.CPUOffloadResult)
,
[1]
PCIeConfig (classe em calculator)
performance_impact (atributo calculator.LayerOffloadResult)
,
[1]
precision_default (atributo models.LLMModel)
,
[1]
print_cpu_offload_result() (no módulo main)
print_layer_offload_result() (no módulo main)
print_model_vram_breakdown() (no módulo main)
print_multi_gpu_result() (no módulo main)
print_summary_by_gpu() (no módulo main)
print_summary_by_model() (no módulo main)
print_table() (no módulo main)
PRODUCTION (atributo calculator.CalculationMode)
Q
quantization (atributo calculator.InferenceResult)
,
[1]
Quantization (classe em calculator)
R
recommended_gpu_split (atributo calculator.LayerOffloadResult)
,
[1]
RED (atributo main.Colors)
required_vram_gb (atributo calculator.InferenceResult)
,
[1]
RESET (atributo main.Colors)
RUNS (atributo calculator.Status)
S
search_models_by_name() (no módulo models)
size_label (propriedade models.LLMModel)
speed_vs_full_gpu (atributo calculator.CPUOffloadResult)
,
[1]
status (atributo calculator.InferenceResult)
,
[1]
(atributo calculator.LayerOffloadResult)
Status (classe em calculator)
system_ram_available (atributo calculator.CPUOffloadResult)
,
[1]
system_ram_required (atributo calculator.CPUOffloadResult)
,
[1]
T
THEORETICAL (atributo calculator.CalculationMode)
to_dict() (método calculator.CalculationBreakdown)
(método calculator.InferenceResult)
total_layers (atributo calculator.LayerOffloadResult)
,
[1]
total_vram_gb (atributo calculator.CalculationBreakdown)
,
[1]
type (atributo gpus.GPU)
,
[1]
V
vram_free_percent (atributo calculator.InferenceResult)
,
[1]
vram_gb (atributo gpus.GPU)
,
[1]
vram_label (propriedade gpus.GPU)
VRAMCalculator (classe em calculator)
W
warn() (método estático main.Colors)
warning (atributo calculator.InferenceResult)
,
[1]
warning() (método estático main.Colors)
WHITE (atributo main.Colors)
Y
YELLOW (atributo main.Colors)