Module lib.llama.ModelConfig

Expand source code
from typing import NamedTuple

class ModelConfig(NamedTuple):
    d_ff: int
    d_k: int
    d_model: int
    d_v: int
    n_heads_kv: int
    n_layers: int
    n_rep_kv: int
    rms_norm_eps: float
    token_id_bos: int
    token_id_eos: int
    token_id_pad: int
    vocab_size: int

    # TODO: move out of model config
    dropout_rate: float | None
    return_kv_cache: bool

model_config_dummy = ModelConfig(
    d_ff=32,
    d_k=32,
    d_model=15,
    d_v=16,
    n_heads_kv=32,
    n_layers=3,
    n_rep_kv=2,
    rms_norm_eps=1e-6,
    token_id_bos=1,
    token_id_eos=2,
    token_id_pad=0,
    vocab_size=32000,
    dropout_rate=0.1,
    return_kv_cache=False,
)

model_config_llama1_7B = ModelConfig(
    d_ff=11008,
    d_k=128,
    d_model=4096,
    d_v=128,
    n_heads_kv=32,
    n_layers=32,
    n_rep_kv=1,
    rms_norm_eps=1e-6,
    token_id_bos=1,
    token_id_eos=2,
    token_id_pad=0,
    vocab_size=32000,
    dropout_rate=0.1,
    return_kv_cache=False,
)

model_config_llama2_7B = model_config_llama1_7B

model_config_llama2_13B = ModelConfig(
    d_ff=13824,
    d_k=128,
    d_model=5120,
    d_v=128,
    n_heads_kv=40,
    n_layers=40,
    n_rep_kv=1,
    rms_norm_eps=1e-6,
    token_id_bos=1,
    token_id_eos=2,
    token_id_pad=0,
    vocab_size=32000,
    dropout_rate=0.1,
    return_kv_cache=False,
)

model_config_llama2_70B = ModelConfig(
    d_ff=28672,
    d_k=128,
    d_model=8192,
    d_v=128,
    n_heads_kv=8,
    n_layers=80,
    n_rep_kv=8,
    rms_norm_eps=1e-6,
    token_id_bos=1,
    token_id_eos=2,
    token_id_pad=0,
    vocab_size=32000,
    dropout_rate=0.1,
    return_kv_cache=False,
)

Classes

class ModelConfig (d_ff: int, d_k: int, d_model: int, d_v: int, n_heads_kv: int, n_layers: int, n_rep_kv: int, rms_norm_eps: float, token_id_bos: int, token_id_eos: int, token_id_pad: int, vocab_size: int, dropout_rate: float | None, return_kv_cache: bool)

ModelConfig(d_ff, d_k, d_model, d_v, n_heads_kv, n_layers, n_rep_kv, rms_norm_eps, token_id_bos, token_id_eos, token_id_pad, vocab_size, dropout_rate, return_kv_cache)

Expand source code
class ModelConfig(NamedTuple):
    d_ff: int
    d_k: int
    d_model: int
    d_v: int
    n_heads_kv: int
    n_layers: int
    n_rep_kv: int
    rms_norm_eps: float
    token_id_bos: int
    token_id_eos: int
    token_id_pad: int
    vocab_size: int

    # TODO: move out of model config
    dropout_rate: float | None
    return_kv_cache: bool

Ancestors

  • builtins.tuple

Instance variables

var d_ff : int

Alias for field number 0

var d_k : int

Alias for field number 1

var d_model : int

Alias for field number 2

var d_v : int

Alias for field number 3

var n_heads_kv : int

Alias for field number 4

var n_layers : int

Alias for field number 5

var n_rep_kv : int

Alias for field number 6

var rms_norm_eps : float

Alias for field number 7

var token_id_bos : int

Alias for field number 8

var token_id_eos : int

Alias for field number 9

var token_id_pad : int

Alias for field number 10

var vocab_size : int

Alias for field number 11

var dropout_rate : float | None

Alias for field number 12

var return_kv_cache : bool

Alias for field number 13