Another way to assemble a single configuration is structured configs, which add an extra layer of protection against typing errors. They support the following field types:
int, bool, float, str, Enums, bytes, and pathlib.Path.
Structured configs can also be composed (nested), and fields can be declared optional.
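A minimal sketch of nesting and optional fields (all class and field names here are illustrative, not taken from the example project):

from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Optional

class Precision(Enum):
    FP16 = "fp16"
    FP32 = "fp32"

@dataclass
class OptimizerConfig:
    lr: float = 1e-3

@dataclass
class ExperimentConfig:
    # nested config, composed via default_factory
    optimizer: OptimizerConfig = field(default_factory=OptimizerConfig)
    # Enum and pathlib.Path fields are validated like the scalar types
    precision: Precision = Precision.FP32
    data_dir: Path = Path("data")
    # optional field: None is allowed
    checkpoint: Optional[str] = None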
Instead of YAML files such as conf/preprocessing/roberta.yaml, the configuration is now described with dataclasses:
from dataclasses import dataclass
from omegaconf import MISSING

@dataclass
class PreprocessingConfig:
    padding: str = "max_length"
    max_length: int = 512
    tokenizer_name: str = MISSING
    test_size: int = 10000
    train_size: int = 50000

@dataclass
class RobertaPreprocessing(PreprocessingConfig):
    max_length: int = 256
    tokenizer_name: str = "siebert/sentiment-roberta-large-english"
    test_size: int = 5000
    train_size: int = 20000

@dataclass
class DistillBertPreprocessing(PreprocessingConfig):
    tokenizer_name: str = "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
All variants are then registered in Hydra's ConfigStore:

import hydra
from hydra.core.config_store import ConfigStore
from src import (
    RobertaPreprocessing,
    DistillBertPreprocessing,
    MainConfig,
)

# ...
cs = ConfigStore.instance()
cs.store(group="preprocessing", name="distillbert", node=DistillBertPreprocessing)
cs.store(group="preprocessing", name="roberta", node=RobertaPreprocessing)
# the top-level node must also be stored under the name passed to @hydra.main;
# the original listing elides this, so the line below is an assumed addition
cs.store(name="config", node=MainConfig)
The entry point is annotated with the structured config type:

@hydra.main(version_base=None, config_name="config")
def main(cfg: MainConfig):
    ...
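A common way to verify the composed result (not part of the original listing) is to dump the config as YAML inside main():

from omegaconf import OmegaConf

@hydra.main(version_base=None, config_name="config")
def main(cfg: MainConfig):
    print(OmegaConf.to_yaml(cfg))  # prints the fully composed, validated config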
Field access is now checked against the schema: a typo such as pading instead of padding raises an error at runtime:

import logging

log = logging.getLogger(__name__)

@hydra.main(version_base=None, config_name="config")
def main(cfg: MainConfig):
    log.info(cfg.preprocessing.pading)  # error: 'pading' is not defined in PreprocessingConfig
The same protection applies to command-line overrides: test_size is declared as int, so overriding it with a float is rejected with a validation error:

$ python SA_example.py preprocessing.test_size=0.2
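An override that matches the declared type, by contrast, is accepted (the value here is only an illustration):

$ python SA_example.py preprocessing.test_size=5000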
ModelConfig and TrainingConfig are defined the same way as the preprocessing configs above. The top-level MainConfig composes the groups; the defaults list that previously lived in src/conf/config.yaml becomes a plain Python list:
from dataclasses import dataclass, field
from typing import Any

defaults = [
    {"preprocessing": "distillbert"},
    {"model": "distillbert"},
    {"training": "distillbert"},
]

@dataclass
class MainConfig:
    defaults: list[Any] = field(default_factory=lambda: defaults)
    preprocessing: PreprocessingConfig = MISSING
    model: ModelConfig = MISSING
    training: TrainingConfig = MISSING
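With the groups registered, a component can be swapped from the command line just as with YAML config groups; for example, this run (an illustration, not from the original) picks RobertaPreprocessing while keeping the distillbert defaults for model and training:

$ python SA_example.py preprocessing=roberta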
Structured configs also work with hydra.utils.instantiate: the _target_ field names the callable to invoke, the remaining (non-underscore) fields become its keyword arguments, and _convert_ controls how nested configs are converted before the call. Declaring the dataclasses frozen additionally makes the resulting config read-only:

@dataclass(frozen=True)
class ModelConfig:
    _target_: str = "transformers.AutoModelForSequenceClassification.from_pretrained"
    _convert_: str = "object"
    pretrained_model_name_or_path: str = MISSING
    problem_type: str = "single_label_classification"

@dataclass(frozen=True)
class DistillBertModel(ModelConfig):
    pretrained_model_name_or_path: str = (
        "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
    )
    num_labels: int = 2
    ignore_mismatched_sizes: bool = True
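A minimal sketch of how such a node is consumed, using the standard hydra.utils.instantiate API (the main() below mirrors the entry point from the example):

from hydra.utils import instantiate

@hydra.main(version_base=None, config_name="config")
def main(cfg: MainConfig):
    # instantiate() imports the dotted path from _target_ and calls it with the
    # non-underscore fields, roughly:
    # AutoModelForSequenceClassification.from_pretrained(
    #     pretrained_model_name_or_path=..., problem_type=..., ...)
    model = instantiate(cfg.model)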