Source code for hyrax.config_migrations.migrations.005_remove_preload_config
"""Config migration: version 5 → version 6.
Migrates the deprecated ``preload_cache`` and ``preload_threads`` keys from
``[data_set]`` into ``[data_loader].num_workers``. Cache preloading has been
replaced by PyTorch DataLoader's built-in ``num_workers`` / ``prefetch_factor``.
If the user explicitly set ``preload_threads`` to a value other than the old
default of 50, that value is carried forward as ``num_workers``. Otherwise both
keys are simply removed.
"""
import tomlkit
from tomlkit.toml_document import TOMLDocument
from hyrax.config_migrations.migration_utils import migration_step
# The old default for preload_threads was 50, tuned for UW's HYAK Klone HPC
# filesystem where I/O is extremely slow and lightweight threads were cheap.
# num_workers spawns full subprocesses, so 50 would be wildly inappropriate
# for most systems. Treat 50 as "user never customized this."
[docs]
# Sentinel compared against the popped ``preload_threads`` value: a match is
# treated as "never customized" and the value is not carried forward.
_OLD_DEFAULT_PRELOAD_THREADS = 50
@migration_step(from_version=5)
def remove_preload_config(cfg: TOMLDocument) -> TOMLDocument:
    """Drop the legacy preload keys and fold the thread count into ``num_workers``.

    When ``[data_set]`` is a table, ``preload_cache`` and ``preload_threads``
    are always popped from it. The thread count is carried forward only when
    all of the following hold:

    * ``preload_cache`` was present and truthy,
    * ``preload_threads`` was present, and
    * ``preload_threads`` differs from ``_OLD_DEFAULT_PRELOAD_THREADS``.

    In that case the thread count is added to any existing
    ``[data_loader].num_workers`` (treated as 0 when unset) and the result is
    clamped to a minimum of 1. A ``[data_loader]`` table is created when the
    document has none.

    Parameters
    ----------
    cfg : TOMLDocument
        The version-5 configuration document. It is modified in place.

    Returns
    -------
    TOMLDocument
        The same ``cfg`` object that was passed in.
    """
    legacy_section = cfg.get("data_set")
    if not isinstance(legacy_section, dict):
        # No [data_set] table at all: nothing to remove or migrate.
        return cfg

    # Both deprecated keys are removed unconditionally, whether or not the
    # thread count ends up being carried forward.
    cache_enabled = legacy_section.pop("preload_cache", None)
    thread_count = legacy_section.pop("preload_threads", None)

    # NOTE(review): the module docstring describes a simpler rule (carry
    # ``preload_threads`` forward whenever it is non-default). It mentions
    # neither the ``preload_cache`` requirement nor the addition to an
    # existing ``num_workers`` -- confirm which description is intended.
    if (
        cache_enabled
        and thread_count is not None
        and thread_count != _OLD_DEFAULT_PRELOAD_THREADS
    ):
        loader_section = cfg.get("data_loader")
        if loader_section is None:
            loader_section = tomlkit.table()
            cfg["data_loader"] = loader_section

        combined_workers = loader_section.get("num_workers", 0) + thread_count
        # Clamp so a zero or negative sum still yields at least one worker.
        loader_section["num_workers"] = max(combined_workers, 1)

    return cfg