LanceDB with Hyrax

LanceDB with Hyrax

This notebook creates a small LanceDB database containing two tables (`observations` and `calibration`), then reads one of them, `observations`, through Hyrax's `LanceDBDataset`.

[1]:
from pathlib import Path

import lancedb
import pyarrow as pa
import hyrax
[2]:
# On-disk location of the demo database (a relative path), and a connection to it.
db_path = Path("./random_lancedb")
db = lancedb.connect(str(db_path))

# Build both in-memory Arrow tables up front; nothing is written to disk yet.
observations = pa.table(
    dict(
        object_id=["obj_0", "obj_1", "obj_2"],
        flux=[10.2, 11.4, 9.8],
        redshift=[0.12, 0.44, 0.31],
    )
)
calibration = pa.table(
    dict(
        object_id=["cal_0", "cal_1"],
        zeropoint=[25.1, 25.3],
    )
)

# Write the tables into the database. mode="overwrite" is passed so that an
# existing table of the same name is replaced when the cell is re-run.
db.create_table("observations", observations, mode="overwrite")
db.create_table("calibration", calibration, mode="overwrite")
[2026-05-18T21:37:00Z WARN  lance::dataset::write::insert] No existing dataset at /home/docs/checkouts/readthedocs.org/user_builds/hyrax/checkouts/v0.8.3/docs/notebooks/random_lancedb/observations.lance, it will be created
[2026-05-18T21:37:00Z WARN  lance::dataset::write::insert] No existing dataset at /home/docs/checkouts/readthedocs.org/user_builds/hyrax/checkouts/v0.8.3/docs/notebooks/random_lancedb/calibration.lance, it will be created
[2]:
LanceTable(name='calibration', version=1, _conn=LanceDBConnection(uri='/home/docs/checkouts/readthedocs.org/user_builds/hyrax/checkouts/v0.8.3/docs/notebooks/random_lancedb'))
[3]:
# Create a Hyrax instance; its configuration is edited below through h.config.
h = hyrax.Hyrax()

# Two tables exist in the database, so name the one LanceDBDataset should read.
h.config["data_set"]["LanceDBDataset"]["table_name"] = "observations"

# Data request: under the "infer" group, a single entry called "lance" that
# points LanceDBDataset at the database directory and lists the columns wanted.
h.config["data_request"] = {
    "infer": {
        "lance": dict(
            dataset_class="LanceDBDataset",
            data_location=str(db_path),
            primary_id_field="object_id",
            fields=["object_id", "flux", "redshift"],
        ),
    },
}

# Run the prepare verb and keep its result for inspection.
prepared = h.prepare()
[2026-05-18 21:37:00,592 hyrax.verbs.prepare:INFO] Finished Prepare
[4]:
# Look up the "infer" group of the prepared result, take the item at index 0,
# and show the part stored under the "lance" key used in the data request.
prepared["infer"][0]["lance"]
[4]:
{'object_id': 'obj_0', 'flux': 10.2, 'redshift': 0.12}