LanceDB with Hyrax
This notebook creates a small LanceDB database containing two tables, `observations` and `calibration`, and then reads one selected table (`observations`) through Hyrax using the `LanceDBDataset` dataset class.
[1]:
from pathlib import Path
import lancedb
import pyarrow as pa
import hyrax
[2]:
# On-disk location of the example database; later cells reuse `db_path`.
db_path = Path("./random_lancedb")
db = lancedb.connect(str(db_path))

# Table 1: three objects, each with a flux and a redshift value.
observations = pa.table(
    dict(
        object_id=["obj_0", "obj_1", "obj_2"],
        flux=[10.2, 11.4, 9.8],
        redshift=[0.12, 0.44, 0.31],
    )
)
# mode="overwrite" is passed so that re-running this cell does not depend on
# whether the table already exists from a previous run.
db.create_table("observations", observations, mode="overwrite")

# Table 2: two calibration rows with a different schema (object_id, zeropoint).
calibration = pa.table(
    dict(
        object_id=["cal_0", "cal_1"],
        zeropoint=[25.1, 25.3],
    )
)
# Last expression of the cell: its return value is what the notebook displays.
db.create_table("calibration", calibration, mode="overwrite")
[2026-05-18T21:37:00Z WARN lance::dataset::write::insert] No existing dataset at /home/docs/checkouts/readthedocs.org/user_builds/hyrax/checkouts/v0.8.3/docs/notebooks/random_lancedb/observations.lance, it will be created
[2026-05-18T21:37:00Z WARN lance::dataset::write::insert] No existing dataset at /home/docs/checkouts/readthedocs.org/user_builds/hyrax/checkouts/v0.8.3/docs/notebooks/random_lancedb/calibration.lance, it will be created
[2]:
LanceTable(name='calibration', version=1, _conn=LanceDBConnection(uri='/home/docs/checkouts/readthedocs.org/user_builds/hyrax/checkouts/v0.8.3/docs/notebooks/random_lancedb'))
[3]:
h = hyrax.Hyrax()

# The database holds more than one table; name the one LanceDBDataset should use.
h.config["data_set"]["LanceDBDataset"]["table_name"] = "observations"

# Data request layout as written here: group ("infer") -> source name ("lance")
# -> settings for that source.
h.config["data_request"] = {
    "infer": {
        "lance": dict(
            dataset_class="LanceDBDataset",
            data_location=str(db_path),  # same directory the tables were written to
            primary_id_field="object_id",
            fields=["object_id", "flux", "redshift"],
        ),
    },
}

# Keep the result of prepare() so the next cell can index into it.
prepared = h.prepare()
[2026-05-18 21:37:00,592 hyrax.verbs.prepare:INFO] Finished Prepare
[4]:
# Display the "lance" entry of the first item (index 0) in the prepared "infer" data.
prepared["infer"][0]["lance"]
[4]:
{'object_id': 'obj_0', 'flux': 10.2, 'redshift': 0.12}