The Python bindings are built via PyO3/maturin and now cover the full
Rust surface: PCA, Kernel PCA, Laplacian eigenmap, auto_tune,
MetaModel, FeedbackAggregator, and the category enrichment layer
(concept paths, drill-down, domain groups, stats).
Type stubs (.pyi) are auto-generated by pyo3-stub-gen — IDEs and
mypy/pyright pick them up automatically. Regenerate them by running
`cargo run --bin gen-stubs` from `sphereql-python/` after changing
bindings.
pip install sphereql

For Qdrant vector database support:

pip install sphereql[qdrant]

import sphereql
categories = ["science", "science", "cooking", "cooking", "sports"]
embeddings = [
[0.1, 0.9, 0.3, 0.0],
[0.2, 0.8, 0.4, 0.1],
[0.9, 0.1, 0.0, 0.5],
[0.8, 0.2, 0.1, 0.4],
[0.4, 0.4, 0.8, 0.2],
]
pipeline = sphereql.Pipeline(categories, embeddings)
# k-nearest neighbors
query = [0.15, 0.85, 0.35, 0.05]
results = pipeline.nearest(query, k=3)
for r in results:
    print(f"{r.id} {r.category} distance={r.distance:.4f}")
# Similarity threshold search
similar = pipeline.similar_above(query, min_cosine=0.8)
# Concept path between items
path = pipeline.concept_path("s-0000", "s-0003", graph_k=10)
# Cluster detection
globs = pipeline.detect_globs(max_k=10)
# Local manifold fitting
manifold = pipeline.local_manifold(query, neighborhood_k=10)
# --- Category Enrichment ---
# Category-level concept path
cat_path = pipeline.category_concept_path("science", "cooking")
if cat_path:
    for step in cat_path.steps:
        print(f" {step.category_name} (d={step.cumulative_distance:.4f})")
# Nearest neighbor categories
neighbors = pipeline.category_neighbors("science", k=3)
for n in neighbors:
    print(f" {n.name}: cohesion={n.cohesion:.4f}, members={n.member_count}")
# Drill down within a category (uses inner sphere if available)
hits = pipeline.drill_down("science", query, k=5)
for h in hits:
    print(f" item={h.item_index} distance={h.distance:.4f} inner={h.used_inner_sphere}")
# Category stats (summaries + inner sphere reports)
summaries, inner_reports = pipeline.category_stats()
# Export projected coordinates
points = pipeline.exported_points()
print(f"Explained variance ratio: {pipeline.explained_variance_ratio:.4f}")

import sphereql
# Opens an interactive WebGL sphere in your browser
sphereql.visualize(categories, embeddings, title="My Embeddings")
# Or visualize from an existing pipeline
sphereql.visualize_pipeline(pipeline, title="Pipeline View")

import sphereql
# In-memory store (for testing and small datasets)
store = sphereql.InMemoryStore("my-collection", dimension=384)
store.upsert([
{"id": "doc-1", "vector": embedding_1, "metadata": {"category": "science"}},
{"id": "doc-2", "vector": embedding_2, "metadata": {"category": "cooking"}},
])
bridge = sphereql.VectorStoreBridge(store)
bridge.build_pipeline(category_key="category")
# Hybrid search: angular candidates + cosine re-ranking
results = bridge.hybrid_search(query_vec, final_k=5, recall_k=20)

import sphereql
# Spherical/Cartesian/Geo point types
p = sphereql.SphericalPoint(1.0, 0.5, 0.8)
c = sphereql.spherical_to_cartesian(p)
g = sphereql.spherical_to_geo(p)
# Distance functions
d = sphereql.angular_distance(p1, p2)
gc = sphereql.great_circle_distance(p1, p2, radius=6371.0)
# Projection classes
pca = sphereql.PcaProjection.fit(embeddings, radial="magnitude")
kpca = sphereql.KernelPcaProjection.fit(embeddings, radial="magnitude")
rp = sphereql.RandomProjection(dim=384, radial=1.0, seed=42)
# Laplacian eigenmap — connectivity-preserving projection for
# sparse / noisy corpora where variance-maximizing families collapse
# into the noise axes.
lap = sphereql.LaplacianEigenmap.fit(embeddings, k_neighbors=15)
print(f"connectivity_ratio = {lap.connectivity_ratio:.4f}")
# Build a pipeline with a non-default projection kind:
pipeline = sphereql.Pipeline(
categories,
embeddings,
config={"projection_kind": "LaplacianEigenmap"},
)

# Tune projection + config on a quality metric
pipeline, report = sphereql.auto_tune(
categories,
embeddings,
metric="default_composite",
strategy="random",
budget=16,
)
# Warm-start from a trained MetaModel
model = sphereql.NearestNeighborMetaModel()
model.fit(sphereql.load_default_store())
features = sphereql.corpus_features(categories, embeddings)
predicted_config = model.predict(features)
pipeline = sphereql.Pipeline(categories, embeddings, config=predicted_config)
# FeedbackAggregator: accumulate per-query satisfaction
agg = sphereql.FeedbackAggregator()
agg.record(sphereql.FeedbackEvent("my-corpus", "query-1", 0.9))
summary = agg.summarize("my-corpus")