From efd508512874c4be22d1d23b58dbc49af12285b0 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Fri, 29 May 2026 20:51:01 +0100 Subject: [PATCH] Close v2 parity gap on 8 image/dataset/expression queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live parity sweep against v2 prod (May 2026) flagged that v2-dev /run_query returned a leaner column set than v2 SOLR-backed processor for 8 queries, even after v1.10.1 batch fixed 6 image-bearing queries and v1.11.0 fixed get_similar_neurons. Each gap is a single missing column or group the V2 frontend already understands via COL_HEADER_MAP in uk.ac.vfb.geppetto VFBqueryJsonProcessor, so the fix is upstream Cypher + schema preview_columns. Owlery-backed (Template_Space + Imaging_Technique) Extend _owlery_query_to_results to extract template and technique from the same anatomy_channel_image[0].channel_image SOLR structure thumbnail already comes from. Bumps headers (both _get_standard_query_headers and _get_neurons_part_here_headers) and preview_columns for the three queries explicitly flagged: NeuronsPresynapticHere, TractsNervesInnervatingHere, LineageClonesIn. AlignedDatasets / AllDatasets (full v2 column shape) Both functions previously returned only [id, name, tags]. Replace with Cypher following prod XMI Datasets-available chain. Returns pubs (Reference), license, template, technique, thumbnail, image_count. Each branch wrapped in CALL subquery scoped to ds so the outer carrier row stays one-per-ds. Shared helpers keep the pair in lockstep. PaintedDomains (Definition column) Cypher already populated description; response builder did not list it in headers/rows. Add it. COL_HEADER_MAP[description] = Definition. TransgeneExpressionHere (replace delegate, add image columns) Function previously delegated to get_expression_overlaps_here so v2-dev got the lean 4-column output. Replace with proper Cypher matching prod XMI dataSources.0/queries.7. 
Returns name (the ep), pubs (Reference), tags, template, technique, thumbnail. Expressed_in deferred — TODO comment in function body. SimilarMorphologyToNBexp (Type column) Mirrors the PR #42 / v1.11.0 fix for get_similar_neurons: add a pipe-joined type column from primary INSTANCEOF Class inside a CALL subquery so the outer row stays one-per-primary. Files src/vfbquery/vfb_queries.py - _owlery_query_to_results - _get_standard_query_headers, _get_neurons_part_here_headers - get_aligned_datasets, get_all_datasets (+ 3 shared helpers) - get_painted_domains - get_transgene_expression_here - get_similar_morphology_nb_exp - 8 *_to_schema preview_columns updates Refs: projects/geppetto-vfbquery-migration/V2_V2DEV_PARITY_SWEEP.md --- src/vfbquery/vfb_queries.py | 319 +++++++++++++++++++++++++++++++----- 1 file changed, 274 insertions(+), 45 deletions(-) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index 4d28762..333303d 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -1358,7 +1358,7 @@ def NeuronsPresynapticHere_to_schema(name, take_default): "default": take_default, } preview = 5 - preview_columns = ["id", "label", "tags", "thumbnail"] + preview_columns = ["id", "label", "tags", "template", "technique", "thumbnail"] return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns) @@ -1598,7 +1598,7 @@ def TractsNervesInnervatingHere_to_schema(name, take_default): "default": take_default, } preview = 5 - preview_columns = ["id", "label", "tags", "thumbnail"] + preview_columns = ["id", "label", "tags", "template", "technique", "thumbnail"] return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns) @@ -1623,7 +1623,7 @@ def LineageClonesIn_to_schema(name, take_default): "default": take_default, } preview = 5 - preview_columns = ["id", "label", 
"tags", "template", "technique", "thumbnail"] return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns) @@ -1855,7 +1855,7 @@ def SimilarMorphologyToNB_to_schema(name, take_default): def SimilarMorphologyToNBexp_to_schema(name, take_default): """Schema for SimilarMorphologyToNBexp (NeuronBridge expression) query.""" - return Query(query="SimilarMorphologyToNBexp", label=f"NeuronBridge matches for {name}", function="get_similar_morphology_nb_exp", takes={"short_form": {"$and": ["Individual", "Expression_pattern", "neuronbridge"]}, "default": take_default}, preview=5, preview_columns=["id", "name", "score", "tags", "template", "technique", "thumbnail"]) + return Query(query="SimilarMorphologyToNBexp", label=f"NeuronBridge matches for {name}", function="get_similar_morphology_nb_exp", takes={"short_form": {"$and": ["Individual", "Expression_pattern", "neuronbridge"]}, "default": take_default}, preview=5, preview_columns=["id", "name", "score", "tags", "type", "template", "technique", "thumbnail"]) def SimilarMorphologyToUserData_to_schema(name, take_default): @@ -1865,7 +1865,7 @@ def SimilarMorphologyToUserData_to_schema(name, take_default): def PaintedDomains_to_schema(name, take_default): """Schema for PaintedDomains query.""" - return Query(query="PaintedDomains", label=f"Painted domains for {name}", function="get_painted_domains", takes={"short_form": {"$and": ["Template", "Individual"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "type", "thumbnail"]) + return Query(query="PaintedDomains", label=f"Painted domains for {name}", function="get_painted_domains", takes={"short_form": {"$and": ["Template", "Individual"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "type", "description", "thumbnail"]) def DatasetImages_to_schema(name, take_default): @@ -1880,12 +1880,12 @@ def AllAlignedImages_to_schema(name, take_default): def 
AlignedDatasets_to_schema(name, take_default): """Schema for AlignedDatasets query.""" - return Query(query="AlignedDatasets", label=f"Datasets aligned to {name}", function="get_aligned_datasets", takes={"short_form": {"$and": ["Template", "Individual"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "tags"]) + return Query(query="AlignedDatasets", label=f"Datasets aligned to {name}", function="get_aligned_datasets", takes={"short_form": {"$and": ["Template", "Individual"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "pubs", "tags", "license", "template", "technique", "thumbnail", "image_count"]) def AllDatasets_to_schema(name, take_default): """Schema for AllDatasets query.""" - return Query(query="AllDatasets", label="All available datasets", function="get_all_datasets", takes={"short_form": {"$and": ["Template"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "tags"]) + return Query(query="AllDatasets", label="All available datasets", function="get_all_datasets", takes={"short_form": {"$and": ["Template"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "pubs", "tags", "license", "template", "technique", "thumbnail", "image_count"]) def TermsForPub_to_schema(name, take_default): @@ -1902,7 +1902,7 @@ def TransgeneExpressionHere_to_schema(name, take_default): Query chain: Multi-step Owlery and Neo4j queries """ - return Query(query="TransgeneExpressionHere", label=f"Transgene expression in {name}", function="get_transgene_expression_here", takes={"short_form": {"$and": ["Class", "Nervous_system", "Anatomy"]}, "default": take_default}, preview=5, preview_columns=["id", "name", "tags"]) + return Query(query="TransgeneExpressionHere", label=f"Transgene expression in {name}", function="get_transgene_expression_here", takes={"short_form": {"$and": ["Class", "Nervous_system", "Anatomy"]}, "default": take_default}, preview=5, preview_columns=["id", "name", "pubs", "tags", 
"template", "technique", "thumbnail"]) def FindStocks_to_schema(name, take_default): @@ -3788,6 +3788,8 @@ def _get_neurons_part_here_headers(): "tags": {"title": "Tags", "type": "tags", "order": 2}, "source": {"title": "Data Source", "type": "metadata", "order": 3}, "source_id": {"title": "Data Source ID", "type": "metadata", "order": 4}, + "template": {"title": "Template", "type": "markdown", "order": 6}, + "technique": {"title": "Imaging Technique", "type": "text", "order": 7}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9} } @@ -3798,6 +3800,8 @@ def _get_standard_query_headers(): "id": {"title": "Add", "type": "selection_id", "order": -1}, "label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}}, "tags": {"title": "Tags", "type": "tags", "order": 2}, + "template": {"title": "Template", "type": "markdown", "order": 6}, + "technique": {"title": "Imaging Technique", "type": "text", "order": 7}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9} } @@ -3892,34 +3896,61 @@ def _owlery_query_to_results(owl_query_string: str, short_form: str, return_data # Extract tags from unique_facets tags = '|'.join(term_core.get('unique_facets', [])) - # Extract thumbnail from anatomy_channel_image if available + # Extract thumbnail + template + technique from + # anatomy_channel_image[0].channel_image. v2 prod's SOLR-backed + # processor surfaces all three as separate columns + # (Template_Space, Imaging_Technique, Images); v2-dev was + # previously only getting Images because we only built the + # thumbnail markdown. 
thumbnail = '' + template = '' + technique = '' anatomy_images = field_data.get('anatomy_channel_image', []) if anatomy_images and len(anatomy_images) > 0: first_img = anatomy_images[0] channel_image = first_img.get('channel_image', {}) image_info = channel_image.get('image', {}) + + # Template — `[label](short_form)` markdown so the + # VFBqueryJsonProcessor's stripMarkdownLink renders a + # clickable link in the V2 Template_Space column. + template_anatomy = image_info.get('template_anatomy', {}) + template_short_form = template_anatomy.get('short_form', '') if template_anatomy else '' + template_label_raw = '' + if template_anatomy: + template_label_raw = template_anatomy.get('symbol') or template_anatomy.get('label', '') + template_label = unquote(template_label_raw) if template_label_raw else '' + if template_label and template_short_form: + template = f"[{template_label}]({template_short_form})" + + # Imaging technique — plain label (V2 Imaging_Technique + # column renders as text; matches how + # get_similar_morphology_part_of et al. emit it). + technique_info = channel_image.get('imaging_technique', {}) + if technique_info: + technique_label_raw = technique_info.get('label', '') + technique = unquote(technique_label_raw) if technique_label_raw else '' + + # Thumbnail — canonical `[![alt](url 'alt')](ref)` form. 
thumbnail_url = image_info.get('image_thumbnail', '') - if thumbnail_url: # Convert to HTTPS and use non-transparent version thumbnail_url = thumbnail_url.replace('http://', 'https://').replace('thumbnailT.png', 'thumbnail.png') - + # Format thumbnail with proper markdown link (matching Neo4j behavior) - template_anatomy = image_info.get('template_anatomy', {}) - if template_anatomy: - template_label = template_anatomy.get('symbol') or template_anatomy.get('label', '') - template_label = unquote(template_label) + if template_label: anatomy_label = first_img.get('anatomy', {}).get('label', label_text) anatomy_label = unquote(anatomy_label) alt_text = f"{anatomy_label} aligned to {template_label}" thumbnail = f"[![{alt_text}]({thumbnail_url} '{alt_text}')]({class_short_form})" - + # Build row row = { 'id': class_short_form, 'label': f"[{label_text}]({class_short_form})", 'tags': tags, + 'template': template, + 'technique': technique, 'thumbnail': thumbnail } @@ -3955,8 +3986,10 @@ def _owlery_query_to_results(owl_query_string: str, short_form: str, return_data # Convert to DataFrame if requested if return_dataframe: df = pd.DataFrame(rows) - # Apply markdown encoding - columns_to_encode = ['label', 'thumbnail'] + # Apply markdown encoding — template is a `[label](short_form)` + # link and needs the same encoding as label/thumbnail so the V2 + # frontend's link parser renders it consistently. 
+ columns_to_encode = ['label', 'template', 'thumbnail'] df = encode_markdown_links(df, columns_to_encode) return df @@ -4659,19 +4692,34 @@ def get_similar_morphology_nb_exp(expression_short_form: str, return_dataframe=T count_results = vc.nc.commit_list([count_query]) total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0 - main_query = f"""MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{expression_short_form}' AND EXISTS(nblast.neuronbridge_score) WITH primary, nblast - OPTIONAL MATCH (primary)<-[:depicts]-(channel:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template) - WITH primary, nblast, channel, ri, templ - OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) - WITH primary, nblast, channel, ri, templ, technique - OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class) WITH CASE WHEN typ IS NULL THEN [] ELSE collect({{short_form: typ.short_form, label: coalesce(typ.label, ''), iri: typ.iri, types: labels(typ), symbol: coalesce(typ.symbol[0], '')}}) END AS types, primary, nblast, channel, ri, templ, technique + # Add `type` as pipe-joined parent class labels (matches v2 prod's + # `Type` column). Aggregate in a CALL subquery scoped to `primary` so + # multi-INSTANCEOF neurons don't multiply rows under the existing + # OPTIONAL MATCH chain. This flat string replaces the nested `types` + # struct the query used to return — `types` is no longer produced, even + # for return_dataframe=True — so the processor's generic List handler + # has no HashMap to dump via toString. 
+ main_query = f"""MATCH (n:Individual)-[nblast:has_similar_morphology_to_part_of]-(primary:Individual) WHERE n.short_form = '{expression_short_form}' AND EXISTS(nblast.neuronbridge_score) + WITH DISTINCT primary, nblast + CALL {{ + WITH primary + OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class) + RETURN apoc.text.join([l IN collect(DISTINCT typ.label) WHERE l IS NOT NULL AND l <> ''], '|') AS type + }} + CALL {{ + WITH primary + OPTIONAL MATCH (primary)<-[:depicts]-(channel:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template) + OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) + WITH ri, templ, technique LIMIT 1 + RETURN ri, templ, technique + }} RETURN primary.short_form AS id, '[' + primary.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + primary.short_form + ')' AS name, apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags, nblast.neuronbridge_score[0] AS score, - types, + type, REPLACE(apoc.text.format("[%s](%s)",[COALESCE(templ.symbol[0],templ.label),templ.short_form]), '[null](null)', '') AS template, - technique.label AS technique, + coalesce(technique.label, '') AS technique, REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(primary.symbol[0],primary.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(ri.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(primary.symbol[0],primary.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + primary.short_form]), "[![null]( 'null')](null)", "") AS thumbnail ORDER BY score DESC""" if limit != -1: main_query += f" LIMIT {limit}" @@ -4681,7 +4729,7 @@ def get_similar_morphology_nb_exp(expression_short_form: str, return_dataframe=T if not df.empty: df = encode_markdown_links(df, ['name', 'template', 'thumbnail']) if return_dataframe: return df - return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Match", 
"type": "markdown", "order": 0}, "score": {"title": "NB Score", "type": "text", "order": 1}, "tags": {"title": "Tags", "type": "tags", "order": 2}, "template": {"title": "Template", "type": "markdown", "order": 3}, "technique": {"title": "Imaging Technique", "type": "text", "order": 4}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}}, "rows": [{key: row[key] for key in ["id", "name", "score", "tags", "template", "technique", "thumbnail"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} + return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Match", "type": "markdown", "order": 0}, "score": {"title": "NB Score", "type": "text", "order": 1}, "tags": {"title": "Tags", "type": "tags", "order": 2}, "type": {"title": "Type", "type": "text", "order": 3}, "template": {"title": "Template", "type": "markdown", "order": 4}, "technique": {"title": "Imaging Technique", "type": "text", "order": 5}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}}, "rows": [{key: row[key] for key in ["id", "name", "score", "tags", "type", "template", "technique", "thumbnail"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} def get_similar_morphology_userdata(upload_id: str, return_dataframe=True, limit: int = -1): @@ -4720,7 +4768,11 @@ def get_painted_domains(template_short_form: str, return_dataframe=True, limit: if not df.empty: df = encode_markdown_links(df, ['name', 'thumbnail']) if return_dataframe: return df - return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Domain", "type": "markdown", "order": 0}, "type": {"title": "Type", "type": "text", "order": 1}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 2}}, "rows": [{key: row[key] for key in ["id", "name", "type", "thumbnail"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} + # description is already populated by the Cypher 
(coalesce of di/d + # description). v2 prod surfaces it as the `Definition` column via + # COL_HEADER_MAP[description] = Definition — was previously dropped + # because it wasn't listed in headers/rows. + return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Domain", "type": "markdown", "order": 0}, "type": {"title": "Type", "type": "text", "order": 1}, "description": {"title": "Definition", "type": "text", "order": 2}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}}, "rows": [{key: row[key] for key in ["id", "name", "type", "description", "thumbnail"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} def get_dataset_images(dataset_short_form: str, return_dataframe=True, limit: int = -1): @@ -4775,40 +4827,139 @@ def get_all_aligned_images(template_short_form: str, return_dataframe=True, limi return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Image", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}, "type": {"title": "Type", "type": "text", "order": 2}, "template": {"title": "Template", "type": "markdown", "order": 3}, "technique": {"title": "Imaging Technique", "type": "text", "order": 4}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}}, "rows": [{key: row[key] for key in ["id", "name", "tags", "type", "template", "technique", "thumbnail"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} +def _dataset_enrichment_cypher(ds_var: str = "ds") -> str: + """Return CALL subqueries that, given a DataSet bound as ``ds_var``, + aggregate the columns v2 prod surfaces from SOLR: + + pubs — "; "-joined `core.label` (matches v2's Reference column) + license — `[label](short_form)` markdown link + template/technique/thumbnail — one representative channel-image + (matches prod's `apoc.cypher.run('… LIMIT 5')` shape) + image_count — DISTINCT count of individuals 
sourced to the dataset + + Each branch is wrapped in its own CALL so the outer carrier row stays + one-per-ds; no cartesian blow-up between pubs × license × alignments. + """ + return f""" + CALL {{ + WITH {ds_var} + OPTIONAL MATCH ({ds_var})-[:has_reference]->(p:pub) + RETURN apoc.text.join([l IN collect(DISTINCT coalesce(p.label, p.short_form)) WHERE l IS NOT NULL AND l <> ''], '; ') AS pubs + }} + CALL {{ + WITH {ds_var} + OPTIONAL MATCH ({ds_var})-[:has_license|license]->(lic:License) + WITH lic LIMIT 1 + RETURN REPLACE(apoc.text.format("[%s](%s)", [COALESCE(lic.symbol[0], lic.label), lic.short_form]), '[null](null)', '') AS license + }} + CALL {{ + WITH {ds_var} + OPTIONAL MATCH ({ds_var})<-[:has_source]-(i:Individual)<-[:depicts]-(channel:Individual)-[irw:in_register_with]->(:Template)-[:depicts]->(templ:Template) + OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) + WITH i, templ, technique, irw, channel LIMIT 1 + RETURN i, templ, technique, irw + }} + CALL {{ + WITH {ds_var} + OPTIONAL MATCH ({ds_var})<-[:has_source]-(img:Individual) + RETURN count(DISTINCT img) AS image_count + }} + """ + + +def _dataset_return_clause(ds_var: str = "ds") -> str: + """Return the RETURN tail used by both get_aligned_datasets and + get_all_datasets. Matches v2 prod columns: + id, name, pubs(Reference), tags(Gross_Type), license, template, + technique, thumbnail, image_count. 
+ """ + return f""" + RETURN + {ds_var}.short_form AS id, + '[' + coalesce({ds_var}.label, {ds_var}.short_form) + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + {ds_var}.short_form + ')' AS name, + pubs, + apoc.text.join(coalesce({ds_var}.uniqueFacets, []), '|') AS tags, + license, + REPLACE(apoc.text.format("[%s](%s)", [COALESCE(templ.symbol[0], templ.label), templ.short_form]), '[null](null)', '') AS template, + coalesce(technique.label, '') AS technique, + REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, {ds_var}.short_form)]), "[![null]( 'null')](null)", "") AS thumbnail, + image_count + ORDER BY name + """ + + +def _dataset_response_dict(rows, total_count): + """Shared response shape for get_aligned_datasets and + get_all_datasets — column ordering mirrors v2 prod's + `[Name, Reference, Gross_Type, License, Template_Space, + Imaging_Technique, Images, Image_count]`. 
+ """ + return { + "headers": { + "id": {"title": "ID", "type": "selection_id", "order": -1}, + "name": {"title": "Dataset", "type": "markdown", "order": 0}, + "pubs": {"title": "Reference", "type": "metadata", "order": 1}, + "tags": {"title": "Tags", "type": "tags", "order": 2}, + "license": {"title": "License", "type": "markdown", "order": 3}, + "template": {"title": "Template", "type": "markdown", "order": 4}, + "technique": {"title": "Imaging Technique", "type": "text", "order": 5}, + "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 6}, + "image_count": {"title": "Image_count", "type": "numeric", "order": 7}, + }, + "rows": rows, + "count": total_count, + } + + def get_aligned_datasets(template_short_form: str, return_dataframe=True, limit: int = -1): - """List all datasets aligned to a template.""" + """List all datasets aligned to a template, with the same columns v2 + prod's SOLR-backed chain surfaces (Reference, License, Template_Space, + Imaging_Technique, Images, Image_count). Closes the v2 parity gap + flagged in projects/geppetto-vfbquery-migration/V2_V2DEV_PARITY_SWEEP.md. 
+ """ count_query = f"MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds) RETURN count(ds) AS count" count_results = vc.nc.commit_list([count_query]) total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0 - + main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds) - RETURN DISTINCT ds.short_form AS id, '[' + ds.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + ds.short_form + ')' AS name, apoc.text.join(coalesce(ds.uniqueFacets, []), '|') AS tags""" + WITH DISTINCT ds + {_dataset_enrichment_cypher('ds')} + {_dataset_return_clause('ds')}""" if limit != -1: main_query += f" LIMIT {limit}" - + results = vc.nc.commit_list([main_query]) df = pd.DataFrame.from_records(get_dict_cursor()(results)) - if not df.empty: df = encode_markdown_links(df, ['name']) - + if not df.empty: + df = encode_markdown_links(df, ['name', 'license', 'template', 'thumbnail']) + if return_dataframe: return df - return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Dataset", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}}, "rows": [{key: row[key] for key in ["id", "name", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} + rows = [{k: row[k] for k in ['id', 'name', 'pubs', 'tags', 'license', 'template', 'technique', 'thumbnail', 'image_count']} + for row in safe_to_dict(df, sort_by_id=False)] + return _dataset_response_dict(rows, total_count) def get_all_datasets(return_dataframe=True, limit: int = -1): - """List all available datasets.""" + """List all 
available datasets, with the same column shape as + get_aligned_datasets (matches v2 prod's AllDatasets columns).""" count_query = "MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds) WITH DISTINCT ds RETURN count(ds) AS count" count_results = vc.nc.commit_list([count_query]) total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0 - + main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds) - RETURN DISTINCT ds.short_form AS id, '[' + ds.label + '](https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id=' + ds.short_form + ')' AS name, apoc.text.join(coalesce(ds.uniqueFacets, []), '|') AS tags""" + WITH DISTINCT ds + {_dataset_enrichment_cypher('ds')} + {_dataset_return_clause('ds')}""" if limit != -1: main_query += f" LIMIT {limit}" - + results = vc.nc.commit_list([main_query]) df = pd.DataFrame.from_records(get_dict_cursor()(results)) - if not df.empty: df = encode_markdown_links(df, ['name']) + if not df.empty: + df = encode_markdown_links(df, ['name', 'license', 'template', 'thumbnail']) if return_dataframe: return df - return {"headers": {"id": {"title": "ID", "type": "selection_id", "order": -1}, "name": {"title": "Dataset", "type": "markdown", "order": 0}, "tags": {"title": "Tags", "type": "tags", "order": 1}}, "rows": [{key: row[key] for key in ["id", "name", "tags"]} for row in safe_to_dict(df, sort_by_id=False)], "count": total_count} + rows = [{k: row[k] for k in ['id', 'name', 'pubs', 'tags', 'license', 'template', 'technique', 'thumbnail', 'image_count']} + for row in safe_to_dict(df, sort_by_id=False)] + return _dataset_response_dict(rows, total_count) # ===== Publication Query ===== @@ -4835,10 +4986,88 @@ def 
get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = - # ===== Complex Transgene Expression Query ===== def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True, limit: int = -1): - """Multi-step query: Owlery subclasses + expression overlaps.""" - # This uses a combination of Owlery and Neo4j similar to get_expression_overlaps_here - # but specifically for transgenes. For now, we'll use the existing expression pattern logic - return get_expression_overlaps_here(anatomy_short_form, return_dataframe, limit) + """Reports of transgene expression in the specified anatomical region. + + Returns one row per overlapping/part-of Expression_pattern with: + - Reference (pubs) — `; `-joined publication labels + - Gross_Type (tags) + - Template_Space / Imaging_Technique / Images (one representative + channel-image per ep, picked via CALL subquery with LIMIT 1) + + Matches the prod XMI Cypher at + geppetto-vfb/master:model/vfb.xmi @dataSources.0/@queries.7 (the + SOLR-backed "Query for exp from anatomy with no warning" path), + flattened to the same column shape v1.10.1 introduced for the + SimilarMorphologyTo* siblings so the geppetto-vfb processor's + COL_HEADER_MAP maps everything cleanly. + + TODO: Expressed_in column. Prod surfaces it from the + anatomy_channel_image[].anatomy.label list — one chip per + representative image. Needs a small design decision on how to + render multiple values in a flat string column; deferred to a + follow-up so the rest of the columns can ship now. 
+ """ + count_query = f""" + MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(:Individual)-[:INSTANCEOF]->(anat:Class) + WHERE anat.short_form = '{anatomy_short_form}' + RETURN COUNT(DISTINCT ep) AS total_count + """ + count_results = vc.nc.commit_list([count_query]) + count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results)) + total_count = count_df['total_count'][0] if not count_df.empty else 0 + + main_query = f""" + MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(:Individual)-[:INSTANCEOF]->(anat:Class) + WHERE anat.short_form = '{anatomy_short_form}' + WITH DISTINCT ep + CALL {{ + WITH ep + OPTIONAL MATCH (ep)<-[:overlaps|part_of]-(:Individual)-[:has_reference|pub]->(p:pub) + RETURN apoc.text.join([l IN collect(DISTINCT coalesce(p.label, p.short_form)) WHERE l IS NOT NULL AND l <> ''], '; ') AS pubs + }} + CALL {{ + WITH ep + OPTIONAL MATCH (ep)<-[:has_source|SUBCLASSOF|INSTANCEOF*]-(i:Individual)<-[:depicts]-(channel:Individual)-[irw:in_register_with]->(:Template)-[:depicts]->(templ:Template) + OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) + WITH i, templ, technique, irw LIMIT 1 + RETURN i, templ, technique, irw + }} + RETURN + ep.short_form AS id, + apoc.text.format("[%s](%s)", [ep.label, ep.short_form]) AS name, + apoc.text.join(coalesce(ep.uniqueFacets, []), '|') AS tags, + pubs, + REPLACE(apoc.text.format("[%s](%s)", [COALESCE(templ.symbol[0], templ.label), templ.short_form]), '[null](null)', '') AS template, + coalesce(technique.label, '') AS technique, + REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, ep.short_form)]), "[![null]( 'null')](null)", "") AS 
thumbnail + ORDER BY ep.label + """ + if limit != -1: + main_query += f" LIMIT {limit}" + + results = vc.nc.commit_list([main_query]) + df = pd.DataFrame.from_records(get_dict_cursor()(results)) + if not df.empty: + df = encode_markdown_links(df, ['name', 'template', 'thumbnail']) + + if return_dataframe: + return df + return { + "headers": { + "id": {"title": "ID", "type": "selection_id", "order": -1}, + "name": {"title": "Expression Pattern", "type": "markdown", "order": 0}, + "pubs": {"title": "Publications", "type": "metadata", "order": 1}, + "tags": {"title": "Tags", "type": "tags", "order": 2}, + "template": {"title": "Template", "type": "markdown", "order": 3}, + "technique": {"title": "Imaging Technique", "type": "text", "order": 4}, + "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}, + }, + "rows": [ + {k: row[k] for k in ['id', 'name', 'pubs', 'tags', 'template', 'technique', 'thumbnail']} + for row in safe_to_dict(df, sort_by_id=False) + ], + "count": total_count, + } def fill_query_results(term_info, force_refresh: bool = False):