Skip to content

Commit

Permalink
feat: add file download and size to remaining entities (#92) (#93)
Browse files Browse the repository at this point in the history
* feat: update sequencing-data.csv to have read file_size from S3 (#92)

* refactor: adjust build-sequencing-data and buildHelp (#92)

* style: adjust python file names to be more conventional (#92)

* style: change names in python files to be more conventional (#92)

* feat: add file size to annotation and assembly csvs (#92)

* feat: add file size and name to all entities (#92)

* feat: update alignments.json (#92)

* feat: add download, filename, and file size columns to all entities (#92)

---------

Co-authored-by: hunterckx <[email protected]>
  • Loading branch information
NoopDog and hunterckx authored Dec 2, 2024
1 parent 8a7b94e commit 861b769
Show file tree
Hide file tree
Showing 24 changed files with 10,767 additions and 5,161 deletions.
11 changes: 11 additions & 0 deletions app/apis/catalog/hprc-data-explorer/common/entities.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/**
 * Union of the HPRC data explorer entity types: alignment, annotation,
 * assembly and raw sequencing data.
 */
export type HPRCDataExplorerEntity =
| HPRCDataExplorerAlignment
| HPRCDataExplorerAnnotation
| HPRCDataExplorerAssembly
| HPRCDataExplorerRawSequencingData;

export interface HPRCDataExplorerRawSequencingData {
accession: string | null;
assembly: string | null;
Expand All @@ -13,6 +19,7 @@ export interface HPRCDataExplorerRawSequencingData {
designDescription: string;
familyId: string | null;
filename: string;
fileSize: string; // This and other fileSize fields should be `number | LABEL.NA` if that's ever restored
filetype: string;
fiveHundredkbPlus: string; //number | LABEL.NA;
fourHundredkbPlus: string; //number | LABEL.NA;
Expand Down Expand Up @@ -69,6 +76,8 @@ export interface HPRCDataExplorerAssembly {
awsFasta: string | null;
familyId: string | null;
fastaSha256: string | null;
filename: string;
fileSize: string;
frag: number | null;
fullDup: number | null;
fullSgl: number | null;
Expand All @@ -90,6 +99,8 @@ export interface HPRCDataExplorerAssembly {
export interface HPRCDataExplorerAnnotation {
annotationType: string;
fileLocation: string;
filename: string;
fileSize: string;
haplotype: string | null;
reference: string | null;
sampleId: string;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ import {
HPRCDataExplorerAlignment,
HPRCDataExplorerAnnotation,
HPRCDataExplorerAssembly,
HPRCDataExplorerEntity,
HPRCDataExplorerRawSequencingData,
LABEL,
} from "../../../../apis/catalog/hprc-data-explorer/common/entities";
import * as C from "../../../../components/index";
import { METADATA_KEY } from "./entities";
Expand Down Expand Up @@ -45,7 +47,21 @@ export const buildAlignmentDownload = (
): React.ComponentProps<typeof C.FileDownload> => {
return {
fileName: alignment.filename,
fileUrl: getAlignmentDownloadUrl(alignment),
fileUrl: getDownloadUrl(alignment.loc),
};
};

/**
 * Derive the file download cell props for an annotation.
 * @param annotation - Annotation entity.
 * @returns File name and download URL to be used for the cell.
 */
export const buildAnnotationDownload = (
  annotation: HPRCDataExplorerAnnotation
): React.ComponentProps<typeof C.FileDownload> => {
  const { fileLocation, filename } = annotation;
  // The file location is passed through getDownloadUrl to obtain the link target.
  const fileUrl = getDownloadUrl(fileLocation);
  return { fileName: filename, fileUrl };
};

Expand Down Expand Up @@ -75,6 +91,20 @@ export const buildAssembly = (
};
};

/**
 * Derive the file download cell props for an assembly.
 * @param assembly - Assembly entity.
 * @returns File name and, when a FASTA location is present, download URL for the cell.
 */
export const buildAssemblyDownload = (
  assembly: HPRCDataExplorerAssembly
): React.ComponentProps<typeof C.FileDownload> => {
  const { awsFasta, filename } = assembly;
  // A null or empty FASTA location leaves the URL undefined.
  const fileUrl = awsFasta ? getDownloadUrl(awsFasta) : undefined;
  return { fileName: filename, fileUrl };
};

/**
* Build props for the AWS FASTA cell.
* @param assembly - Assembly entity.
Expand Down Expand Up @@ -259,11 +289,11 @@ export const buildFileLocation = (

/**
* Build props for the filename cell.
* @param entity - Raw sequencing data or alignment entity.
* @param entity - Entity.
* @returns Props to be used for the cell.
*/
export const buildFilename = (
entity: HPRCDataExplorerRawSequencingData | HPRCDataExplorerAlignment
entity: HPRCDataExplorerEntity
): React.ComponentProps<typeof C.BasicCell> => {
return {
value: entity.filename,
Expand All @@ -272,14 +302,14 @@ export const buildFilename = (

/**
* Build props for the file size cell.
* @param alignment - Alignment entity.
* @param entity - Entity.
* @returns Props to be used for the cell.
*/
export const buildFileSize = (
alignment: HPRCDataExplorerAlignment
entity: HPRCDataExplorerEntity
): React.ComponentProps<typeof C.BasicCell> => {
return {
value: formatFileSize(alignment.fileSize),
value: entity.fileSize === LABEL.NA ? LABEL.NA : formatFileSize(Number(entity.fileSize)),
};
};

Expand Down Expand Up @@ -900,6 +930,20 @@ export const buildSampleId = (
};
};

/**
 * Derive the file download cell props for raw sequencing data.
 * @param rawSequencingData - Raw sequencing data entity.
 * @returns File name and download URL to be used for the cell.
 */
export const buildSequencingDataDownload = (
  rawSequencingData: HPRCDataExplorerRawSequencingData
): React.ComponentProps<typeof C.FileDownload> => {
  const { filename, path } = rawSequencingData;
  // The entity's path is passed through getDownloadUrl to obtain the link target.
  const fileUrl = getDownloadUrl(path);
  return { fileName: filename, fileUrl };
};

/**
* Build props for the seq kit cell.
* @param rawSequencingData - Raw sequencing data entity.
Expand Down Expand Up @@ -1148,13 +1192,13 @@ function formatPercentage(decimalFraction: number): string {
}

/**
* Get the download URL for an alignment.
* @param alignment - Alignment entity.
* Get a download URL from a given URI, by converting it from an S3 URI or returning it as-is.
* @param uri - URI.
* @returns download URL.
*/
function getAlignmentDownloadUrl(alignment: HPRCDataExplorerAlignment): string {
const s3UriMatch = /^s3:\/\/([^/]+)\/(.*)$/.exec(alignment.loc);
if (!s3UriMatch) return alignment.loc;
function getDownloadUrl(uri: string): string {
  // Capture group 1 is the bucket name, group 2 is everything after the first slash.
  const match = uri.match(/^s3:\/\/([^/]+)\/(.*)$/);
  // Anything that is not of the form s3://<bucket>/<path> is returned untouched.
  if (match === null) return uri;
  const bucket = match[1];
  const key = match[2];
  return "https://" + bucket + ".s3.amazonaws.com/" + key;
}
6 changes: 3 additions & 3 deletions catalog-build/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ pip install -r catalog-build/requirements.txt

Then run the scripts from this directory with:
```shell
python3 build-sequencing-data.py
python3 build-assemblies.py
python3 build-annotations.py
python3 build_sequencing_data.py
python3 build_assemblies.py
python3 build_annotations.py
```


Expand Down
105 changes: 0 additions & 105 deletions catalog-build/build-assemblies.py

This file was deleted.

9 changes: 9 additions & 0 deletions catalog-build/build-catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ async function buildRawSequencingData(): Promise<
deepConsensusVersion: row.DeepConsensus_version,
designDescription: row.design_description,
familyId: parseStringOrNull(row.familyID),
fileSize: parseNumberOrNA(row.file_size).toString(),
filename: row.filename,
filetype: row.filetype,
fiveHundredkbPlus: parseNumberOrNA(row["500kb+"]).toString(),
Expand Down Expand Up @@ -139,6 +140,8 @@ async function buildAssemblies(): Promise<HPRCDataExplorerAssembly[]> {
awsFasta: parseStringOrNull(row.aws_fasta),
familyId: parseStringOrNull(row.familyID),
fastaSha256: parseStringOrNull(row.fasta_sha256),
fileSize: parseNumberOrNA(row.file_size).toString(),
filename: getFileNameFromPath(row.aws_fasta),
frag: parseNumberOrNull(row.frag),
fullDup: parseNumberOrNull(row.full_dup),
fullSgl: parseNumberOrNull(row.full_sgl),
Expand Down Expand Up @@ -170,6 +173,8 @@ async function buildAnnotations(): Promise<HPRCDataExplorerAnnotation[]> {
(row): HPRCDataExplorerAnnotation => ({
annotationType: row.annotation_type,
fileLocation: row.file_location,
fileSize: parseNumberOrNA(row.file_size).toString(),
filename: getFileNameFromPath(row.file_location),
haplotype: parseStringOrNull(row.haplotype),
reference: parseStringOrNull(row.reference),
sampleId: row.sample,
Expand Down Expand Up @@ -215,6 +220,10 @@ async function saveJson(filePath: string, data: unknown): Promise<void> {
await fsp.writeFile(filePath, JSON.stringify(data, undefined, 2));
}

/**
 * Get the final segment of a slash-separated path.
 * @param p - Path or URI.
 * @returns Everything after the last "/", or the whole string if it contains no "/".
 */
function getFileNameFromPath(p: string): string {
  const lastSlashIndex = p.lastIndexOf("/");
  if (lastSlashIndex === -1) return p;
  return p.slice(lastSlashIndex + 1);
}

/**
 * Trim a string value, mapping blank input to null.
 * @param value - Raw string value.
 * @returns The trimmed string, or null if nothing remains after trimming.
 */
function parseStringOrNull(value: string): string | null {
  const trimmed = value.trim();
  return trimmed === "" ? null : trimmed;
}
Expand Down
56 changes: 0 additions & 56 deletions catalog-build/build-sequencing-data.py

This file was deleted.

14 changes: 0 additions & 14 deletions catalog-build/buildHelp.py

This file was deleted.

Loading

0 comments on commit 861b769

Please sign in to comment.