Skip to content

Commit

Permalink
Merge pull request #262 from umccr/feat/crypt4gh-storage
Browse files Browse the repository at this point in the history
feat: Crypt4GH support using LocalStorage
  • Loading branch information
brainstorm authored Sep 11, 2024
2 parents 1156d85 + 4adac26 commit 7a2023e
Show file tree
Hide file tree
Showing 62 changed files with 2,363 additions and 612 deletions.
358 changes: 355 additions & 3 deletions Cargo.lock

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions data/c4gh/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Crypt4GH example file

This is just a customised summary for htsget-rs. Please refer to the official [`crypt4gh-rust` documentation](https://ega-archive.github.io/crypt4gh-rust) for further information.

## Keygen

```sh
cargo install crypt4gh
crypt4gh keygen --sk keys/alice.sec --pk keys/alice.pub
crypt4gh keygen --sk keys/bob.sec --pk keys/bob.pub
```

## Encrypt

```sh
crypt4gh encrypt --sk keys/alice.sec --recipient_pk keys/bob.pub < htsnexus_test_NA12878.bam > htsnexus_test_NA12878.bam.c4gh
```

## Decrypt

```sh
crypt4gh decrypt --range 0-65535 --sk data/c4gh/keys/bob.sec \
--sender_pk data/c4gh/keys/alice.pub \
< data/c4gh/htsnexus_test_NA12878.bam.c4gh \
> out.bam

samtools view out.bam
(...)
SRR098401.61822403 83 11 5009470 60 76M = 5009376 -169 TCTTCTTGCCCTGGTGTTTCGCCGTTCCAGTGCCCCCTGCTGCAGACCATAAAGGATGGGACTTTGTTGAGGTAGG ?B6BDCD@I?JFI?FHHFEAIIAHHDIJHHFIIIIIJEIIFIJGHCIJDDEEHHHDEHHHCIGGEGFDGFGFBEDC X0:i:1 X1:i:0 MD:Z:76 RG:Z:SRR098401 AM:i:37 NM:i:0 SM:i:37 MQ:i:60 XT:A:U BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@B

samtools view: error reading file "out.bam"
samtools view: error closing "out.bam": -1
```

The final `samtools view` errors suggest that the returned bytes do not include the BAM end-of-file marker, most likely because only the byte range `0-65535` of the file was decrypted.
Binary file added data/c4gh/htsnexus_test_NA12878.bam.bai
Binary file not shown.
Binary file added data/c4gh/htsnexus_test_NA12878.bam.c4gh
Binary file not shown.
Binary file added data/c4gh/htsnexus_test_NA12878.cram.c4gh
Binary file not shown.
Binary file added data/c4gh/htsnexus_test_NA12878.cram.crai
Binary file not shown.
3 changes: 3 additions & 0 deletions data/c4gh/keys/alice.pub
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-----BEGIN CRYPT4GH PUBLIC KEY-----
ToQrpj4UfuLgxZRe1wSGIZtXC19fOEHUHe3RQy63qwM=
-----END CRYPT4GH PUBLIC KEY-----
3 changes: 3 additions & 0 deletions data/c4gh/keys/alice.sec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-----BEGIN CRYPT4GH PRIVATE KEY-----
YzRnaC12MQAEbm9uZQAEbm9uZQAgxi4tNmUO++HAApv9ryZB9S8QfqrWKKe5CunJuChH5vU=
-----END CRYPT4GH PRIVATE KEY-----
3 changes: 3 additions & 0 deletions data/c4gh/keys/bob.pub
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-----BEGIN CRYPT4GH PUBLIC KEY-----
TyKEXZPnfon6dj1kRXl6HumfZDzo/h60RIc8Wd0Ig2s=
-----END CRYPT4GH PUBLIC KEY-----
3 changes: 3 additions & 0 deletions data/c4gh/keys/bob.sec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-----BEGIN CRYPT4GH PRIVATE KEY-----
YzRnaC12MQAEbm9uZQAEbm9uZQAg6uLXNqcXAi6FRKzRBk2KBKF4BnmueySZv5MGzKjIPcI=
-----END CRYPT4GH PRIVATE KEY-----
Binary file added data/c4gh/sample1-bcbio-cancer.bcf.c4gh
Binary file not shown.
Binary file added data/c4gh/sample1-bcbio-cancer.bcf.csi
Binary file not shown.
Binary file added data/c4gh/spec-v4.3.vcf.gz.c4gh
Binary file not shown.
Binary file added data/c4gh/spec-v4.3.vcf.gz.tbi
Binary file not shown.
7 changes: 7 additions & 0 deletions htsget-actix/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ repository = "https://github.com/umccr/htsget-rs"
[features]
s3-storage = ["htsget-config/s3-storage", "htsget-search/s3-storage", "htsget-http/s3-storage", "htsget-axum/s3-storage", "htsget-test/s3-storage"]
url-storage = ["htsget-config/url-storage", "htsget-search/url-storage", "htsget-http/url-storage", "htsget-axum/url-storage", "htsget-test/url-storage"]
c4gh-experimental = [
"htsget-config/c4gh-experimental",
"htsget-search/c4gh-experimental",
"htsget-http/c4gh-experimental",
"htsget-axum/c4gh-experimental",
"htsget-test/c4gh-experimental"
]
default = []

[dependencies]
Expand Down
1 change: 1 addition & 0 deletions htsget-actix/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ are exposed in the public API.
This crate has the following features:
* `s3-storage`: used to enable `S3Storage` functionality.
* `url-storage`: used to enable `UrlStorage` functionality.
* `c4gh-experimental`: used to enable `C4GHStorage` functionality.

## Benchmarks
Benchmarks for this crate are written using [Criterion.rs][criterion-rs], and aim to compare the performance of this crate with the
Expand Down
3 changes: 2 additions & 1 deletion htsget-actix/benches/request_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use serde::{Deserialize, Serialize};

use htsget_config::types::{Headers, JsonResponse};
use htsget_http::{PostRequest, Region};
use htsget_test::http::{default_config_fixed_port, default_dir, default_dir_data};
use htsget_test::http::default_config_fixed_port;
use htsget_test::util::{default_dir, default_dir_data};

const REFSERVER_DOCKER_IMAGE: &str = "ga4gh/htsget-refserver:1.5.0";
const BENCHMARK_DURATION_SECONDS: u64 = 30;
Expand Down
4 changes: 0 additions & 4 deletions htsget-actix/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,12 @@ use tracing_actix_web::TracingLogger;
use htsget_config::config::cors::CorsConfig;
pub use htsget_config::config::{Config, DataServerConfig, ServiceInfo, TicketServerConfig, USAGE};
pub use htsget_config::storage::Storage;
use htsget_search::from_storage::HtsGetFromStorage;
use htsget_search::HtsGet;
use htsget_search::LocalStorage;

use crate::handlers::{get, post, reads_service_info, variants_service_info, HttpVersionCompat};

pub mod handlers;

pub type HtsGetStorage<T> = HtsGetFromStorage<LocalStorage<T>>;

/// Represents the actix app state.
pub struct AppState<H: HtsGet> {
pub htsget: Arc<H>,
Expand Down
12 changes: 10 additions & 2 deletions htsget-axum/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,20 @@ s3-storage = [
"htsget-config/s3-storage",
"htsget-search/s3-storage",
"htsget-test/s3-storage",
"htsget-test/aws-mocks"
"htsget-test/aws-mocks",
"htsget-http/s3-storage"
]
url-storage = [
"htsget-config/url-storage",
"htsget-search/url-storage",
"htsget-test/url-storage"
"htsget-test/url-storage",
"htsget-http/url-storage"
]
c4gh-experimental = [
"htsget-config/c4gh-experimental",
"htsget-search/c4gh-experimental",
"htsget-test/c4gh-experimental",
"htsget-http/c4gh-experimental"
]
default = []

Expand Down
1 change: 1 addition & 0 deletions htsget-axum/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ htsget-rs. It also contains the data block server which fetches data from a `Loc
This crate has the following features:
* `s3-storage`: used to enable `S3Storage` functionality.
* `url-storage`: used to enable `UrlStorage` functionality.
* `c4gh-experimental`: used to enable `C4GHStorage` functionality.

## License

Expand Down
4 changes: 4 additions & 0 deletions htsget-config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ repository = "https://github.com/umccr/htsget-rs"
[features]
s3-storage = []
url-storage = ["dep:reqwest"]
c4gh-experimental = ["dep:crypt4gh"]
default = []

[dependencies]
Expand All @@ -37,6 +38,9 @@ rustls-pki-types = "1"
# url-storage
reqwest = { version = "0.12", features = ["rustls-tls"], default-features = false, optional = true }

# Crypt4GH
crypt4gh = { version = "0.4", git = "https://github.com/EGA-archive/crypt4gh-rust", optional = true }

[dev-dependencies]
serde_json = "1"
figment = { version = "0.10", features = ["test"] }
Expand Down
39 changes: 38 additions & 1 deletion htsget-config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,40 @@ addressing by setting the `MINIO_DOMAIN` environment variable. [Path][path-addre

See the MinIO deployment [example][minio-deployment] for more information on how to configure htsget-rs and MinIO.

### Crypt4GH

There is experimental support for serving [Crypt4GH][c4gh] encrypted files. This can be enabled by compiling with the
`c4gh-experimental` feature flag.

This allows htsget-rs to read Crypt4GH files and serve them encrypted, directly to the client. In the process of
serving the data, htsget-rs will decrypt the headers of the Crypt4GH files and reencrypt them so that the client can read
them. When the client receives byte ranges from htsget-rs and concatenates them, the output bytes will be Crypt4GH encrypted,
and will need to be decrypted before they can be read. All file formats (BAM, CRAM, VCF, and BCF) are supported using Crypt4GH.

To use this feature, an additional config option called `object_type` under `resolvers.storage` is required,
which allows specifying the private and public keys:

| Option | Description | Type | Default |
|------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|---------|
| `private_key` | The path to the private key which htsget-rs uses to decrypt Crypt4GH data. | Filesystem path | Not Set |
| `recipient_public_key` | The path to the public key which the recipient of the data will use. This is what the client will use to decrypt the returned data, using the corresponding private key. | Filesystem path | Not Set |

For example:

```toml
[[resolvers]]
regex = ".*"
substitution_string = "$0"

[resolvers.storage]
object_type = { private_key = "data/c4gh/keys/bob.sec", recipient_public_key = "data/c4gh/keys/alice.pub" } # pragma: allowlist secret
```

The htsget-rs server expects the Crypt4GH file to end with `.c4gh`, and the index file to be unencrypted. See the [`data/c4gh`][data-c4gh] directory for examples of the file structure.

> [!NOTE]
> This option is currently only supported for `LocalStorage`. The `object_type` will not have an effect if using `S3Storage` or `UrlStorage`.
### As a library

This crate reads config files and environment variables using [figment], and accepts command-line arguments using clap. The main function for this is `from_config`,
Expand All @@ -501,6 +535,7 @@ regex, and changing it by using a substitution string.
This crate has the following features:
* `s3-storage`: used to enable `S3Storage` functionality.
* `url-storage`: used to enable `UrlStorage` functionality.
* `c4gh-experimental`: used to enable `C4GHStorage` functionality.

## License

Expand All @@ -511,4 +546,6 @@ This project is licensed under the [MIT license][license].
[virtual-addressing]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access
[minio-deployment]: ../deploy/examples/minio/README.md
[license]: LICENSE
[minio]: https://min.io/
[minio]: https://min.io/
[c4gh]: https://samtools.github.io/hts-specs/crypt4gh.pdf
[data-c4gh]: ../data/c4gh
12 changes: 12 additions & 0 deletions htsget-config/examples/config-files/c4gh.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# An example of running htsget-rs with Crypt4GH enabled.
# Run with `cargo run -p htsget-axum --features c4gh-experimental -- --config htsget-config/examples/config-files/c4gh.toml`

ticket_server_addr = "127.0.0.1:8080"
data_server_addr = "127.0.0.1:8081"

[[resolvers]]
regex = ".*"
substitution_string = "$0"

[resolvers.storage]
object_type = { private_key = "data/c4gh/keys/bob.sec", recipient_public_key = "data/c4gh/keys/alice.pub" } # pragma: allowlist secret
1 change: 1 addition & 0 deletions htsget-config/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ mod tests {
Authority::from_static("127.0.0.1:8080"),
"data".to_string(),
"/data".to_string(),
Default::default(),
);
let resolver = Resolver::new(
Storage::Local { local_storage },
Expand Down
12 changes: 12 additions & 0 deletions htsget-config/src/storage/local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use http::uri::Authority;
use serde::{Deserialize, Serialize};

use crate::config::{default_localstorage_addr, default_path, DataServerConfig};
use crate::storage::object::ObjectType;
use crate::tls::KeyPairScheme;
use crate::types::Scheme;

Expand All @@ -23,6 +24,7 @@ pub struct LocalStorage {
authority: Authority,
local_path: String,
path_prefix: String,
object_type: ObjectType,
}

impl LocalStorage {
Expand All @@ -32,12 +34,14 @@ impl LocalStorage {
authority: Authority,
local_path: String,
path_prefix: String,
object_type: ObjectType,
) -> Self {
Self {
scheme,
authority,
local_path,
path_prefix,
object_type,
}
}

Expand All @@ -60,6 +64,11 @@ impl LocalStorage {
pub fn path_prefix(&self) -> &str {
&self.path_prefix
}

/// Get the object type, which describes whether objects in this storage are regular or
/// Crypt4GH encrypted.
pub fn object_type(&self) -> &ObjectType {
&self.object_type
}
}

impl Default for LocalStorage {
Expand All @@ -69,6 +78,7 @@ impl Default for LocalStorage {
authority: default_authority(),
local_path: default_local_path(),
path_prefix: Default::default(),
object_type: Default::default(),
}
}
}
Expand All @@ -80,6 +90,7 @@ impl From<&DataServerConfig> for Option<LocalStorage> {
Authority::from_str(&config.addr().to_string()).ok()?,
config.local_path().to_str()?.to_string(),
config.serve_at().to_string(),
Default::default(),
))
}
}
Expand Down Expand Up @@ -134,6 +145,7 @@ mod tests {
Authority::from_static("127.0.0.1:8080"),
"data".to_string(),
"/data".to_string(),
Default::default(),
);

assert_eq!(result.unwrap(), expected);
Expand Down
1 change: 1 addition & 0 deletions htsget-config/src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::storage::url::UrlStorageClient;
use crate::types::{Query, Response, Result};

pub mod local;
pub mod object;
#[cfg(feature = "s3-storage")]
pub mod s3;
#[cfg(feature = "url-storage")]
Expand Down
62 changes: 62 additions & 0 deletions htsget-config/src/storage/object/c4gh.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//! Crypt4GH key parsing.
//!
use crate::error::Error::ParseError;
use crate::error::{Error, Result};
use crypt4gh::error::Crypt4GHError;
use crypt4gh::keys::{get_private_key, get_public_key};
use crypt4gh::Keys;
use serde::Deserialize;
use std::path::PathBuf;

/// Config for Crypt4GH keys.
///
/// Deserialization goes through [`C4GHPath`]: the config supplies the key file paths, and the
/// `TryFrom<C4GHPath>` conversion reads those files to produce the key material held here.
#[derive(Deserialize, Clone, PartialEq, Eq)]
#[serde(try_from = "C4GHPath")]
pub struct C4GHKeys {
    keys: Vec<Keys>,
}

// `Debug` is implemented by hand instead of derived so that private key bytes are never
// written to logs or error output when a config value is debug-formatted.
impl std::fmt::Debug for C4GHKeys {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("C4GHKeys")
            .field("keys", &format_args!("<{} redacted>", self.keys.len()))
            .finish()
    }
}

impl C4GHKeys {
    /// Consume this value and return the underlying Crypt4GH keys.
    pub fn into_inner(self) -> Vec<Keys> {
        self.keys
    }
}

/// Filesystem locations of the Crypt4GH key files, as they are written in the config.
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct C4GHPath {
    /// Path to the private key file.
    private_key: PathBuf,
    /// Path to the recipient's public key file.
    recipient_public_key: PathBuf,
}

impl C4GHPath {
    /// Create a new `C4GHPath` from the private key path and the recipient public key path.
    pub fn new(private_key: PathBuf, recipient_public_key: PathBuf) -> Self {
        C4GHPath {
            private_key,
            recipient_public_key,
        }
    }
}

impl TryFrom<C4GHPath> for C4GHKeys {
type Error = Error;

fn try_from(path: C4GHPath) -> Result<Self> {
let private_key = get_private_key(path.private_key, Ok("".to_string()))?;
let recipient_public_key = get_public_key(path.recipient_public_key)?;

Ok(C4GHKeys {
keys: vec![Keys {
method: 0,
privkey: private_key,
recipient_pubkey: recipient_public_key,
}],
})
}
}

impl From<Crypt4GHError> for Error {
fn from(err: Crypt4GHError) -> Self {
ParseError(err.to_string())
}
}
23 changes: 23 additions & 0 deletions htsget-config/src/storage/object/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//! Defines the type of object used by storage.
//!
#[cfg(feature = "c4gh-experimental")]
pub mod c4gh;

#[cfg(feature = "c4gh-experimental")]
use crate::storage::object::c4gh::C4GHKeys;
use serde::{Deserialize, Serialize};

/// The type of object held by storage: either regular or Crypt4GH encrypted.
///
/// The enum is `untagged`, so no variant name appears in the serialized form; a variant's
/// fields are written directly as the value of the option.
#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq, Eq)]
#[serde(untagged, deny_unknown_fields)]
#[non_exhaustive]
pub enum ObjectType {
/// A regular object that is not Crypt4GH encrypted. This is the default.
#[default]
Regular,
/// A Crypt4GH encrypted object. Only available with the `c4gh-experimental` feature.
#[cfg(feature = "c4gh-experimental")]
C4GH {
// The key fields are flattened into this variant rather than nested under `keys`, and
// the keys are never written back out when serializing.
// NOTE(review): serde documents `deny_unknown_fields` as unsupported in combination with
// `flatten` — confirm unknown fields are rejected as intended.
#[serde(flatten, skip_serializing)]
keys: C4GHKeys,
},
}
Loading

0 comments on commit 7a2023e

Please sign in to comment.