Skip to content

Commit 3c6f7a8

Browse files
Piotr Dębski
authored and committed
add object storage unit tests
1 parent be05e84 commit 3c6f7a8

File tree

1 file changed

+143
-1
lines changed

1 file changed

+143
-1
lines changed

datafusion/bio-format-core/src/object_storage.rs

Lines changed: 143 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ impl CompressionType {
5555
}
5656
}
5757
}
58-
#[derive(Debug)]
58+
59+
#[derive(Debug, PartialEq, Eq)]
5960
pub enum StorageType {
6061
GCS,
6162
S3,
@@ -413,3 +414,144 @@ pub async fn get_remote_stream(
413414
_ => panic!("Invalid object storage type"),
414415
}
415416
}
417+
418+
#[cfg(test)]
mod tests {
    //! Unit tests for the object-storage helpers: prefix/extension parsing,
    //! path splitting, and opening remote BGZF streams.

    use super::*;
    use tokio::runtime::Runtime;

    /// Builds the `ObjectStorageOptions` shared by the remote BGZF stream tests
    /// (anonymous access, automatic compression detection).
    fn anonymous_auto_options() -> ObjectStorageOptions {
        ObjectStorageOptions {
            chunk_size: Some(64),
            concurrent_fetches: Some(8),
            allow_anonymous: true,
            enable_request_payer: false,
            max_retries: Some(3),
            timeout: Some(120),
            compression_type: Some(CompressionType::AUTO),
        }
    }

    /// Calls `get_remote_stream_bgzf` for `file_path` on a fresh Tokio runtime
    /// and panics with the underlying error if the call fails.
    ///
    /// NOTE(review): the callers pass public `s3://` / `gs://` URLs, so this
    /// presumably needs network access — confirm that is acceptable for the
    /// default `cargo test` run.
    fn assert_remote_bgzf_stream_opens(file_path: &str) {
        let rt = Runtime::new().expect("failed to create Tokio runtime");
        rt.block_on(async {
            let result =
                get_remote_stream_bgzf(file_path.to_string(), anonymous_auto_options()).await;
            // Report the error exactly once, as part of the panic message, so the
            // failure output itself says why the fetch failed.
            if let Err(err) = &result {
                panic!("Fetch of {} failed with error: {:?}", file_path, err);
            }
        });
    }

    /// Each supported compression name maps to its `CompressionType` variant.
    #[test]
    fn test_compression_type_from_string() {
        assert_eq!(
            CompressionType::from_string("gz".to_string()),
            CompressionType::GZIP
        );
        assert_eq!(
            CompressionType::from_string("bgz".to_string()),
            CompressionType::BGZF
        );
        assert_eq!(
            CompressionType::from_string("none".to_string()),
            CompressionType::NONE
        );
    }

    /// Each supported URL prefix maps to its `StorageType` variant; both
    /// `local` and `file` resolve to `StorageType::LOCAL`.
    #[test]
    fn test_storage_type_from_string() {
        assert_eq!(StorageType::from_prefix("gs".to_string()), StorageType::GCS);
        assert_eq!(StorageType::from_prefix("s3".to_string()), StorageType::S3);
        assert_eq!(
            StorageType::from_prefix("abfs".to_string()),
            StorageType::AZBLOB
        );
        assert_eq!(
            StorageType::from_prefix("local".to_string()),
            StorageType::LOCAL
        );
        assert_eq!(
            StorageType::from_prefix("file".to_string()),
            StorageType::LOCAL
        );
    }

    /// An unknown prefix is expected to panic with the documented message.
    #[test]
    #[should_panic(expected = "Invalid object storage type")]
    fn test_storage_type_from_string_invalid() {
        StorageType::from_prefix("invalid".to_string());
    }

    /// The object key is everything after the bucket name.
    #[test]
    fn test_get_file_path() {
        let file_path = "s3://bucket_name/folder/file.txt".to_string();
        assert_eq!(get_file_path(file_path), "folder/file.txt".to_string());
    }

    /// The bucket name is the first path component after the scheme.
    #[test]
    fn test_get_bucket_name() {
        let file_path = "s3://bucket_name/folder/file.txt".to_string();
        assert_eq!(get_bucket_name(file_path), "bucket_name".to_string());
    }

    /// An `s3://` URL resolves to `S3`; a scheme-less absolute path to `LOCAL`.
    #[test]
    fn test_get_storage_type() {
        let file_path = "s3://bucket_name/folder/file.txt".to_string();
        assert_eq!(get_storage_type(file_path), StorageType::S3);

        let local_file_path = "/folder/file.txt".to_string();
        assert_eq!(get_storage_type(local_file_path), StorageType::LOCAL);
    }

    /// With no explicit override (`None`), the compression type is derived
    /// from the file extension.
    #[test]
    fn test_get_compression_type() {
        let file_path_gz = "file.vcf.gz".to_string();
        assert_eq!(
            get_compression_type(file_path_gz, None),
            CompressionType::GZIP
        );

        let file_path_bgz = "file.vcf.bgz".to_string();
        assert_eq!(
            get_compression_type(file_path_bgz, None),
            CompressionType::BGZF
        );

        let file_path_none = "file.vcf".to_string();
        assert_eq!(
            get_compression_type(file_path_none, None),
            CompressionType::NONE
        );
    }

    /// Opens a BGZF stream for a gnomAD VCF addressed by an `s3://` URL.
    #[test]
    fn test_get_remote_stream_bgzf_s3() {
        // SAFETY: NOTE(review): `set_var` mutates process-global state and the
        // test harness runs tests on multiple threads; this is only sound if no
        // other thread reads or writes the environment concurrently — confirm,
        // or run this test single-threaded.
        unsafe {
            env::set_var("AWS_REGION", "us-east-1");
        }
        assert_remote_bgzf_stream_opens(
            "s3://gnomad-public-us-east-1/release/4.1/vcf/exomes/gnomad.exomes.v4.1.sites.chr21.vcf.bgz",
        );
    }

    /// Opens a BGZF stream for a gnomAD VCF addressed by a `gs://` URL.
    #[test]
    fn test_get_remote_stream_bgzf_gs() {
        assert_remote_bgzf_stream_opens(
            "gs://gcp-public-data--gnomad/release/4.1/vcf/exomes/gnomad.exomes.v4.1.sites.chr21.vcf.bgz",
        );
    }
}

0 commit comments

Comments
 (0)