Skip to content

Commit 163e40f

Browse files
Piotr Dębski
authored and committed
add object storage unit tests
1 parent be05e84 commit 163e40f

File tree

1 file changed

+140
-1
lines changed

1 file changed

+140
-1
lines changed

datafusion/bio-format-core/src/object_storage.rs

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ impl CompressionType {
5555
}
5656
}
5757
}
58-
#[derive(Debug)]
58+
59+
#[derive(Debug, PartialEq, Eq)]
5960
pub enum StorageType {
6061
GCS,
6162
S3,
@@ -413,3 +414,141 @@ pub async fn get_remote_stream(
413414
_ => panic!("Invalid object storage type"),
414415
}
415416
}
417+
418+
#[cfg(test)]
mod tests {
    // Unit tests for the object-storage helpers defined in this file.
    //
    // The `test_get_remote_stream_bgzf_*` tests call `get_remote_stream_bgzf`
    // against `s3://` and `gs://` URLs with anonymous access enabled, so they
    // presumably need outbound network access to pass.
    // NOTE(review): consider marking them `#[ignore]` if CI runs offline.
    use super::*;
    use tokio::runtime::Runtime;

    /// Builds the `ObjectStorageOptions` shared by the remote-stream tests:
    /// anonymous access, no request-payer, and `CompressionType::AUTO`.
    fn remote_test_options() -> ObjectStorageOptions {
        ObjectStorageOptions {
            chunk_size: Some(64),
            concurrent_fetches: Some(8),
            allow_anonymous: true,
            enable_request_payer: false,
            max_retries: Some(3),
            timeout: Some(120),
            compression_type: Some(CompressionType::AUTO),
        }
    }

    /// Opens a BGZF stream for `file_path` on a fresh Tokio runtime and asserts
    /// that `get_remote_stream_bgzf` returns `Ok`.
    ///
    /// On failure the underlying error is printed once to stderr before the
    /// assertion fires, so the cause shows up in the test output.
    fn assert_remote_bgzf_stream_opens(file_path: &str) {
        let rt = Runtime::new().expect("failed to create Tokio runtime for test");
        rt.block_on(async {
            let result =
                get_remote_stream_bgzf(file_path.to_string(), remote_test_options()).await;
            if let Err(err) = &result {
                eprintln!(
                    "Error fetching remote BGZF stream from {}: {:?}",
                    file_path, err
                );
            }
            assert!(result.is_ok(), "Fetch failed with error.");
        });
    }

    #[test]
    fn test_compression_type_from_string() {
        let cases = [
            ("gz", CompressionType::GZIP),
            ("bgz", CompressionType::BGZF),
            ("none", CompressionType::NONE),
        ];
        for (input, expected) in cases {
            assert_eq!(
                CompressionType::from_string(input.to_string()),
                expected,
                "input: {}",
                input
            );
        }
    }

    #[test]
    fn test_storage_type_from_string() {
        let cases = [
            ("gs", StorageType::GCS),
            ("s3", StorageType::S3),
            ("abfs", StorageType::AZBLOB),
            ("local", StorageType::LOCAL),
            ("file", StorageType::LOCAL),
        ];
        for (prefix, expected) in cases {
            assert_eq!(
                StorageType::from_prefix(prefix.to_string()),
                expected,
                "prefix: {}",
                prefix
            );
        }
    }

    #[test]
    #[should_panic(expected = "Invalid object storage type")]
    fn test_storage_type_from_string_invalid() {
        StorageType::from_prefix("invalid".to_string());
    }

    #[test]
    fn test_get_file_path() {
        let file_path = "s3://bucket_name/folder/file.txt".to_string();
        assert_eq!(get_file_path(file_path), "folder/file.txt".to_string());
    }

    #[test]
    fn test_get_bucket_name() {
        let file_path = "s3://bucket_name/folder/file.txt".to_string();
        assert_eq!(get_bucket_name(file_path), "bucket_name".to_string());
    }

    #[test]
    fn test_get_storage_type() {
        let file_path = "s3://bucket_name/folder/file.txt".to_string();
        assert_eq!(get_storage_type(file_path), StorageType::S3);

        // A path without a scheme is expected to resolve to local storage.
        let local_file_path = "/folder/file.txt".to_string();
        assert_eq!(get_storage_type(local_file_path), StorageType::LOCAL);
    }

    #[test]
    fn test_get_compression_type() {
        // No explicit compression is passed (`None`), so the result must be
        // derived from the file name alone.
        let cases = [
            ("file.vcf.gz", CompressionType::GZIP),
            ("file.vcf.bgz", CompressionType::BGZF),
            ("file.vcf", CompressionType::NONE),
        ];
        for (path, expected) in cases {
            assert_eq!(
                get_compression_type(path.to_string(), None),
                expected,
                "path: {}",
                path
            );
        }
    }

    #[test]
    fn test_get_remote_stream_bgzf_s3() {
        // SAFETY: `set_var` is only sound when no other thread accesses the
        // process environment concurrently. It is called here before this
        // test's Tokio runtime (and its worker threads) is created, and no
        // other test in this module touches the environment.
        // NOTE(review): the test harness may still run other tests in
        // parallel threads — confirm none of them read env vars.
        unsafe {
            env::set_var("AWS_REGION", "us-east-1");
        }
        assert_remote_bgzf_stream_opens(
            "s3://gnomad-public-us-east-1/release/4.1/vcf/exomes/gnomad.exomes.v4.1.sites.chr21.vcf.bgz",
        );
    }

    #[test]
    fn test_get_remote_stream_bgzf_gs() {
        assert_remote_bgzf_stream_opens(
            "gs://gcp-public-data--gnomad/release/4.1/vcf/exomes/gnomad.exomes.v4.1.sites.chr21.vcf.bgz",
        );
    }
}

0 commit comments

Comments
 (0)