Skip to content
1 change: 1 addition & 0 deletions hts-sys/wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "htslib/htslib/tbx.h"
#include "htslib/htslib/synced_bcf_reader.h"
#include "htslib/htslib/kbitset.h"
#include "htslib/htslib/faidx.h"


// The following functions have to be wrapped here because they are inline in htslib.
Expand Down
13 changes: 13 additions & 0 deletions src/faidx/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use snafu::Snafu;
use std::path::PathBuf;

pub type Result<T, E = Error> = std::result::Result<T, E>;

#[derive(Snafu, Debug, PartialEq)]
#[snafu(visibility = "pub")]
pub enum Error {
#[snafu(display("file not found: {}", path.display()))]
FileNotFound { path: PathBuf },
#[snafu(display("invalid (non-unique) characters in path"))]
Comment thread
dlaehnemann marked this conversation as resolved.
NonUnicodePath,
}
146 changes: 146 additions & 0 deletions src/faidx/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// Copyright 2019 Manuel Landesfeind, Evotec International GmbH
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
// This file may not be copied, modified, or distributed
// except according to those terms.

//!
//! Module for working with faidx-indexed FASTA files.
//!

use std::ffi;
use std::path::Path;
use url::Url;

use crate::htslib;

pub mod errors;
pub use errors::{Error, Result};

fn path_as_bytes<'a, P: 'a + AsRef<Path>>(path: P, must_exist: bool) -> Result<Vec<u8>> {
if path.as_ref().exists() || !must_exist {
Ok(path
.as_ref()
.to_str()
.ok_or(Error::NonUnicodePath)?
.as_bytes()
.to_owned())
} else {
Err(Error::FileNotFound {
path: path.as_ref().to_owned(),
})
}
}
Comment thread
dlaehnemann marked this conversation as resolved.
/// A Fasta reader.
#[derive(Debug)]
pub struct Reader {
inner: *mut htslib::faidx_t,
}

impl Reader {
/// Create a new Reader from path.
///
/// # Arguments
///
/// * `path` - the path to open.
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
Self::new(&path_as_bytes(path, true)?)
}

/// Create a new Reader from URL.
Comment thread
dlaehnemann marked this conversation as resolved.
pub fn from_url(url: &Url) -> Result<Self, Error> {
Self::new(url.as_str().as_bytes())
}

/// Create a new Reader.
///
/// # Arguments
///
/// * `path` - the path to open
fn new(path: &[u8]) -> Result<Self, Error> {
let cpath = ffi::CString::new(path).unwrap();
let inner = unsafe { htslib::fai_load(cpath.as_ptr()) };
Ok(Self { inner })
}
Comment thread
dlaehnemann marked this conversation as resolved.

/// Fetches the sequence and returns it
///
/// # Arguments
///
/// * `name` - the name of the template sequence (e.g., "chr1")
/// * `begin` - the offset within the template sequence (starting with 0)
/// * `end` - the end position to return
pub fn fetch_seq<N: AsRef<str>>(&self, name: N, begin: usize, end: usize) -> String {
let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
let len_out: i32 = 0;
let cseq = unsafe {
let ptr = htslib::faidx_fetch_seq(
self.inner, //*const faidx_t,
cname.as_ptr(), // c_name
begin as ::std::os::raw::c_int, // p_beg_i
end as ::std::os::raw::c_int, // p_end_i
&mut (len_out as ::std::os::raw::c_int), //len
);
ffi::CStr::from_ptr(ptr)
};

cseq.to_str().unwrap().to_owned()
}
}


#[cfg(test)]
mod tests {
use super::*;

fn open_reader() -> Reader {
Reader::from_path(format!("{}/test/test_cram.fa", env!("CARGO_MANIFEST_DIR"))).ok().unwrap()
}

#[test]
fn faidx_open() {
}
Comment thread
dlaehnemann marked this conversation as resolved.

#[test]
fn faidx_read_chr_first_base() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 0 , 0);
assert_eq!(seq.len(), 1);
assert_eq!(seq, "G");
}

#[test]
fn faidx_read_chr_start() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 0 , 9);
assert_eq!(seq.len(), 10);
assert_eq!(seq, "GGGCACAGCC");
}

#[test]
fn faidx_read_chr_between() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 4 , 14);
assert_eq!(seq.len(), 11);
assert_eq!(seq, "ACAGCCTCACC");
}

#[test]
fn faidx_read_chr_end() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 110, 120);
assert_eq!(seq.len(), 10);
assert_eq!(seq, "CCCCTCCGTG");
}

#[test]
fn faidx_read_twice() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 110, 120);
assert_eq!(seq.len(), 10);
assert_eq!(seq, "CCCCTCCGTG");

let seq = r.fetch_seq("chr1", 5, 9);
assert_eq!(seq.len(), 5);
assert_eq!(seq, "CAGCC");
}
}
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ extern crate snafu;

pub mod bam;
pub mod bcf;
pub mod faidx;
pub mod htslib;
pub mod tbx;
pub mod utils;
mod utils;