//! Module with useful utilities

use RosalindResult;
use constants::FASTA_LABEL_SYMBOL;

/// Returns `true` when `s` is a FASTA label (description) line.
///
/// A line counts as a label when its first non-whitespace content is
/// `FASTA_LABEL_SYMBOL`. Surrounding whitespace is ignored so that indented
/// datasets are still recognised, while the symbol appearing later in a line
/// does not turn that line into a label.
fn is_fasta_label(s: &str) -> bool {
    s.trim().starts_with(FASTA_LABEL_SYMBOL)
}

/// Parses a dataset in FASTA format into a list of sequence strings.
///
/// Every line recognised by `is_fasta_label` starts a new record. All other
/// lines are trimmed of surrounding whitespace and appended, in order, to the
/// sequence of the current record. The label text itself is discarded.
///
/// Records that contain no sequence data are omitted, so an empty dataset (or
/// one that consists only of labels) yields an empty vector.
///
/// # Errors
///
/// This function currently always returns `Ok`.
///
/// # Examples
/// ```
/// use rosalind::utils::*;
///
/// let fasta_dataset = ">Rosalind_1
///     CCTGCGGAAG
///     TCCCACTAAT
///     >Rosalind_2
///     CCATCGGTAG
///     ATATCCATTT
///     >Rosalind_3
///     CCACCCTCGT
///     TGGGAACCTG";
///
/// let expected_dataset = vec![
///     "CCTGCGGAAGTCCCACTAAT",
///     "CCATCGGTAGATATCCATTT",
///     "CCACCCTCGTTGGGAACCTG",
/// ];
///
/// assert_eq!(parse_fasta_dataset(fasta_dataset).unwrap(), expected_dataset);
/// ```
pub fn parse_fasta_dataset(dataset: &str) -> RosalindResult<Vec<String>> {
    let mut sequences: Vec<String> = Vec::new();
    let mut current = String::new();

    for line in dataset.lines() {
        if is_fasta_label(line) {
            // A label closes the record collected so far. Records that never
            // received any sequence data are skipped.
            if !current.is_empty() {
                sequences.push(current);
                current = String::new();
            }
        } else {
            current.push_str(line.trim());
        }
    }

    // The final record has no label after it, so flush it explicitly.
    if !current.is_empty() {
        sequences.push(current);
    }

    Ok(sequences)
}

/// Checks that a label line is recognised and a plain sequence line is not.
#[test]
fn it_should_determine_fasta_label() {
    assert!(is_fasta_label(">Rosalind_1"));
    assert!(!is_fasta_label("CCTGCGGAAG"));
}

#[cfg(test)]
mod tests {
    use super::parse_fasta_dataset;

    /// Parses a three-record dataset whose lines after the first are indented
    /// and checks that each record's sequence lines are joined into one string.
    #[test]
    fn it_should_parse_fasta_dataset() {
        let expected = vec![
            "CCTGCGGAAGTCCCACTAAT",
            "CCATCGGTAGATATCCATTT",
            "CCACCCTCGTTGGGAACCTG",
        ];

        let input = ">Rosalind_1
            CCTGCGGAAG
            TCCCACTAAT
            >Rosalind_2
            CCATCGGTAG
            ATATCCATTT
            >Rosalind_3
            CCACCCTCGT
            TGGGAACCTG";

        let parsed = parse_fasta_dataset(input).unwrap();

        assert_eq!(parsed, expected);
    }
}