Skip to content

Commit 77f6bd5

Browse files
authored
Ported from zig (#475)
1 parent f951bfc commit 77f6bd5

File tree

2 files changed

+168
-0
lines changed

2 files changed

+168
-0
lines changed

bench/algorithm/fasta/6.rs

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
// Ported from 1.zig
2+
3+
use std::io::{self, BufWriter, StdoutLock, Write};
4+
5+
const MAX_LINE_LENGTH: usize = 60;
6+
const IM: u32 = 139968;
7+
const IA: u32 = 3877;
8+
const IC: u32 = 29573;
9+
10+
struct AminoAcid {
11+
l: u8,
12+
p: f64,
13+
}
14+
15+
struct Random {
16+
seed: u32,
17+
}
18+
19+
impl Random {
20+
fn new() -> Self {
21+
Self { seed: 42 }
22+
}
23+
24+
fn next(&mut self) -> f64 {
25+
self.seed = (self.seed * IA + IC) % IM;
26+
(IM as f64 * self.seed as f64) / IM as f64
27+
}
28+
}
29+
30+
impl Iterator for Random {
31+
type Item = f64;
32+
33+
fn next(&mut self) -> Option<Self::Item> {
34+
Some(self.next())
35+
}
36+
}
37+
38+
fn repeat_and_wrap(
39+
out: &mut BufWriter<StdoutLock<'static>>,
40+
sequence: &[u8],
41+
count: usize,
42+
) -> io::Result<()> {
43+
let n = sequence.len();
44+
let padded_sequence = (0..(n + MAX_LINE_LENGTH))
45+
.map(|i| sequence[i % n])
46+
.collect::<Vec<_>>();
47+
48+
let mut off = 0;
49+
let mut idx = 0;
50+
while idx < count {
51+
let rem = count - idx;
52+
let line_length = MAX_LINE_LENGTH.min(rem);
53+
out.write_all(&padded_sequence[off..off + line_length])?;
54+
out.write_all(&[b'\n'])?;
55+
56+
off += line_length;
57+
if off > n {
58+
off -= n;
59+
}
60+
idx += line_length;
61+
}
62+
Ok(())
63+
}
64+
65+
fn generate_and_wrap(
66+
random: &mut Random,
67+
out: &mut BufWriter<StdoutLock<'static>>,
68+
nucleotides: &[AminoAcid],
69+
count: usize,
70+
) -> io::Result<()> {
71+
let mut cum_prob = 0.0;
72+
let cum_prob_total = nucleotides
73+
.iter()
74+
.map(|&AminoAcid { l: _, p }| {
75+
cum_prob += p;
76+
cum_prob * IM as f64
77+
})
78+
.collect::<Vec<_>>();
79+
80+
let mut line = [0u8; MAX_LINE_LENGTH + 1];
81+
line[MAX_LINE_LENGTH] = b'\n';
82+
83+
let mut idx = 0;
84+
while idx < count {
85+
let rem = count - idx;
86+
let line_length = MAX_LINE_LENGTH.min(rem);
87+
88+
line[..line_length]
89+
.iter_mut()
90+
.zip(&mut *random)
91+
.for_each(|(col, r)| {
92+
let c = cum_prob_total
93+
.iter()
94+
.fold(0, |acc, &n| acc + (n <= r) as usize);
95+
96+
*col = nucleotides[c].l;
97+
});
98+
99+
line[line_length] = b'\n';
100+
out.write_all(&line[..line_length + 1])?;
101+
idx += line_length;
102+
}
103+
Ok(())
104+
}
105+
106+
pub fn main() -> io::Result<()> {
107+
let stdout = io::stdout();
108+
let mut stdout = io::BufWriter::new(stdout.lock());
109+
let n = std::env::args_os()
110+
.nth(1)
111+
.and_then(|s| s.as_os_str().to_str().and_then(|s| s.parse().ok()))
112+
.unwrap_or(1000);
113+
114+
let homo_sapiens_alu =
115+
b"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC\
116+
AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG\
117+
TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG\
118+
AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
119+
120+
stdout.write_all(b">ONE Homo sapiens alu\n".as_slice())?;
121+
repeat_and_wrap(&mut stdout, &homo_sapiens_alu[..], 2 * n)?;
122+
let mut random = Random::new();
123+
124+
let iub_nucleotide_info = &[
125+
AminoAcid { l: b'a', p: 0.27 },
126+
AminoAcid { l: b'c', p: 0.12 },
127+
AminoAcid { l: b'g', p: 0.12 },
128+
AminoAcid { l: b't', p: 0.27 },
129+
AminoAcid { l: b'B', p: 0.02 },
130+
AminoAcid { l: b'D', p: 0.02 },
131+
AminoAcid { l: b'H', p: 0.02 },
132+
AminoAcid { l: b'K', p: 0.02 },
133+
AminoAcid { l: b'M', p: 0.02 },
134+
AminoAcid { l: b'N', p: 0.02 },
135+
AminoAcid { l: b'R', p: 0.02 },
136+
AminoAcid { l: b'S', p: 0.02 },
137+
AminoAcid { l: b'V', p: 0.02 },
138+
AminoAcid { l: b'W', p: 0.02 },
139+
AminoAcid { l: b'Y', p: 0.02 },
140+
];
141+
142+
stdout.write_all(b">TWO IUB ambiguity codes\n".as_slice())?;
143+
generate_and_wrap(&mut random, &mut stdout, iub_nucleotide_info, 3 * n)?;
144+
145+
let homo_sapien_nucleotide_info = &[
146+
AminoAcid {
147+
l: b'a',
148+
p: 0.3029549426680,
149+
},
150+
AminoAcid {
151+
l: b'c',
152+
p: 0.1979883004921,
153+
},
154+
AminoAcid {
155+
l: b'g',
156+
p: 0.1975473066391,
157+
},
158+
AminoAcid {
159+
l: b't',
160+
p: 0.3015094502008,
161+
},
162+
];
163+
164+
stdout.write_all(b">THREE Homo sapiens frequency\n".as_slice())?;
165+
generate_and_wrap(&mut random, &mut stdout, homo_sapien_nucleotide_info, 5 * n)?;
166+
Ok(())
167+
}

bench/bench_rust.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ problems:
3232
- 1c.rs
3333
- 5-m.rs
3434
- 5c-m.rs
35+
- 6.rs
3536
- name: knucleotide
3637
source:
3738
- 8.rs

0 commit comments

Comments
 (0)