use of cz1.gbs.core.Digest in project polyGembler by c-zhou.
the class GBS method simulate.
public void simulate(final int sim_sample_index) {
try {
// GBSFastqFileBufferedWriter = getGZIPBufferedWriter(GBSFastqFilePath);
String name, line, fastaFilePath;
ArrayList<Integer> cut, recognization;
StringBuilder chromosome;
FastqRead fastq;
int l;
Digest digestion;
fastaFilePath = fastaFileList.get(sim_sample_index);
StringBuilder oos = new StringBuilder();
// try{
BufferedWriter GBSFastqFileBufferedWriter = getGZIPBufferedWriter(GBSFastqFilePath + "_" + sim_sample_index + ".gz", 65536);
BufferedReader br = getBufferedReader(fastaFilePath);
line = br.readLine();
while (line != null) {
name = parseSampleName(fastaFilePath) + "|" + line.replaceFirst(">", "");
chromosome = new StringBuilder();
while ((line = br.readLine()) != null && !line.startsWith(">")) {
chromosome.append(line);
}
l = chromosome.length();
// System.out.println(getSystemTime()+">>> digest starting...");
digestion = new Digest(enzyme, chromosome.toString());
// System.out.println(getSystemTime()+">>> done.");
cut = digestion.getCutsite();
recognization = digestion.getRecognization();
// System.out.println(getSystemTime()+">>> simulate starting...");
// simulate 5'-3' end genome
int coverage;
for (int i = 2; i < cut.size(); i++) {
if (cut.get(i) - cut.get(i - 1) < MIN_FRAGMENT_SIZE)
continue;
coverage = (int) Math.round(meanDepth + random.nextGaussian() * sdDepth);
for (int j = 0; j < coverage; j++) {
fastq = generateFastqRead(chromosome.substring(cut.get(i - 1), cut.get(i)), recognization.get(i - 1), fastaFileBarcodeMap.get(fastaFilePath), "@" + name + "|" + cut.get(i - 1) + "|" + j, false);
// writeFastqRead(fastq);
oos.setLength(0);
oos.append(fastq.identifier);
oos.append(NLS);
oos.append(fastq.sequence);
oos.append(NLS);
oos.append(fastq.plus);
oos.append(NLS);
oos.append(fastq.quality);
oos.append(NLS);
GBSFastqFileBufferedWriter.write(oos.toString());
}
}
// simulate reverse complementary genome
chromosome.reverse();
for (int i = 0; i < l; i++) chromosome.setCharAt(i, baseComplementaryMap.get(chromosome.charAt(i)));
for (int i = 0; i < cut.size(); i++) cut.set(i, l - cut.get(i));
Collections.reverse(cut);
for (int i = 2; i < cut.size(); i++) {
if (cut.get(i) - cut.get(i - 1) < MIN_FRAGMENT_SIZE)
continue;
coverage = (int) Math.round(meanDepth + random.nextGaussian() * sdDepth);
for (int j = 0; j < coverage; j++) {
fastq = generateFastqRead(chromosome.substring(cut.get(i - 1), cut.get(i)), recognization.get(i - 1), fastaFileBarcodeMap.get(fastaFilePath), "@" + name + "|" + cut.get(i - 1) + "|1", true);
// writeFastqRead(fastq);
oos.setLength(0);
oos.append(fastq.identifier);
oos.append(NLS);
oos.append(fastq.sequence);
oos.append(NLS);
oos.append(fastq.plus);
oos.append(NLS);
oos.append(fastq.quality);
oos.append(NLS);
GBSFastqFileBufferedWriter.write(oos.toString());
}
}
// System.out.println(getSystemTime()+">>> done.");
System.out.println(getSystemTime() + ">>> " + name + " done.");
}
br.close();
GBSFastqFileBufferedWriter.close();
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
// GBSFastqFileBufferedWriter.close();
// } catch (IOException | InterruptedException e) {
// } catch (InterruptedException e) {
// e.printStackTrace();
// System.exit(1);
// }
}
Aggregations