use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.
the class PadEmptyFastq method copyTo.
private void copyTo(final FastqReader r, final FastqWriter w) {
int padLength = this.N;
long nReads = 0L;
long nFill = 0L;
String fillN = null;
String fillQ = null;
r.setValidationStringency(ValidationStringency.LENIENT);
while (r.hasNext()) {
FastqRecord rec = r.next();
if (++nReads % 1E6 == 0) {
LOG.info("Read " + nReads + " reads. empty reads=" + nFill);
}
if (rec.getReadString().isEmpty()) {
++nFill;
if (padLength < 1) {
padLength = DEFAULT_LENGTH;
}
if (fillN == null) {
StringBuilder b1 = new StringBuilder();
while (b1.length() < padLength) b1.append("N");
fillN = b1.toString();
fillQ = fillN.replace('N', '#');
}
rec = new FastqRecord(rec.getReadName(), fillN, rec.getBaseQualityHeader(), fillQ);
} else if (padLength < 1) {
padLength = rec.getReadString().length();
}
w.write(rec);
}
LOG.info("Done. Read " + nReads + " reads. empty reads=" + nFill);
}
use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.
the class SamRetrieveSeqAndQual method doWork.
@Override
public int doWork(List<String> args) {
FastqReader[] fastqReaders = null;
SamReader samReader = null;
SAMFileWriter samWriter = null;
SAMRecordIterator iter = null;
try {
if (fastqFin == null) {
LOG.error("undefined fastq file");
return -1;
} else {
LOG.info("opening " + fastqFin);
FastqReader r1 = new FastqReader(fastqFin);
if (fastqRin == null) {
fastqReaders = new FastqReader[] { r1 };
} else {
LOG.info("opening " + fastqRin);
FastqReader r2 = new FastqReader(fastqRin);
fastqReaders = new FastqReader[] { r1, r2 };
}
}
samReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
if (sortOrder == null) {
LOG.warning("undefined sort order read are in the sam order");
} else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
LOG.error("Bad Sort Order. Sort this input on read name");
return -1;
}
SAMFileHeader header = samReader.getFileHeader().clone();
SAMProgramRecord prg = header.createProgramRecord();
prg.setCommandLine(this.getProgramCommandLine());
prg.setProgramName(this.getProgramName());
prg.setProgramVersion(this.getVersion());
samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
iter = samReader.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
FastqRecord[] currFastq = new FastqRecord[] { null, null };
while (iter.hasNext()) {
SAMRecord rec = progress.watch(iter.next());
String readName = rec.getReadName();
int fastq_index = 0;
if (rec.getReadPairedFlag()) {
if (fastqReaders.length != 2) {
LOG.error("Not paired but number of fastq!=2");
return -1;
}
fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
} else {
if (fastqReaders.length != 1) {
LOG.error("Not paired but number of fastq!=1");
return -1;
}
fastq_index = 0;
}
if (sortOrder == SAMFileHeader.SortOrder.queryname) {
while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
LOG.error("Read Missing for " + readName);
return -1;
}
}
} else {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
}
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
return -1;
}
String fastqBases = currFastq[fastq_index].getReadString();
String fastqQuals = currFastq[fastq_index].getBaseQualityString();
/* handle orientation */
if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
fastqBases = AcidNucleics.reverseComplement(fastqBases);
StringBuilder sb = new StringBuilder(fastqQuals.length());
for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
fastqQuals = sb.toString();
}
/* remove hard clip */
Cigar cigar = rec.getCigar();
if (cigar != null) {
List<CigarElement> ceList = cigar.getCigarElements();
if (!ceList.isEmpty()) {
CigarElement ce = ceList.get(ceList.size() - 1);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
}
ce = ceList.get(0);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(ce.getLength());
fastqQuals = fastqQuals.substring(ce.getLength());
}
}
}
rec.setBaseQualityString(fastqQuals);
rec.setReadString(fastqBases);
samWriter.addAlignment(rec);
}
progress.finish();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
if (fastqReaders != null)
for (FastqReader r : fastqReaders) CloserUtil.close(r);
CloserUtil.close(iter);
CloserUtil.close(samReader);
CloserUtil.close(samWriter);
}
}
use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.
the class SamExtractClip method run.
private void run(final SamReader r, final FastqWriter out) {
int[] startend = new int[2];
final SAMFileHeader header = r.getFileHeader();
// w=swf.make(header, System.out);
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
SAMRecordIterator it = r.iterator();
while (it.hasNext()) {
final SAMRecord rec = progress.watch(it.next());
if (rec.getReadUnmappedFlag())
continue;
if (rec.isSecondaryOrSupplementary())
continue;
if (rec.getDuplicateReadFlag())
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
String suffix = "";
if (rec.getReadPairedFlag()) {
suffix = (rec.getFirstOfPairFlag() ? "/1" : "/2");
}
startend[0] = 0;
startend[1] = rec.getReadLength();
boolean found = false;
for (int side = 0; side < 2; ++side) {
final CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
if (!ce.getOperator().equals(CigarOperator.S))
continue;
if (ce.getLength() < min_clip_length)
continue;
found = true;
final String clippedSeq;
final String clippedQual;
if (side == 0) {
startend[0] = ce.getLength();
clippedSeq = rec.getReadString().substring(0, startend[0]);
clippedQual = rec.getBaseQualityString().substring(0, startend[0]);
} else {
startend[1] = rec.getReadLength() - ce.getLength();
clippedSeq = rec.getReadString().substring(startend[1]);
clippedQual = rec.getBaseQualityString().substring(startend[1]);
}
out.write(new FastqRecord(rec.getReadName() + suffix + ";" + side + ";" + rec.getReferenceName() + ";" + rec.getAlignmentStart() + ";" + rec.getFlags() + ";" + rec.getCigarString() + ";" + (side == 0 ? "5'" : "3'"), clippedSeq, "", clippedQual));
}
if (!found)
continue;
String bases = rec.getReadString();
String qual = rec.getBaseQualityString();
if (rec.getReadNegativeStrandFlag()) {
bases = AcidNucleics.reverseComplement(bases);
qual = new StringBuilder(qual).reverse().toString();
}
if (print_original_read) {
out.write(new FastqRecord(rec.getReadName() + suffix, bases, "", qual));
}
if (print_clipped_read) {
out.write(new FastqRecord(rec.getReadName() + suffix + ":clipped", bases.substring(startend[0], startend[1]), "", qual.substring(startend[0], startend[1])));
}
}
it.close();
progress.finish();
}
use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.
the class AbstractFastqReader method next.
public FastqRecord next() {
if (!hasNext()) {
throw new NoSuchElementException("next() called when !hasNext()");
}
final FastqRecord rec = nextRecord;
nextRecord = null;
_has_next_called = false;
seqHeader = null;
return rec;
}
use of htsjdk.samtools.fastq.FastqRecord in project jvarkit by lindenb.
the class FourLinesFastqReaderTest method test2.
public void test2() throws Exception {
final String fastqs = "@IL31_4368:1:1:996:8507/2\n" + "\n" + "+\n" + "\n" + "@IL31_4368:1:1:996:21421/2\n" + "CAAAAACTTTCACTTTACCTGCCGGGTTTCCCAGTTTACATTCCACTGTTTGAC\n" + "+\n" + ">DBDDB,B9BAA4AAB7BB?7BBB=91;+*@;5<87+*=/*@@?9=73=.7)7*\n";
FourLinesFastqReader r = new FourLinesFastqReader(new ByteArrayInputStream(fastqs.getBytes()));
r.setValidationStringency(ValidationStringency.LENIENT);
Assert.assertTrue(r.hasNext());
FastqRecord rec = r.next();
Assert.assertNotNull(rec);
Assert.assertTrue(rec.getBaseQualityString().isEmpty());
Assert.assertTrue(rec.getReadString().isEmpty());
Assert.assertTrue(r.hasNext());
rec = r.next();
Assert.assertNotNull(rec);
Assert.assertFalse(rec.getBaseQualityString().isEmpty());
Assert.assertFalse(rec.getReadString().isEmpty());
Assert.assertFalse(r.hasNext());
r.close();
}
Aggregations