use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class SamSlop method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("Reference was not specified.");
return -1;
}
if (this.defaultQual.length() != 1) {
LOG.error("default quality should have length==1 " + this.defaultQual);
}
GenomicSequence genomicSequence = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
final char defaultQUAL = this.defaultQual.charAt(0);
try {
final String inputName = oneFileOrNull(args);
LOG.info("Loading reference");
indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
sfr = openSamReader(inputName);
final SAMFileHeader header = sfr.getFileHeader();
header.setSortOrder(SortOrder.unsorted);
sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final Cigar cigar = rec.getCigar();
if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
sfw.addAlignment(rec);
continue;
}
final StringBuilder sbs = new StringBuilder(rec.getReadString());
final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
if (!this.removeClip) {
// replace clip S/H by match M
for (int i = 0; i < cl.size(); ++i) {
final CigarElement ce = cl.get(i);
if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
}
}
}
if (this.extend5 > 0 && refPos1 > 1) {
if (this.removeClip) {
// /remove hard + soft clip 5'
while (!cl.isEmpty()) {
// first
final CigarElement ce = cl.get(0);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.replace(0, ce.getLength(), "");
sbq.replace(0, ce.getLength(), "");
}
// remove first
cl.remove(0);
}
}
final StringBuilder prefix = new StringBuilder(this.extend5);
// /append + soft clip 5'
for (int i = 0; i < this.extend5; ++i) {
int x1 = (refPos1 - 1) - i;
// break if out of genome
if (x1 < 1)
break;
prefix.insert(0, genomicSequence.charAt(x1 - 1));
}
sbs.insert(0, prefix.toString());
// preprend quality
for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
// prepend cigar
cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
// update start pos
rec.setAlignmentStart(refPos1 - prefix.length());
}
if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
if (this.removeClip) {
// /remove hard + soft clip 3'
while (!cl.isEmpty()) {
// last
final CigarElement ce = cl.get(cl.size() - 1);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.setLength(sbs.length() - ce.getLength());
sbq.setLength(sbq.length() - ce.getLength());
}
// remove last
cl.remove(cl.size() - 1);
}
}
int extend = 0;
for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
sbs.append(genomicSequence.charAt(pos1 - 1));
sbq.append(defaultQUAL);
++extend;
}
// append cigar
cl.add(new CigarElement(extend, CigarOperator.M));
}
// simplify cigar
int idx = 0;
while (idx + 1 < cl.size()) {
final CigarElement ce1 = cl.get(idx);
final CigarElement ce2 = cl.get(idx + 1);
if (ce1.getOperator() == ce2.getOperator()) {
cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
cl.remove(idx + 1);
} else {
idx++;
}
}
rec.setCigar(new Cigar(cl));
rec.setReadString(sbs.toString());
rec.setBaseQualityString(sbq.toString());
final List<SAMValidationError> errors = rec.isValid();
if (errors != null && !errors.isEmpty()) {
for (SAMValidationError err : errors) {
LOG.error(err.getMessage());
}
}
// info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
sfw.addAlignment(rec);
}
progress.finish();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(indexedFastaSequenceFile);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class VcfToBam method doWork.
@Override
public int doWork(final List<String> args) {
if (this.faidx == null) {
LOG.error("No REF defined");
return -1;
}
if (readSize <= 0) {
LOG.error("bad read size");
return -1;
}
if (fragmentSize <= readSize * 2) {
LOG.error("bad fragment size");
return -1;
}
VcfIterator iter = null;
try {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidx);
iter = super.openVcfIterator(super.oneFileOrNull(args));
run(iter);
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.
the class Sam2Tsv method doWork.
@Override
public int doWork(final List<String> args) {
if (printAlignment) {
L1 = new StringBuilder();
L2 = new StringBuilder();
L3 = new StringBuilder();
}
SamReader samFileReader = null;
try {
if (refFile != null) {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
}
this.out = openFileOrStdoutAsPrintWriter(outputFile);
this.out.println("#READ_NAME\tFLAG\tCHROM\tREAD_POS\tBASE\tQUAL\tREF_POS\tREF\tOP");
samFileReader = openSamReader(oneFileOrNull(args));
scan(samFileReader);
samFileReader.close();
samFileReader = null;
this.out.flush();
this.out.close();
this.out = null;
return RETURN_OK;
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(samFileReader);
CloserUtil.close(out);
L1 = null;
L2 = null;
L3 = null;
}
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class ReadUtilsUnitTest method testReadWithNsRefIndexInDeletion.
@Test
public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException {
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(exampleReference));
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
final int readLength = 76;
final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
read.setBases(Utils.dupBytes((byte) 'A', readLength));
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
read.setCigar("3M414N1D73M");
final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9392, ReadUtils.ClippingTail.LEFT_TAIL);
Assert.assertEquals(result, 2);
}
use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project gatk by broadinstitute.
the class ReadUtilsUnitTest method testReadWithNsRefAfterDeletion.
@Test
public void testReadWithNsRefAfterDeletion() throws FileNotFoundException {
final IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(exampleReference));
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(seq.getSequenceDictionary());
final int readLength = 76;
final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, 8975, readLength);
read.setBases(Utils.dupBytes((byte) 'A', readLength));
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
read.setCigar("3M414N1D73M");
final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9393, ReadUtils.ClippingTail.LEFT_TAIL);
Assert.assertEquals(result, 3);
}
Aggregations