use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.
the class GATKTool method getHeaderForSAMWriter.
/**
* Returns the SAM header suitable for writing SAM/BAM/CRAM files produced by this tool.
*
* The default implementation calls {@link #getHeaderForReads} (and makes an empty header if that call returns null)
* and optionally adds program tag to the header with a program version {@link #getVersion()}, program name {@link #getToolName()}
* and command line {@link #getCommandLine()}.
*
* Subclasses may override.
*
* @return SAM header for the SAM writer with (optionally, if {@link #addOutputSAMProgramRecord} is true) program record appropriately.
*/
protected SAMFileHeader getHeaderForSAMWriter() {
final SAMFileHeader header = getHeaderForReads() == null ? new SAMFileHeader() : getHeaderForReads();
if (addOutputSAMProgramRecord) {
final SAMProgramRecord programRecord = new SAMProgramRecord(createProgramGroupID(header));
programRecord.setProgramVersion(getVersion());
programRecord.setCommandLine(getCommandLine());
programRecord.setProgramName(getToolName());
header.addProgramRecord(programRecord);
}
return header;
}
use of htsjdk.samtools.SAMProgramRecord in project polyGembler by c-zhou.
the class SamFileSplit method run.
@Override
public void run() {
// TODO Auto-generated method stub
Utils.makeOutputDir(bam_out);
final File[] beds = new File(bed_in).listFiles();
final String[] out_prefix = new String[beds.length];
for (int i = 0; i < beds.length; i++) {
out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll(".bed$", "");
Utils.makeOutputDir(out_prefix[i]);
}
final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".bam");
}
});
this.initial_thread_pool();
for (File bam : bams) {
executor.submit(new Runnable() {
private File bam;
@Override
public void run() {
// TODO Auto-generated method stub
try {
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(bam);
final SAMFileHeader header = inputSam.getFileHeader();
final SAMRecordIterator iter = inputSam.iterator();
final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
final Map<String, Integer> outMap = new HashMap<String, Integer>();
final String out = bam.getName();
for (int i = 0; i < beds.length; i++) {
Set<String> bed_seq = new HashSet<String>();
String tmp;
BufferedReader br = new BufferedReader(new FileReader(beds[i]));
String line;
while ((line = br.readLine()) != null) {
tmp = line.split("\\s+")[0];
bed_seq.add(tmp);
outMap.put(tmp, i);
}
br.close();
final SAMFileHeader header_i = new SAMFileHeader();
final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
header_i.setAttribute("VN", header.getAttribute("VN"));
header_i.setAttribute("SO", header.getAttribute("SO"));
List<SAMSequenceRecord> seqs = seqdic.getSequences();
for (SAMSequenceRecord seq : seqs) if (bed_seq.contains(seq.getSequenceName()))
seqdic_i.addSequence(seq);
header_i.setSequenceDictionary(seqdic_i);
for (SAMReadGroupRecord rg : header.getReadGroups()) header_i.addReadGroup(rg);
for (SAMProgramRecord pg : header.getProgramRecords()) header_i.addProgramRecord(pg);
outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
seqdics[i] = seqdic_i;
}
Set<String> refs = outMap.keySet();
String ref;
int f;
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (refs.contains(ref = rec.getReferenceName())) {
f = outMap.get(ref);
rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
outputSam[f].addAlignment(rec);
}
}
iter.close();
inputSam.close();
for (int i = 0; i < outputSam.length; i++) outputSam[i].close();
myLogger.info(out + " return true");
} catch (Exception e) {
Thread t = Thread.currentThread();
t.getUncaughtExceptionHandler().uncaughtException(t, e);
e.printStackTrace();
executor.shutdown();
System.exit(1);
}
}
public Runnable init(File bam) {
this.bam = bam;
return (this);
}
}.init(bam));
}
this.waitFor();
}
use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class PcrClipReads method run.
private int run(final SamReader reader) {
final SAMFileHeader header1 = reader.getFileHeader();
final SAMFileHeader header2 = header1.clone();
Optional<SAMProgramRecord> samProgramRecord = Optional.empty();
if (this.programId) {
final SAMProgramRecord spr = header2.createProgramRecord();
samProgramRecord = Optional.of(spr);
spr.setProgramName(PcrClipReads.class.getSimpleName());
spr.setProgramVersion(this.getGitHash());
spr.setCommandLine(getProgramCommandLine().replace('\t', ' '));
}
header2.addComment(getProgramName() + " " + getVersion() + ": Processed with " + getProgramCommandLine());
header2.setSortOrder(SortOrder.unsorted);
SAMFileWriter sw = null;
SAMRecordIterator iter = null;
try {
sw = this.writingBamArgs.openSAMFileWriter(outputFile, header2, false);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1).logger(LOG);
iter = reader.iterator();
while (iter.hasNext()) {
SAMRecord rec = progress.watch(iter.next());
if (this.onlyFlag != -1 && (rec.getFlags() & this.onlyFlag) != 0) {
sw.addAlignment(rec);
continue;
}
if (rec.getReadUnmappedFlag()) {
sw.addAlignment(rec);
continue;
}
final Interval fragment = findInterval(rec);
if (fragment == null) {
rec.setMappingQuality(0);
sw.addAlignment(rec);
continue;
}
// strand is '-' and overap in 5' of PCR fragment
if (rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentStart() && rec.getAlignmentStart() < fragment.getEnd()) {
rec.setMappingQuality(0);
sw.addAlignment(rec);
continue;
}
// strand is '+' and overap in 3' of PCR fragment
if (!rec.getReadNegativeStrandFlag() && fragment.getStart() < rec.getAlignmentEnd() && rec.getAlignmentEnd() < fragment.getEnd()) {
rec.setMappingQuality(0);
sw.addAlignment(rec);
continue;
}
// contained int PCR fragment
if (rec.getAlignmentStart() >= fragment.getStart() && rec.getAlignmentEnd() <= fragment.getEnd()) {
sw.addAlignment(rec);
continue;
}
final ReadClipper readClipper = new ReadClipper();
if (samProgramRecord.isPresent()) {
readClipper.setProgramGroup(samProgramRecord.get().getId());
}
rec = readClipper.clip(rec, fragment);
sw.addAlignment(rec);
}
progress.finish();
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sw);
}
}
use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class SamRetrieveSeqAndQual method doWork.
@Override
public int doWork(List<String> args) {
FastqReader[] fastqReaders = null;
SamReader samReader = null;
SAMFileWriter samWriter = null;
SAMRecordIterator iter = null;
try {
if (fastqFin == null) {
LOG.error("undefined fastq file");
return -1;
} else {
LOG.info("opening " + fastqFin);
FastqReader r1 = new FastqReader(fastqFin);
if (fastqRin == null) {
fastqReaders = new FastqReader[] { r1 };
} else {
LOG.info("opening " + fastqRin);
FastqReader r2 = new FastqReader(fastqRin);
fastqReaders = new FastqReader[] { r1, r2 };
}
}
samReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader.SortOrder sortOrder = samReader.getFileHeader().getSortOrder();
if (sortOrder == null) {
LOG.warning("undefined sort order read are in the sam order");
} else if (!sortOrder.equals(SAMFileHeader.SortOrder.queryname)) {
LOG.error("Bad Sort Order. Sort this input on read name");
return -1;
}
SAMFileHeader header = samReader.getFileHeader().clone();
SAMProgramRecord prg = header.createProgramRecord();
prg.setCommandLine(this.getProgramCommandLine());
prg.setProgramName(this.getProgramName());
prg.setProgramVersion(this.getVersion());
samWriter = writingBamArgs.openSAMFileWriter(bamOut, header, true);
iter = samReader.iterator();
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
FastqRecord[] currFastq = new FastqRecord[] { null, null };
while (iter.hasNext()) {
SAMRecord rec = progress.watch(iter.next());
String readName = rec.getReadName();
int fastq_index = 0;
if (rec.getReadPairedFlag()) {
if (fastqReaders.length != 2) {
LOG.error("Not paired but number of fastq!=2");
return -1;
}
fastq_index = (rec.getFirstOfPairFlag() ? 0 : 1);
} else {
if (fastqReaders.length != 1) {
LOG.error("Not paired but number of fastq!=1");
return -1;
}
fastq_index = 0;
}
if (sortOrder == SAMFileHeader.SortOrder.queryname) {
while (currFastq[fastq_index] == null || normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) < 0) {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) > 0) {
LOG.error("Read Missing for " + readName);
return -1;
}
}
} else {
if (!fastqReaders[fastq_index].hasNext()) {
LOG.error("Read Missing for " + readName);
return -1;
}
currFastq[fastq_index] = fastqReaders[fastq_index].next();
}
if (normalizeFastqName(currFastq[fastq_index].getReadName()).compareTo(readName) != 0) {
LOG.error("Read Missing/Error for " + readName + " current:" + currFastq[fastq_index].getReadName());
return -1;
}
String fastqBases = currFastq[fastq_index].getReadString();
String fastqQuals = currFastq[fastq_index].getBaseQualityString();
/* handle orientation */
if (!rec.getReadUnmappedFlag() && rec.getReadNegativeStrandFlag()) {
fastqBases = AcidNucleics.reverseComplement(fastqBases);
StringBuilder sb = new StringBuilder(fastqQuals.length());
for (int i = fastqQuals.length() - 1; i >= 0; --i) sb.append(fastqQuals.charAt(i));
fastqQuals = sb.toString();
}
/* remove hard clip */
Cigar cigar = rec.getCigar();
if (cigar != null) {
List<CigarElement> ceList = cigar.getCigarElements();
if (!ceList.isEmpty()) {
CigarElement ce = ceList.get(ceList.size() - 1);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(0, fastqBases.length() - ce.getLength());
fastqQuals = fastqQuals.substring(0, fastqQuals.length() - ce.getLength());
}
ce = ceList.get(0);
if (ce.getOperator() == CigarOperator.HARD_CLIP) {
fastqBases = fastqBases.substring(ce.getLength());
fastqQuals = fastqQuals.substring(ce.getLength());
}
}
}
rec.setBaseQualityString(fastqQuals);
rec.setReadString(fastqBases);
samWriter.addAlignment(rec);
}
progress.finish();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
if (fastqReaders != null)
for (FastqReader r : fastqReaders) CloserUtil.close(r);
CloserUtil.close(iter);
CloserUtil.close(samReader);
CloserUtil.close(samWriter);
}
}
use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class FindNewSpliceSites method doWork.
@Override
public int doWork(final List<String> args) {
if (this.knownGeneUri == null || this.knownGeneUri.trim().isEmpty()) {
LOG.error("known Gene file undefined");
return -1;
}
SamReader sfr = null;
try {
final Pattern tab = Pattern.compile("[\t]");
{
LOG.info("Opening " + this.knownGeneUri);
LineIterator r = IOUtils.openURIForLineIterator(this.knownGeneUri);
while (r.hasNext()) {
final KnownGene g = new KnownGene(tab.split(r.next()));
// need spliced one
if (g.getExonCount() == 1)
continue;
final Interval interval = new Interval(g.getContig(), g.getTxStart() + 1, g.getTxEnd());
List<KnownGene> L = this.knownGenesMap.get(interval);
if (L == null) {
L = new ArrayList<>();
this.knownGenesMap.put(interval, L);
}
L.add(g);
}
LOG.info("Done reading: " + this.knownGeneUri);
}
sfr = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = sfr.getFileHeader().clone();
SAMProgramRecord p = header.createProgramRecord();
p.setCommandLine(getProgramCommandLine());
p.setProgramVersion(getVersion());
p.setProgramName(getProgramName());
this.sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
header = sfr.getFileHeader().clone();
p = header.createProgramRecord();
p.setCommandLine(getProgramCommandLine());
p.setProgramVersion(getVersion());
p.setProgramName(getProgramName());
this.weird = this.writingBamArgs.createSAMFileWriterFactory().makeSAMWriter(header, true, new NullOuputStream());
scan(sfr);
sfr.close();
LOG.info("Done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfr);
CloserUtil.close(this.sfw);
CloserUtil.close(this.weird);
}
}
Aggregations