use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.
the class MergeBamAlignment method doWork.
@Override
protected Object doWork() {
// Check the files are readable/writable
SAMProgramRecord prod = null;
if (PROGRAM_RECORD_ID != null) {
prod = new SAMProgramRecord(PROGRAM_RECORD_ID);
prod.setProgramVersion(PROGRAM_GROUP_VERSION);
prod.setCommandLine(PROGRAM_GROUP_COMMAND_LINE);
prod.setProgramName(PROGRAM_GROUP_NAME);
}
if (EXPECTED_ORIENTATIONS == null || EXPECTED_ORIENTATIONS.isEmpty()) {
EXPECTED_ORIENTATIONS = Arrays.asList(SamPairUtil.PairOrientation.FR);
}
final SamAlignmentMerger merger = new SamAlignmentMerger(UNMAPPED_BAM, OUTPUT, REFERENCE_SEQUENCE, prod, CLIP_ADAPTERS, IS_BISULFITE_SEQUENCE, ALIGNED_READS_ONLY, ALIGNED_BAM, MAX_INSERTIONS_OR_DELETIONS, ATTRIBUTES_TO_RETAIN, ATTRIBUTES_TO_REMOVE, READ1_TRIM, READ2_TRIM, READ1_ALIGNED_BAM, READ2_ALIGNED_BAM, EXPECTED_ORIENTATIONS, SORT_ORDER, PRIMARY_ALIGNMENT_STRATEGY.newInstance(), ADD_MATE_CIGAR);
merger.setClipOverlappingReads(CLIP_OVERLAPPING_READS);
merger.setKeepAlignerProperPairFlags(ALIGNER_PROPER_PAIR_FLAGS);
merger.setIncludeSecondaryAlignments(INCLUDE_SECONDARY_ALIGNMENTS);
merger.mergeAlignment(REFERENCE_SEQUENCE);
merger.close();
return null;
}
use of htsjdk.samtools.SAMProgramRecord in project polyGembler by c-zhou.
the class SamFileExtract method run.
@Override
public void run() {
// TODO Auto-generated method stub
final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
final SamReader inputSam = factory.open(new File(bam_in));
final SAMFileHeader header = inputSam.getFileHeader();
final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
final SAMFileHeader header_out = new SAMFileHeader();
final SAMSequenceDictionary seqdic_out = new SAMSequenceDictionary();
SAMRecordIterator iter = inputSam.iterator();
File bed_file = new File(bed_in);
final Set<String> extract = new HashSet<String>();
try (BufferedReader br = new BufferedReader(new FileReader(bed_file))) {
String line;
while ((line = br.readLine()) != null) extract.add(line.split("\\s+")[0]);
} catch (IOException e) {
e.printStackTrace();
System.exit(1);
}
header_out.setAttribute("VN", header.getAttribute("VN"));
header_out.setAttribute("SO", header.getAttribute("SO"));
List<SAMSequenceRecord> seqs = seqdic.getSequences();
for (SAMSequenceRecord seq : seqs) if (extract.contains(seq.getSequenceName()))
seqdic_out.addSequence(seq);
header_out.setSequenceDictionary(seqdic_out);
for (SAMReadGroupRecord rg : header.getReadGroups()) header_out.addReadGroup(rg);
for (SAMProgramRecord pg : header.getProgramRecords()) header_out.addProgramRecord(pg);
final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_out, true, new File(bam_out));
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (extract.contains(rec.getReferenceName()))
outputSam.addAlignment(rec);
}
iter.close();
try {
inputSam.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
outputSam.close();
System.err.println(bam_in + " return true");
}
use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.
the class BlastToSam method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null || !this.faidx.exists() || !this.faidx.isFile()) {
LOG.error("Option -R was not defined or dictionary missing");
return -1;
}
final boolean interleaved_input = this.EXPECTED_SIZE > 0;
final int maxRecordsInRam = 5000;
SAMFileWriter sfw = null;
XMLEventReader rx = null;
final SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
sfwf.setCreateIndex(false);
sfwf.setMaxRecordsInRam(maxRecordsInRam);
sfwf.setCreateMd5File(false);
sfwf.setUseAsyncIo(false);
final SAMFileHeader header = new SAMFileHeader();
try {
LOG.info("opening " + faidx);
this.dictionary = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
header.setSortOrder(SortOrder.unsorted);
header.setSequenceDictionary(this.dictionary);
final JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
this.unmarshaller = jc.createUnmarshaller();
final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
xmlInputFactory.setXMLResolver(new XMLResolver() {
@Override
public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
return null;
}
});
final String inputName = oneFileOrNull(args);
if (inputName == null) {
LOG.info("Reading from stdin");
rx = xmlInputFactory.createXMLEventReader(stdin());
} else if (args.size() == 1) {
LOG.info("Reading from " + inputName);
rx = xmlInputFactory.createXMLEventReader(IOUtils.openURIForBufferedReading(inputName));
} else {
LOG.error("Illegal number of args");
return -1;
}
final SAMProgramRecord prg2 = header.createProgramRecord();
fillHeader(rx, prg2);
final SAMProgramRecord prg1 = header.createProgramRecord();
prg1.setCommandLine(getProgramCommandLine());
prg1.setProgramVersion(getVersion());
prg1.setProgramName(getProgramName());
prg1.setPreviousProgramGroupId(prg2.getId());
final SAMReadGroupRecord rg1 = new SAMReadGroupRecord("g1");
rg1.setLibrary("blast");
rg1.setSample("blast");
rg1.setDescription("blast");
header.addReadGroup(rg1);
sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
if (interleaved_input) {
run_paired(sfw, rx, header);
} else {
run_single(sfw, rx, header);
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(rx);
}
}
use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.
the class IntervalListTools method doWork.
@Override
protected Object doWork() {
// Check inputs
for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
for (final File f : SECOND_INPUT) IOUtil.assertFileIsReadable(f);
if (OUTPUT != null) {
if (SCATTER_COUNT == 1) {
IOUtil.assertFileIsWritable(OUTPUT);
} else {
IOUtil.assertDirectoryIsWritable(OUTPUT);
}
}
// Read in the interval lists and apply any padding
final List<IntervalList> lists = openIntervalLists(INPUT);
// same for the second list
final List<IntervalList> secondLists = openIntervalLists(SECOND_INPUT);
if (UNIQUE && !SORT) {
LOG.warn("UNIQUE=true requires sorting but SORT=false was specified. Results will be sorted!");
}
final IntervalList result = ACTION.act(lists, secondLists);
if (SCATTER_COUNT > 1) {
// Scattering requires a uniqued, sorted interval list. We want to do this up front (before BREAKING AT BANDS)
SORT = true;
UNIQUE = true;
}
if (INVERT) {
// no need to sort, since return will be sorted by definition.
SORT = false;
UNIQUE = true;
}
final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;
//only get unique if this has been asked unless inverting (since the invert will return a unique list)
List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();
if (BREAK_BANDS_AT_MULTIPLES_OF > 0) {
finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF);
}
// Decide on a PG ID and make a program group
final SAMFileHeader header = result.getHeader();
final Set<String> pgs = new HashSet<>();
for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId());
for (int i = 1; i < Integer.MAX_VALUE; ++i) {
if (!pgs.contains(String.valueOf(i))) {
final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
pg.setCommandLine(getCommandLine());
pg.setProgramName(getClass().getSimpleName());
header.addProgramRecord(pg);
break;
}
}
// Add any comments
if (COMMENT != null) {
for (final String comment : COMMENT) {
header.addComment(comment);
}
}
final IntervalList output = new IntervalList(header);
for (final Interval i : finalIntervals) {
output.add(i);
}
final List<IntervalList> resultIntervals;
if (OUTPUT != null) {
if (SCATTER_COUNT == 1) {
output.write(OUTPUT);
resultIntervals = Arrays.asList(output);
} else {
final List<IntervalList> scattered = writeScatterIntervals(output);
LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
if (scattered.size() != SCATTER_COUNT) {
LOG.warn(String.format("Requested scatter width of %s, but only emitted %s. (This may be an expected consequence of running in %s mode.)", SCATTER_COUNT, scattered.size(), IntervalListScatterer.Mode.BALANCING_WITHOUT_INTERVAL_SUBDIVISION));
}
resultIntervals = scattered;
}
} else {
resultIntervals = Arrays.asList(output);
}
long totalUniqueBaseCount = 0;
long intervalCount = 0;
for (final IntervalList finalInterval : resultIntervals) {
totalUniqueBaseCount = finalInterval.getUniqueBaseCount();
intervalCount += finalInterval.size();
}
LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
return null;
}
use of htsjdk.samtools.SAMProgramRecord in project gatk by broadinstitute.
the class GATKTool method createProgramGroupID.
/**
* Returns the program group ID that will be used in the SAM writer.
* Starts with {@link #getToolName} and looks for the first available ID by appending consecutive integers.
*/
private String createProgramGroupID(final SAMFileHeader header) {
final String toolName = getToolName();
String pgID = toolName;
SAMProgramRecord record = header.getProgramRecord(pgID);
int count = 1;
while (record != null) {
pgID = toolName + "." + String.valueOf(count++);
record = header.getProgramRecord(pgID);
}
return pgID;
}
Aggregations