Use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
Class TestUtils, method sortBamOnQueryName.
/**
 * Copies the records of a BAM into a new temporary BAM whose header declares
 * a {@code queryname} sort order.
 *
 * @param bamFile path of the BAM to read
 * @param pred optional record filter; when {@code null} every record is kept
 * @return the temporary file created with {@code createTmpFile(".bam")}
 * @throws IOException if the temporary file cannot be created or the reader cannot be closed
 */
protected File sortBamOnQueryName(final Path bamFile, final Predicate<SAMRecord> pred) throws IOException {
    final File sortedBam = this.createTmpFile(".bam");
    // try-with-resources: reader and writer are released even if reading or writing fails.
    // The writer is closed first, then the reader (same order as the explicit closes it replaces).
    try (SamReader sr = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMFileHeader outHeader = sr.getFileHeader().clone();
        outHeader.setSortOrder(SortOrder.queryname);
        // second argument is 'presorted': false because the input order is not assumed to be queryname
        try (SAMFileWriter w = new SAMFileWriterFactory().makeBAMWriter(outHeader, false, sortedBam)) {
            sr.iterator().stream()
                .filter(R -> pred == null || pred.test(R))
                .forEach(w::addAlignment);
        }
    }
    return sortedBam;
}
Use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
Class TestUtils, method addClippingToBam.
/**
 * Writes a copy of a BAM in which random clipping has been added to both ends
 * of every eligible read (see {@link #addRandomClipping(SAMRecord)}).
 *
 * <p>An index is requested for the output only when the input has one and is
 * coordinate-sorted; the expected index file is then registered in
 * {@code deleteFilesAtExit}.
 *
 * @param bamFile the BAM to read
 * @return the newly written, validated, clipped BAM (a temporary file)
 * @throws IOException if the temporary file cannot be created or the reader cannot be closed
 */
protected File addClippingToBam(final File bamFile) throws IOException {
    final File clippedBam = this.createTmpFile(".bam");
    try (SamReader sr = SamReaderFactory.makeDefault().open(bamFile)) {
        final SAMFileHeader inHeader = sr.getFileHeader();
        final boolean createIndex = sr.hasIndex() && inHeader.getSortOrder().equals(SortOrder.coordinate);
        if (createIndex) {
            // the index is written next to the OUTPUT bam, so that is the directory to clean up
            this.deleteFilesAtExit.add(new File(clippedBam.getParentFile(), IOUtil.basename(clippedBam) + BAMIndex.BAMIndexSuffix));
        }
        // presorted=true: records are written in input order; clipping leaves alignment starts untouched
        try (SAMFileWriter w = new SAMFileWriterFactory().setCreateIndex(createIndex).makeBAMWriter(inHeader, true, clippedBam)) {
            sr.iterator().stream()
                .map(this::addRandomClipping)
                .forEach(w::addAlignment);
        }
    }
    // validate and return the file that was produced, not the untouched input
    assertIsValidBam(clippedBam);
    return clippedBam;
}

/**
 * Adds a random clip (hard or soft, 1 to 100 bases) on each side of a read, in place.
 *
 * <p>Reads that are unmapped, have no cigar, are already clipped, or carry no
 * base qualities are returned unchanged.
 *
 * @param R the record to modify
 * @return the same record instance
 */
private SAMRecord addRandomClipping(final SAMRecord R) {
    final String bases = "ATGC";
    if (R.getReadUnmappedFlag() || R.getCigar() == null) {
        return R;
    }
    if (R.getCigar().isClipped()) {
        return R;
    }
    // Missing qualities are detected through the string form only: calling equals() on a
    // byte[] is an identity comparison and adds nothing to this test.
    if (R.getBaseQualityString().equals(SAMRecord.NULL_QUALS_STRING)) {
        return R;
    }
    for (int side = 0; side < 2; side++) {
        final boolean hard = this.random.nextBoolean();
        final int clipLen = 1 + this.random.nextInt(100);
        final StringBuilder seq = new StringBuilder();
        final StringBuilder qual = new StringBuilder();
        final String cigar;
        if (hard) {
            // hard clip: only the cigar changes, no bases are added
            cigar = String.valueOf(clipLen) + "H";
        } else {
            // soft clip: the clipped bases stay in the read, so generate random bases and qualities
            cigar = String.valueOf(clipLen) + "S";
            for (int x = 0; x < clipLen; ++x) {
                seq.append(bases.charAt(this.random.nextInt(bases.length())));
                qual.append("#");
            }
        }
        if (side == 0) {
            // 5' side: prepend
            R.setReadString(seq.toString() + R.getReadString());
            R.setBaseQualityString(qual.toString() + R.getBaseQualityString());
            R.setCigarString(cigar + R.getCigarString());
        } else {
            // 3' side: append
            R.setCigarString(R.getCigarString() + cigar);
            R.setReadString(R.getReadString() + seq.toString());
            R.setBaseQualityString(R.getBaseQualityString() + qual.toString());
        }
    }
    return R;
}
Use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
Class Biostar90204, method doWork.
/**
 * Splits the input SAM/BAM into consecutive BAM files of at most
 * {@code record_per_file} records each.
 *
 * <p>Output files are named {@code [prefix.]NNN.bam}, where {@code NNN} is the
 * 1-based split number zero-padded to {@code suffix_length} digits (no padding
 * when {@code suffix_length} is 0). When a manifest file is set, one
 * tab-separated line per split is written: file name, first read number, last
 * read number.
 *
 * @param args command-line arguments; at most one input file
 * @return 0 on success, -1 on bad parameters or on any error
 */
@Override
public int doWork(final List<String> args) {
    if (this.suffix_length < 0) {
        LOG.error("Bad value of suffix_length:" + this.suffix_length);
        return -1;
    }
    if (this.record_per_file < 1L) {
        LOG.error("Bad value of record_per_file:" + this.record_per_file);
        return -1;
    }
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    SamReader samFileReader = null;
    // default manifest discards everything; replaced below if a manifest file was requested
    PrintWriter manifest = new PrintWriter(new NullOuputStream());
    try {
        samFileReader = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = samFileReader.getFileHeader();
        // "%00d" is rejected by java.util.Formatter, so a zero width means "no padding"
        final String numberFormat = this.suffix_length > 0 ? "%0" + this.suffix_length + "d" : "%d";
        final String pathPrefix = this.prefix.isEmpty() ? "" : this.prefix + ".";
        int split_file_number = 0;
        long nReads = 0L;
        iter = samFileReader.iterator();
        if (this.manifestFile != null) {
            manifest.close();
            manifest = new PrintWriter(manifestFile);
        }
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            if (this.samRecordFilter.filterOut(rec)) {
                continue;
            }
            ++nReads;
            if (sfw == null) {
                // first kept record of a new split: open the next output file
                split_file_number++;
                final String pathname = pathPrefix + String.format(numberFormat, split_file_number) + ".bam";
                final File out = new File(pathname);
                manifest.write(pathname);
                manifest.write("\t" + (nReads) + "\t");
                final SAMFileHeader header2 = header.clone();
                header2.addComment("SPLIT:" + split_file_number);
                header2.addComment("SPLIT:Starting from Read" + nReads);
                sfw = this.writingBamArgs.openSAMFileWriter(out, header2, true);
            }
            sfw.addAlignment(rec);
            if (nReads % record_per_file == 0) {
                // split is full: close it and complete its manifest line
                sfw.close();
                manifest.write((nReads) + "\n");
                sfw = null;
            }
        }
        if (sfw != null) {
            sfw.close();
            // cleared so the finally block does not close the same writer a second time
            sfw = null;
            manifest.write((nReads) + "\n");
        }
        manifest.flush();
        // PrintWriter never throws on write: ask it explicitly whether something went wrong
        if (manifest.checkError()) {
            LOG.error("I/O error while writing manifest");
            return -1;
        }
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(manifest);
        CloserUtil.close(sfw);
        CloserUtil.close(iter);
        CloserUtil.close(samFileReader);
    }
    return 0;
}
Use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
Class Biostar76892, method doWork.
/**
 * Looks for paired reads mapped close to each other on the same contig and
 * the same strand, rewrites their strand / proper-pair flags, and tags both
 * mates with the attribute {@code rv=1}.
 *
 * <p>Candidate reads are kept in a buffer until their mate is met or until
 * the current read is more than {@code distance} bases past their end (or on
 * another contig). Reads that are not candidates, and buffered reads that
 * never found a mate, are written unchanged unless {@code onlySaveFixed} is set.
 *
 * @param args command-line arguments; at most one input file
 * @return {@code RETURN_OK} on success, -1 on any error
 */
@Override
public int doWork(final List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        sfw = writingBamArgs.openSAMFileWriter(this.outputFile, sfr.getFileHeader(), true);
        long nRecords = 0;
        final List<SAMRecord> buffer = new ArrayList<SAMRecord>();
        iter = sfr.iterator();
        for (; ; ) {
            SAMRecord rec = null;
            // get next record
            if (iter.hasNext()) {
                rec = iter.next();
                ++nRecords;
                if (nRecords % 1000000 == 0) {
                    LOG.info("records: " + nRecords);
                }
                // Not a candidate: unpaired, unmapped (read or mate), already a proper pair,
                // mates on different contigs, or mates already on opposite strands.
                // Reference indexes are Integer objects: compare with equals(), not '!='.
                if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || rec.getProperPairFlag() || !rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) || rec.getReadNegativeStrandFlag() == !rec.getMateNegativeStrandFlag()) {
                    if (!onlySaveFixed) {
                        sfw.addAlignment(rec);
                    }
                    continue;
                }
            }
            if (rec != null) {
                int i = 0;
                // cleanup buffer
                int mate_index = -1;
                while (i < buffer.size()) {
                    final SAMRecord prev = buffer.get(i);
                    // Integer objects again: equals() instead of identity comparison
                    if (!prev.getReferenceIndex().equals(rec.getReferenceIndex()) || prev.getAlignmentEnd() + distance < rec.getAlignmentStart()) {
                        // too far behind the current read to ever be paired: flush it
                        if (!onlySaveFixed) {
                            sfw.addAlignment(prev);
                        }
                        buffer.remove(i);
                    } else if (prev.getReadName().equals(rec.getReadName()) && ((prev.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) || (rec.getFirstOfPairFlag() && prev.getSecondOfPairFlag()))) {
                        mate_index = i;
                        ++i;
                    } else {
                        ++i;
                    }
                }
                if (mate_index == -1) {
                    buffer.add(rec);
                } else {
                    final SAMRecord mate = buffer.get(mate_index);
                    buffer.remove(mate_index);
                    LOG.info("changing " + rec + " " + mate);
                    if (mate.getReadNegativeStrandFlag()) {
                        mate.setReadNegativeStrandFlag(false);
                        rec.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
                    } else {
                        // NOTE(review): in this branch both reads are presumably on the forward
                        // strand already, so setting 'false' looks like a no-op - confirm whether
                        // 'true' was intended before changing it.
                        rec.setReadNegativeStrandFlag(false);
                        mate.setMateNegativeStrandFlag(rec.getReadNegativeStrandFlag());
                    }
                    if (!mate.getReadFailsVendorQualityCheckFlag() && !rec.getReadFailsVendorQualityCheckFlag()) {
                        mate.setProperPairFlag(true);
                        rec.setProperPairFlag(true);
                    }
                    mate.setAttribute("rv", 1);
                    rec.setAttribute("rv", 1);
                    sfw.addAlignment(mate);
                    sfw.addAlignment(rec);
                }
            } else {
                // end of input: flush whatever is still waiting for a mate
                for (final SAMRecord r : buffer) {
                    if (!onlySaveFixed) {
                        sfw.addAlignment(r);
                    }
                }
                break;
            }
        }
        LOG.info("done");
        sfw.close();
        // cleared so the finally block does not close the same writer a second time
        sfw = null;
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
Class Biostar78400, method doWork.
/**
 * Assigns a read group to every read, using the flowcell and lane parsed
 * from the read name.
 *
 * <p>The XML file describes flowcells, their lanes and the read groups of
 * each lane. The read groups found there replace those of the input header.
 * Each read name must match {@code readNameSignatureStr}; group 1 is the
 * flowcell name and group 2 the lane number, which select the read group id
 * written to the {@code RG} attribute.
 *
 * @param args command-line arguments; at most one input file
 * @return {@code RETURN_OK} on success, -1 on missing/empty/inconsistent XML,
 *         on a read name that cannot be resolved, or on any other error
 */
@Override
public int doWork(List<String> args) {
    if (this.XML == null) {
        LOG.error("XML file missing");
        return -1;
    }
    // flowcell name -> (lane id -> read group id)
    final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    SAMRecordIterator iter = null;
    try {
        final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
        final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
        if (rgl.flowcells.isEmpty()) {
            LOG.error("empty XML " + XML);
            return -1;
        }
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader().clone();
        header.addComment("Processed with " + getProgramName());
        final Set<String> seenids = new HashSet<String>();
        final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
        for (final FlowCell fc : rgl.flowcells) {
            final Map<Integer, String> lane2id = new HashMap<Integer, String>();
            for (final Lane lane : fc.lanes) {
                for (final ReadGroup rg : lane.readGroups) {
                    if (seenids.contains(rg.id)) {
                        LOG.error("Group id " + rg.id + " defined twice");
                        return -1;
                    }
                    seenids.add(rg.id);
                    // create the read group we'll be using
                    final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
                    rgrec.setLibrary(rg.library);
                    rgrec.setPlatform(rg.platform);
                    rgrec.setSample(rg.sample);
                    rgrec.setPlatformUnit(rg.platformunit);
                    if (rg.center != null) {
                        rgrec.setSequencingCenter(rg.center);
                    }
                    if (rg.description != null) {
                        rgrec.setDescription(rg.description);
                    }
                    lane2id.put(lane.id, rg.id);
                    samReadGroupRecords.add(rgrec);
                }
            }
            if (flowcell2lane2id.containsKey(fc.name)) {
                LOG.error("FlowCell id " + fc.name + " defined twice in XML");
                return -1;
            }
            flowcell2lane2id.put(fc.name, lane2id);
        }
        header.setReadGroups(samReadGroupRecords);
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Matcher matcher = readNameSignature.matcher(rec.getReadName());
            final String flowcellStr;
            final String laneStr;
            if (matcher.matches()) {
                flowcellStr = matcher.group(1);
                laneStr = matcher.group(2);
            } else {
                LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
                return -1;
            }
            String RGID = null;
            final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
            if (lane2id == null) {
                throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
            }
            try {
                RGID = lane2id.get(Integer.parseInt(laneStr));
            } catch (final NumberFormatException e) {
                // only a non-numeric lane is expected here; anything else goes to the outer handler
                LOG.error("bad lane-Id in " + rec.getReadName());
                return -1;
            }
            if (RGID == null) {
                throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
            }
            rec.setAttribute(SAMTag.RG.name(), RGID);
            sfw.addAlignment(rec);
        }
        progress.finish();
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        // the iterator is closed here so that the early 'return -1' paths release it too
        CloserUtil.close(iter);
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Aggregations