use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar84452 method doWork.
@Override
public int doWork(final List<String> args) {
if (!StringUtil.isBlank(this.customTag)) {
if (customTag.length() != 2 || !customTag.startsWith("X")) {
LOG.error("Bad tag: expect length=2 && start with 'X'");
return -1;
}
}
SAMFileWriter sfw = null;
SamReader sfr = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = sfr.getFileHeader();
sfw = this.WritingBamArgs.openSAMFileWriter(outputFile, header, true);
long nChanged = 0L;
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag()) {
sfw.addAlignment(rec);
continue;
}
final Cigar cigar = rec.getCigar();
if (cigar == null) {
sfw.addAlignment(rec);
continue;
}
final String originalCigarSting = rec.getCigarString();
final byte[] bases = rec.getReadBases();
if (bases == null) {
sfw.addAlignment(rec);
continue;
}
final ArrayList<CigarElement> L = new ArrayList<CigarElement>();
final ByteArrayOutputStream nseq = new ByteArrayOutputStream();
final ByteArrayOutputStream nqual = new ByteArrayOutputStream();
final byte[] quals = rec.getBaseQualities();
int indexBases = 0;
for (final CigarElement ce : cigar.getCigarElements()) {
switch(ce.getOperator()) {
case S:
indexBases += ce.getLength();
break;
// cont
case H:
// cont
case P:
// cont
case N:
case D:
{
L.add(ce);
break;
}
case I:
case EQ:
case X:
case M:
{
L.add(ce);
nseq.write(bases, indexBases, ce.getLength());
if (quals.length != 0)
nqual.write(quals, indexBases, ce.getLength());
indexBases += ce.getLength();
break;
}
default:
{
throw new SAMException("Unsupported Cigar opertator:" + ce.getOperator());
}
}
}
if (indexBases != bases.length) {
throw new SAMException("ERRROR " + rec.getCigarString());
}
if (L.size() == cigar.numCigarElements()) {
sfw.addAlignment(rec);
continue;
}
++nChanged;
if (!StringUtil.isBlank(this.customTag))
rec.setAttribute(this.customTag, originalCigarSting);
rec.setCigar(new Cigar(L));
rec.setReadBases(nseq.toByteArray());
if (quals.length != 0)
rec.setBaseQualities(nqual.toByteArray());
sfw.addAlignment(rec);
}
progress.finish();
iter.close();
LOG.info("Num records changed:" + nChanged);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(sfr);
}
return 0;
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class SamJmx method doWork.
private int doWork(SamReader in) throws IOException {
String name = this.projectName;
if (name == null || name.trim().isEmpty()) {
name = "undefined";
}
MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
LocatableStreamInfo dynamicMBean = new LocatableStreamInfo(name);
LocatableStreamInfo.Status status = LocatableStreamInfo.Status.IDLE;
ObjectName objectMBean = null;
SAMFileWriter out = null;
SAMRecordIterator iter = null;
try {
out = this.writingBamArgs.openSAMFileWriter(output, in.getFileHeader(), true);
objectMBean = new ObjectName(dynamicMBean.getClass().getPackage().getName() + ":type=" + dynamicMBean.getClass().getSimpleName());
mbeanServer.registerMBean(dynamicMBean, objectMBean);
iter = in.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
out.addAlignment(rec);
status = dynamicMBean.watch(rec);
if (status == LocatableStreamInfo.Status.ABORT || status == LocatableStreamInfo.Status.BREAK) {
LOG.error("#### Process \"" + name + "\" received message " + status.name());
break;
}
}
if (status == LocatableStreamInfo.Status.ABORT) {
LOG.error("#### Process \"" + name + "\" : Exit failure");
mbeanServer.unregisterMBean(objectMBean);
if (this.output != null) {
CloserUtil.close(out);
out = null;
this.output.delete();
}
System.exit(-1);
}
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(out);
if (objectMBean != null) {
try {
mbeanServer.unregisterMBean(objectMBean);
} catch (Exception err2) {
}
}
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar78400Test method test01.
@Test
public void test01() throws IOException {
final String flowcell = "HS20001259127";
final String lane = "1";
final File in = createTmpFile(".bam");
SAMFileHeader header = new SAMFileHeader();
header.setSortOrder(SortOrder.unsorted);
SAMFileWriter sfw = new SAMFileWriterFactory().makeBAMWriter(header, true, in);
DefaultSAMRecordFactory recfactory = new DefaultSAMRecordFactory();
SAMRecord rec = recfactory.createSAMRecord(header);
rec.setReadName(flowcell + ":" + lane + ":1210:15640:52255");
rec.setReadString("GAATTC");
rec.setBaseQualityString("222222");
SAMUtils.makeReadUnmapped(rec);
sfw.addAlignment(rec);
sfw.close();
assertIsValidBam(in);
final File xml = createTmpFile(".xml");
PrintWriter pw = new PrintWriter(xml);
pw.println("<?xml version=\"1.0\"?><read-groups>" + "<flowcell name=\"" + flowcell + "\"><lane index=\"" + lane + "\">" + "<group ID=\"X1\"><library>L1</library><platform>P1</platform>" + "<sample>S1</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group>" + "</lane></flowcell><flowcell name=\"HS20001259128\">" + "<lane index=\"2\"><group ID=\"x2\"><library>L2</library>" + "<platform>P2</platform><sample>S2</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group></lane>" + "</flowcell></read-groups>");
pw.flush();
pw.close();
assertIsXml(xml);
final File out = createTmpFile(".bam");
Assert.assertEquals(new Biostar78400().instanceMain(newCmd().add("-o").add(out).add("-x").add(xml).add(in).make()), 0);
SamReader r = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(out);
Assert.assertTrue(r.getFileHeader() != null);
Assert.assertTrue(r.getFileHeader().getReadGroups() != null);
Assert.assertFalse(r.getFileHeader().getReadGroups().isEmpty());
SAMRecordIterator iter = r.iterator();
Assert.assertTrue(iter.hasNext());
rec = iter.next();
SAMReadGroupRecord rg = rec.getReadGroup();
Assert.assertNotNull(rg);
Assert.assertEquals(rg.getId(), "X1");
Assert.assertEquals(rg.getSample(), "S1");
Assert.assertFalse(iter.hasNext());
iter.close();
r.close();
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar139647 method doWork.
@Override
public int doWork(final List<String> args) {
SAMFileWriter w = null;
LineIterator r = null;
try {
final String filename = super.oneFileOrNull(args);
if (filename == null) {
LOG.info("Reading from stdin");
r = IOUtils.openStreamForLineIterator(stdin());
} else {
LOG.info("Reading from " + filename);
r = IOUtils.openURIForLineIterator(filename);
}
Format format = Format.None;
while (r.hasNext() && format == Format.None) {
String line = r.peek();
if (line.trim().isEmpty()) {
r.next();
continue;
}
if (line.startsWith("CLUSTAL")) {
format = Format.Clustal;
// consume
r.next();
break;
} else if (line.startsWith(">")) {
format = Format.Fasta;
break;
} else {
LOG.error("MSA format not recognized in " + line);
return -1;
}
}
LOG.info("format : " + format);
if (Format.Fasta.equals(format)) {
// this.consensus=new FastaConsensus();
AlignSequence curr = null;
while (r.hasNext()) {
String line = r.next();
if (line.startsWith(">")) {
curr = new AlignSequence();
curr.name = line.substring(1).trim();
if (sample2sequence.containsKey(curr.name)) {
LOG.error("Sequence ID " + curr.name + " defined twice");
return -1;
}
sample2sequence.put(curr.name, curr);
} else if (curr != null) {
curr.seq.append(line.trim());
this.align_length = Math.max(this.align_length, curr.seq.length());
}
}
} else if (Format.Clustal.equals(format)) {
AlignSequence curr = null;
int columnStart = -1;
while (r.hasNext()) {
String line = r.next();
if (line.trim().isEmpty() || line.startsWith("CLUSTAL W")) {
columnStart = -1;
continue;
}
if (line.charAt(0) == ' ') {
if (columnStart == -1) {
LOG.error("illegal consensus line for " + line);
return -1;
}
} else {
if (columnStart == -1) {
columnStart = line.indexOf(' ');
if (columnStart == -1) {
LOG.error("no whithespace in " + line);
return -1;
}
while (columnStart < line.length() && line.charAt(columnStart) == ' ') {
columnStart++;
}
}
String seqname = line.substring(0, columnStart).trim();
curr = this.sample2sequence.get(seqname);
if (curr == null) {
curr = new AlignSequence();
curr.name = seqname;
this.sample2sequence.put(curr.name, curr);
}
curr.seq.append(line.substring(columnStart));
this.align_length = Math.max(align_length, curr.seq.length());
}
}
} else {
LOG.error("Undefined input format");
return -1;
}
final SAMFileHeader header = new SAMFileHeader();
final SAMSequenceDictionary dict = new SAMSequenceDictionary();
dict.addSequence(new SAMSequenceRecord(REF, this.align_length));
header.setSortOrder(SortOrder.unsorted);
header.setSequenceDictionary(dict);
final SAMProgramRecord pgr = header.createProgramRecord();
pgr.setProgramName(getProgramName());
pgr.setProgramVersion(getVersion());
if (getProgramCommandLine().trim().isEmpty()) {
pgr.setCommandLine("(empty)");
} else {
pgr.setCommandLine(getProgramCommandLine());
}
w = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, false);
final DefaultSAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
for (final String seqName : this.sample2sequence.keySet()) {
final AlignSequence shortRead = this.sample2sequence.get(seqName);
final SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(seqName);
rec.setReadString(shortRead.seq.toString().replaceAll("[\\*\\- ]+", ""));
int start = 0;
while (start < shortRead.seq.length() && !Character.isLetter(shortRead.at(start))) {
start++;
}
rec.setAlignmentStart(start + 1);
int end = shortRead.seq.length() - 1;
while (end > 0 && !Character.isLetter(shortRead.at(end))) {
--end;
}
// rec.setAlignmentEnd(end+1); not supported
int n = start;
StringBuilder dna = new StringBuilder();
final List<CigarElement> cigars = new ArrayList<>();
while (n <= end) {
if (!Character.isLetter(shortRead.at(n))) {
cigars.add(new CigarElement(1, CigarOperator.D));
} else {
cigars.add(new CigarElement(1, CigarOperator.M));
dna.append(shortRead.at(n));
}
n++;
}
// simplify cigar string
n = 0;
while (n + 1 < cigars.size()) {
CigarElement c1 = cigars.get(n);
CigarElement c2 = cigars.get(n + 1);
if (c1.getOperator().equals(c2.getOperator())) {
cigars.set(n, new CigarElement(c1.getLength() + c2.getLength(), c1.getOperator()));
cigars.remove(n + 1);
} else {
++n;
}
}
rec.setReadPairedFlag(false);
rec.setMappingQuality(60);
rec.setReferenceName(REF);
rec.setReadString(dna.toString());
rec.setCigar(new Cigar(cigars));
rec.setAttribute("PG", pgr.getId());
w.addAlignment(rec);
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.
the class Biostar145820 method doWork.
@Override
public int doWork(final List<String> args) {
SamReader samReader = null;
SAMRecordIterator iter = null;
SAMFileWriter samWriter = null;
Random random = new Random();
CloseableIterator<RandSamRecord> iter2 = null;
try {
final String input = oneFileOrNull(args);
samReader = super.openSamReader(input);
final SAMFileHeader header = samReader.getFileHeader().clone();
header.setSortOrder(SortOrder.unsorted);
header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine());
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader()).logger(LOG);
iter = samReader.iterator();
final SortingCollection<RandSamRecord> sorter = SortingCollection.newInstance(RandSamRecord.class, new RandSamRecordCodec(header), new RandSamRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorter.setDestructiveIteration(true);
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
if (this.filter.filterOut(rec)) {
continue;
}
final RandSamRecord r = new RandSamRecord();
r.rand_index = random.nextInt();
r.samRecord = progress.watch(rec);
sorter.add(r);
}
iter.close();
iter = null;
sorter.doneAdding();
iter2 = sorter.iterator();
samWriter = writingBamArgs.openSAMFileWriter(outputFile, header, true);
final SAMFileWriter finalw = samWriter;
iter2.stream().limit(this.count < 0L ? Long.MAX_VALUE - 1 : this.count).map(R -> R.samRecord).forEach(R -> finalw.addAlignment(R));
iter2.close();
iter2 = null;
sorter.cleanup();
progress.finish();
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(iter2);
CloserUtil.close(samReader);
CloserUtil.close(samWriter);
}
return 0;
}
Aggregations