use of htsjdk.samtools.DefaultSAMRecordFactory in project jvarkit by lindenb.
the class MergingIteratorTest method test3.
@Test
public void test3() {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
dict.addSequence(new SAMSequenceRecord("1", 10000));
SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(dict);
final List<SAMRecord> L = new ArrayList<>();
DefaultSAMRecordFactory srf = new DefaultSAMRecordFactory();
SAMRecord r1 = srf.createSAMRecord(header);
r1.setReadName("R1");
r1.setReferenceName("1");
r1.setAlignmentStart(1);
r1.setFlags(99);
r1.setReadString("GAATTC");
r1.setBaseQualityString("222222");
r1.setCigarString("6M");
L.add(r1);
r1 = srf.createSAMRecord(header);
r1.setReadName("R2");
r1.setReferenceName("1");
r1.setAlignmentStart(83);
r1.setFlags(83);
r1.setReadString("GAATTC");
r1.setBaseQualityString("222222");
r1.setCigarString("6M");
L.add(r1);
final MergingIterator<SAMRecord> iter = new MergingIterator<>(new SAMRecordCoordinateComparator(), Collections.singletonList(L.iterator()));
while (iter.hasNext()) iter.next();
iter.close();
}
use of htsjdk.samtools.DefaultSAMRecordFactory in project jvarkit by lindenb.
the class BWAMemNOp method doWork.
@Override
public int doWork(List<String> args) {
SamReader r = null;
SAMFileWriter w = null;
try {
r = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = r.getFileHeader();
OtherCanonicalAlignFactory ocaf = new OtherCanonicalAlignFactory(header);
w = writingBamArgs.openSAMFileWriter(outputFile, header, true);
SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
SAMRecordIterator iter = r.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (rec.getSupplementaryAlignmentFlag()) {
continue;
}
if (rec.getReadUnmappedFlag()) {
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
Cigar cigar1 = rec.getCigar();
if (cigar1 == null || cigar1.isEmpty() || !(cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) || cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S))) // last or first is soft clipping
{
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
rec.getAlignmentStart();
List<OtherCanonicalAlign> xps = ocaf.getXPAligns(rec);
if (xps.isEmpty()) {
if (!print_only_spit_read)
w.addAlignment(rec);
continue;
}
boolean found_one = false;
for (OtherCanonicalAlign xp : xps) {
if (!rec.getReferenceName().equals(xp.getReferenceName()))
continue;
if (xp.getReadNegativeStrandFlag() != rec.getReadNegativeStrandFlag())
continue;
Cigar cigar2 = xp.getCigar();
if (cigar2 == null || cigar2.isEmpty()) {
continue;
}
SAMRecord newrec = null;
List<CigarEvt> L1 = null;
List<CigarEvt> L2 = null;
if (cigar1.getCigarElement(cigar1.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(cigar1.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar2.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(0).getLength() >= this.min_soft_clip_length && rec.getAlignmentEnd() < xp.getAlignmentStart()) {
newrec = samRecordFactory.createSAMRecord(header);
int ref1 = rec.getAlignmentStart();
newrec.setAlignmentStart(ref1);
L1 = cigarEvents(0, ref1, cigar1);
L2 = cigarEvents(0, xp.getAlignmentStart(), cigar2);
} else if (cigar2.getCigarElement(cigar2.numCigarElements() - 1).getOperator().equals(CigarOperator.S) && cigar2.getCigarElement(cigar2.numCigarElements() - 1).getLength() >= this.min_soft_clip_length && cigar1.getCigarElement(0).getOperator().equals(CigarOperator.S) && cigar1.getCigarElement(0).getLength() >= this.min_soft_clip_length && xp.getAlignmentEnd() < rec.getAlignmentStart()) {
newrec = samRecordFactory.createSAMRecord(header);
int ref1 = xp.getAlignmentStart();
newrec.setAlignmentStart(ref1);
L1 = cigarEvents(0, ref1, cigar2);
L2 = cigarEvents(0, rec.getAlignmentStart(), cigar1);
}
if (newrec == null)
continue;
newrec.setFlags(rec.getFlags());
newrec.setReadName(rec.getReadName());
newrec.setReadBases(rec.getReadBases());
newrec.setMappingQuality(rec.getMappingQuality());
newrec.setReferenceIndex(rec.getReferenceIndex());
newrec.setBaseQualities(rec.getBaseQualities());
if (found_one) {
newrec.setNotPrimaryAlignmentFlag(true);
}
found_one = true;
for (SAMTagAndValue tav : rec.getAttributes()) {
if (tav.tag.equals(ocaf.getAttributeKey()))
continue;
if (tav.tag.equals("NM"))
continue;
newrec.setAttribute(tav.tag, tav.value);
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
newrec.setMateAlignmentStart(rec.getMateAlignmentStart());
newrec.setMateReferenceIndex(rec.getMateReferenceIndex());
newrec.setInferredInsertSize(rec.getInferredInsertSize());
}
while (!L1.isEmpty() && (L1.get(L1.size() - 1).op.equals(CigarOperator.S) || L1.get(L1.size() - 1).op.equals(CigarOperator.D) || L1.get(L1.size() - 1).op.equals(CigarOperator.H))) {
L1.remove(L1.size() - 1);
}
while (!L2.isEmpty() && L2.get(0).read0 <= L1.get(L1.size() - 1).read0) {
L2.remove(0);
}
List<CigarElement> cigarElements = new ArrayList<CigarElement>();
int i = 0;
while (i < L1.size()) {
int j = i + 1;
while (j < L1.size() && L1.get(i).op.equals(L1.get(j).op)) {
j++;
}
cigarElements.add(new CigarElement(j - i, L1.get(i).op));
i = j;
}
// add 'N'
cigarElements.add(new CigarElement((L2.get(0).ref1 - L1.get(L1.size() - 1).ref1) - 1, CigarOperator.N));
i = 0;
while (i < L2.size()) {
int j = i + 1;
while (j < L2.size() && L2.get(i).op.equals(L2.get(j).op)) {
j++;
}
cigarElements.add(new CigarElement(j - i, L2.get(i).op));
i = j;
}
// cleanup : case where 'S' is close to 'N'
i = 0;
while (i + 1 < cigarElements.size()) {
CigarElement ce1 = cigarElements.get(i);
CigarElement ce2 = cigarElements.get(i + 1);
if (i > 0 && ce1.getOperator().equals(CigarOperator.S) && ce2.getOperator().equals(CigarOperator.N)) {
cigarElements.set(i, new CigarElement(ce1.getLength(), CigarOperator.X));
} else if (i + 2 < cigarElements.size() && ce1.getOperator().equals(CigarOperator.N) && ce2.getOperator().equals(CigarOperator.S)) {
cigarElements.set(i + 1, new CigarElement(ce2.getLength(), CigarOperator.X));
}
i++;
}
newrec.setCigar(new Cigar(cigarElements));
List<SAMValidationError> validations = newrec.isValid();
if (validations != null) {
for (SAMValidationError err : validations) {
LOG.warning(err.getType() + ":" + err.getMessage());
}
}
w.addAlignment(newrec);
}
if (!found_one) {
if (!print_only_spit_read)
w.addAlignment(rec);
}
}
iter.close();
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.DefaultSAMRecordFactory in project jvarkit by lindenb.
the class Biostar78400Test method test01.
@Test
public void test01() throws IOException {
final String flowcell = "HS20001259127";
final String lane = "1";
final File in = createTmpFile(".bam");
SAMFileHeader header = new SAMFileHeader();
header.setSortOrder(SortOrder.unsorted);
SAMFileWriter sfw = new SAMFileWriterFactory().makeBAMWriter(header, true, in);
DefaultSAMRecordFactory recfactory = new DefaultSAMRecordFactory();
SAMRecord rec = recfactory.createSAMRecord(header);
rec.setReadName(flowcell + ":" + lane + ":1210:15640:52255");
rec.setReadString("GAATTC");
rec.setBaseQualityString("222222");
SAMUtils.makeReadUnmapped(rec);
sfw.addAlignment(rec);
sfw.close();
assertIsValidBam(in);
final File xml = createTmpFile(".xml");
PrintWriter pw = new PrintWriter(xml);
pw.println("<?xml version=\"1.0\"?><read-groups>" + "<flowcell name=\"" + flowcell + "\"><lane index=\"" + lane + "\">" + "<group ID=\"X1\"><library>L1</library><platform>P1</platform>" + "<sample>S1</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group>" + "</lane></flowcell><flowcell name=\"HS20001259128\">" + "<lane index=\"2\"><group ID=\"x2\"><library>L2</library>" + "<platform>P2</platform><sample>S2</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group></lane>" + "</flowcell></read-groups>");
pw.flush();
pw.close();
assertIsXml(xml);
final File out = createTmpFile(".bam");
Assert.assertEquals(new Biostar78400().instanceMain(newCmd().add("-o").add(out).add("-x").add(xml).add(in).make()), 0);
SamReader r = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(out);
Assert.assertTrue(r.getFileHeader() != null);
Assert.assertTrue(r.getFileHeader().getReadGroups() != null);
Assert.assertFalse(r.getFileHeader().getReadGroups().isEmpty());
SAMRecordIterator iter = r.iterator();
Assert.assertTrue(iter.hasNext());
rec = iter.next();
SAMReadGroupRecord rg = rec.getReadGroup();
Assert.assertNotNull(rg);
Assert.assertEquals(rg.getId(), "X1");
Assert.assertEquals(rg.getSample(), "S1");
Assert.assertFalse(iter.hasNext());
iter.close();
r.close();
}
use of htsjdk.samtools.DefaultSAMRecordFactory in project jvarkit by lindenb.
the class Biostar139647 method doWork.
@Override
public int doWork(final List<String> args) {
SAMFileWriter w = null;
LineIterator r = null;
try {
final String filename = super.oneFileOrNull(args);
if (filename == null) {
LOG.info("Reading from stdin");
r = IOUtils.openStreamForLineIterator(stdin());
} else {
LOG.info("Reading from " + filename);
r = IOUtils.openURIForLineIterator(filename);
}
Format format = Format.None;
while (r.hasNext() && format == Format.None) {
String line = r.peek();
if (line.trim().isEmpty()) {
r.next();
continue;
}
if (line.startsWith("CLUSTAL")) {
format = Format.Clustal;
// consume
r.next();
break;
} else if (line.startsWith(">")) {
format = Format.Fasta;
break;
} else {
LOG.error("MSA format not recognized in " + line);
return -1;
}
}
LOG.info("format : " + format);
if (Format.Fasta.equals(format)) {
// this.consensus=new FastaConsensus();
AlignSequence curr = null;
while (r.hasNext()) {
String line = r.next();
if (line.startsWith(">")) {
curr = new AlignSequence();
curr.name = line.substring(1).trim();
if (sample2sequence.containsKey(curr.name)) {
LOG.error("Sequence ID " + curr.name + " defined twice");
return -1;
}
sample2sequence.put(curr.name, curr);
} else if (curr != null) {
curr.seq.append(line.trim());
this.align_length = Math.max(this.align_length, curr.seq.length());
}
}
} else if (Format.Clustal.equals(format)) {
AlignSequence curr = null;
int columnStart = -1;
while (r.hasNext()) {
String line = r.next();
if (line.trim().isEmpty() || line.startsWith("CLUSTAL W")) {
columnStart = -1;
continue;
}
if (line.charAt(0) == ' ') {
if (columnStart == -1) {
LOG.error("illegal consensus line for " + line);
return -1;
}
} else {
if (columnStart == -1) {
columnStart = line.indexOf(' ');
if (columnStart == -1) {
LOG.error("no whithespace in " + line);
return -1;
}
while (columnStart < line.length() && line.charAt(columnStart) == ' ') {
columnStart++;
}
}
String seqname = line.substring(0, columnStart).trim();
curr = this.sample2sequence.get(seqname);
if (curr == null) {
curr = new AlignSequence();
curr.name = seqname;
this.sample2sequence.put(curr.name, curr);
}
curr.seq.append(line.substring(columnStart));
this.align_length = Math.max(align_length, curr.seq.length());
}
}
} else {
LOG.error("Undefined input format");
return -1;
}
final SAMFileHeader header = new SAMFileHeader();
final SAMSequenceDictionary dict = new SAMSequenceDictionary();
dict.addSequence(new SAMSequenceRecord(REF, this.align_length));
header.setSortOrder(SortOrder.unsorted);
header.setSequenceDictionary(dict);
final SAMProgramRecord pgr = header.createProgramRecord();
pgr.setProgramName(getProgramName());
pgr.setProgramVersion(getVersion());
if (getProgramCommandLine().trim().isEmpty()) {
pgr.setCommandLine("(empty)");
} else {
pgr.setCommandLine(getProgramCommandLine());
}
w = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, false);
final DefaultSAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
for (final String seqName : this.sample2sequence.keySet()) {
final AlignSequence shortRead = this.sample2sequence.get(seqName);
final SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(seqName);
rec.setReadString(shortRead.seq.toString().replaceAll("[\\*\\- ]+", ""));
int start = 0;
while (start < shortRead.seq.length() && !Character.isLetter(shortRead.at(start))) {
start++;
}
rec.setAlignmentStart(start + 1);
int end = shortRead.seq.length() - 1;
while (end > 0 && !Character.isLetter(shortRead.at(end))) {
--end;
}
// rec.setAlignmentEnd(end+1); not supported
int n = start;
StringBuilder dna = new StringBuilder();
final List<CigarElement> cigars = new ArrayList<>();
while (n <= end) {
if (!Character.isLetter(shortRead.at(n))) {
cigars.add(new CigarElement(1, CigarOperator.D));
} else {
cigars.add(new CigarElement(1, CigarOperator.M));
dna.append(shortRead.at(n));
}
n++;
}
// simplify cigar string
n = 0;
while (n + 1 < cigars.size()) {
CigarElement c1 = cigars.get(n);
CigarElement c2 = cigars.get(n + 1);
if (c1.getOperator().equals(c2.getOperator())) {
cigars.set(n, new CigarElement(c1.getLength() + c2.getLength(), c1.getOperator()));
cigars.remove(n + 1);
} else {
++n;
}
}
rec.setReadPairedFlag(false);
rec.setMappingQuality(60);
rec.setReferenceName(REF);
rec.setReadString(dna.toString());
rec.setCigar(new Cigar(cigars));
rec.setAttribute("PG", pgr.getId());
w.addAlignment(rec);
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.DefaultSAMRecordFactory in project jvarkit by lindenb.
the class BlastToSam method convertIterationToSequenceIteration.
private SequenceIteration convertIterationToSequenceIteration(final List<Iteration> stack, final SAMFileHeader header) throws XMLStreamException, JAXBException {
final SequenceIteration sequenceIteration = new SequenceIteration();
if (stack.isEmpty())
return sequenceIteration;
final SAMReadGroupRecord rg1 = header.getReadGroup("g1");
// sequenceIteration.iteration=iter1;
final SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
final StringBuilder readContent = new StringBuilder();
final int iterLength = Integer.parseInt(stack.get(0).getIterationQueryLen());
for (final Iteration iter1 : stack) {
for (final Hit hit : iter1.getIterationHits().getHit()) {
for (final Hsp hsp : hit.getHitHsps().getHsp()) {
for (final BlastHspAlignment.Align a : new BlastHspAlignment(hsp)) {
char c = a.getQueryChar();
if (!Character.isLetter(c))
continue;
final int queryIndex0 = a.getQueryIndex1() - 1;
while (readContent.length() <= queryIndex0) readContent.append('N');
if (readContent.charAt(queryIndex0) == 'N') {
readContent.setCharAt(queryIndex0, c);
} else if (readContent.charAt(queryIndex0) != c) {
throw new IllegalStateException("Expected character '" + readContent.charAt(queryIndex0) + "' but got '" + c + "' at " + queryIndex0 + "\n" + hsp.getHspQseq() + "\n" + hsp.getHspMidline() + "\n" + hsp.getHspHseq() + "\n" + readContent + "\n");
}
}
}
}
}
for (Iteration iter1 : stack) {
for (Hit hit : iter1.getIterationHits().getHit()) {
for (Hsp hsp : hit.getHitHsps().getHsp()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadUnmappedFlag(false);
rec.setReadName(iter1.getIterationQueryDef());
if (hit.getHitAccession() != null && !hit.getHitAccession().trim().isEmpty() && this.dictionary.getSequence(hit.getHitAccession()) != null) {
rec.setReferenceName(hit.getHitAccession());
} else {
rec.setReferenceName(hit.getHitDef());
}
final SAMSequenceRecord ssr = this.dictionary.getSequence(hit.getHitDef());
if (ssr == null) {
LOG.warn("Hit is not in SAMDictionary " + hit.getHitDef());
rec.setReferenceIndex(-1);
} else {
rec.setReferenceIndex(ssr.getSequenceIndex());
}
final BlastHspAlignment blastHspAlignment = new BlastHspAlignment(hsp);
rec.setReadNegativeStrandFlag(blastHspAlignment.isPlusMinus());
final List<CigarOperator> cigarL = new ArrayList<CigarOperator>();
for (BlastHspAlignment.Align a : blastHspAlignment) {
// System.err.println("##"+a);
if (a.getMidChar() == '|') {
cigarL.add(CigarOperator.EQ);
} else if (a.getMidChar() == ':') {
cigarL.add(CigarOperator.M);
} else if (a.getHitChar() == '-') {
cigarL.add(CigarOperator.I);
} else if (a.getQueryChar() == '-') {
cigarL.add(CigarOperator.D);
} else {
cigarL.add(CigarOperator.X);
}
}
if (cigarL.size() != hsp.getHspMidline().length()) {
throw new IllegalStateException("Boumm");
}
Cigar cigarE = new Cigar();
if (blastHspAlignment.getQueryFrom1() > 1) {
cigarE.add(new CigarElement(blastHspAlignment.getQueryFrom1() - 1, CigarOperator.S));
}
int x = 0;
while (x < cigarL.size()) {
int y = x + 1;
while (y < cigarL.size() && cigarL.get(x) == cigarL.get(y)) {
++y;
}
cigarE.add(new CigarElement(y - x, cigarL.get(x)));
x = y;
}
/* soft clip */
if (blastHspAlignment.getQueryTo1() < readContent.length()) {
cigarE.add(new CigarElement((readContent.length() - blastHspAlignment.getQueryTo1()), CigarOperator.S));
}
/* hard clip */
if (readContent.length() < iterLength) {
cigarE.add(new CigarElement((iterLength - readContent.length()), CigarOperator.H));
}
rec.setCigar(cigarE);
rec.setMappingQuality(40);
rec.setAlignmentStart(Math.min(blastHspAlignment.getHitFrom1(), blastHspAlignment.getHitTo1()));
rec.setAttribute("BB", Float.parseFloat(hsp.getHspBitScore()));
rec.setAttribute("BE", Float.parseFloat(hsp.getHspEvalue()));
rec.setAttribute("BS", Float.parseFloat(hsp.getHspScore()));
rec.setAttribute("NM", Integer.parseInt(hsp.getHspGaps()));
rec.setAttribute("RG", rg1.getId());
// setAlignmentEnd not supported in SAM API
// rec.setAlignmentEnd(Math.max(blastHspAlignment.getHitFrom1(),blastHspAlignment.getHitTo1()));
sequenceIteration.records.add(rec);
}
}
}
if (readContent.length() == 0) {
readContent.append('N');
}
byte[] readBases = readContent.toString().getBytes();
char[] readQuals = new char[readBases.length];
for (int i = 0; i < readBases.length; ++i) {
readQuals[i] = (readBases[i] == 'N' ? '#' : 'J');
}
if (sequenceIteration.records.isEmpty()) {
SAMRecord rec = samRecordFactory.createSAMRecord(header);
rec.setReadName(stack.get(0).getIterationQueryDef());
rec.setReadUnmappedFlag(true);
rec.setAttribute("RG", rg1.getId());
sequenceIteration.records.add(rec);
}
for (SAMRecord rec : sequenceIteration.records) {
rec.setReadString(new String(readBases));
rec.setReadBases(readBases);
rec.setBaseQualityString(new String(readQuals, 0, readQuals.length));
rec.setBaseQualities(htsjdk.samtools.SAMUtils.fastqToPhred(new String(readQuals, 0, readQuals.length)));
}
return sequenceIteration;
}
Aggregations