use of htsjdk.samtools.SAMFlag in project jvarkit by lindenb.
the class BamStats02 method doWork.
@Override
public int doWork(List<String> args) {
SamReader samFileReader = null;
PrintWriter out = null;
try {
if (bedFile != null) {
LOG.info("Reading BED file " + bedFile);
this.intervals = super.readBedFileAsBooleanIntervalTreeMap(bedFile);
}
out = super.openFileOrStdoutAsPrintWriter(outputFile);
boolean first = true;
out.print("#");
for (final STRING_PROPS p : STRING_PROPS.values()) {
if (!first)
out.print("\t");
first = false;
out.print(p.name());
}
for (final INT_PROPS p : INT_PROPS.values()) {
out.print("\t");
out.print(p.name());
}
for (final SAMFlag flg : SAMFlag.values()) {
out.print("\t");
out.print(flg.name());
}
out.print("\t");
out.print("count");
out.println();
final SamReaderFactory srf = super.createSamReaderFactory();
if (args.isEmpty()) {
LOG.info("Reading from stdin");
samFileReader = srf.open(SamInputResource.of(stdin()));
run("stdin", samFileReader, out);
samFileReader.close();
samFileReader = null;
} else {
for (final String filename : IOUtils.unrollFiles(args)) {
LOG.info("Reading from " + filename);
samFileReader = srf.open(new File(filename));
run(filename, samFileReader, out);
samFileReader.close();
samFileReader = null;
}
}
out.flush();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(samFileReader);
CloserUtil.close(out);
}
}
use of htsjdk.samtools.SAMFlag in project jvarkit by lindenb.
the class Biostar59647 method doWork.
@Override
public int doWork(final List<String> args) {
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
SamReader samFileReader = null;
PrintStream pout;
try {
GenomicSequence genomicSequence = null;
indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
samFileReader = null;
final String bamFile = oneFileOrNull(args);
samFileReader = super.openSamReader(bamFile);
if (!SequenceUtil.areSequenceDictionariesEqual(indexedFastaSequenceFile.getSequenceDictionary(), samFileReader.getFileHeader().getSequenceDictionary())) {
LOG.warning("Not the same sequence dictionaries");
}
final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
pout = (outputFile == null ? stdout() : new PrintStream(this.outputFile));
final XMLStreamWriter w = xmlfactory.createXMLStreamWriter(pout, "UTF-8");
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("sam");
w.writeAttribute("bam", (bamFile == null ? "stdin" : bamFile));
w.writeAttribute("ref", refFile.getPath());
w.writeComment(getProgramCommandLine());
final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(samFileReader.getFileHeader().getSequenceDictionary());
final SAMRecordIterator iter = samFileReader.iterator();
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
progess.watch(rec);
final byte[] readbases = rec.getReadBases();
w.writeStartElement("read");
w.writeStartElement("name");
w.writeCharacters(rec.getReadName());
w.writeEndElement();
w.writeStartElement("sequence");
w.writeCharacters(new String(readbases));
w.writeEndElement();
w.writeStartElement("flags");
for (SAMFlag f : SAMFlag.values()) {
w.writeAttribute(f.name(), String.valueOf(f.isSet(rec.getFlags())));
}
w.writeCharacters(String.valueOf(rec.getFlags()));
// flags
w.writeEndElement();
if (!rec.getReadUnmappedFlag()) {
w.writeStartElement("qual");
w.writeCharacters(String.valueOf(rec.getMappingQuality()));
w.writeEndElement();
w.writeStartElement("chrom");
w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
w.writeCharacters(rec.getReferenceName());
w.writeEndElement();
w.writeStartElement("pos");
w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
w.writeEndElement();
w.writeStartElement("cigar");
w.writeCharacters(rec.getCigarString());
w.writeEndElement();
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
w.writeStartElement("mate-chrom");
w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
w.writeCharacters(rec.getMateReferenceName());
w.writeEndElement();
w.writeStartElement("mate-pos");
w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
w.writeEndElement();
}
if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
w.writeStartElement("align");
int readIndex = 0;
int refIndex = rec.getAlignmentStart();
for (final CigarElement e : rec.getCigar().getCigarElements()) {
switch(e.getOperator()) {
// ignore hard clips
case H:
break;
// ignore pads
case P:
break;
// cont.
case I:
case S:
{
final int length = e.getLength();
for (int i = 0; i < length; ++i) {
w.writeEmptyElement(e.getOperator().name());
w.writeAttribute("read-index", String.valueOf(readIndex + 1));
if (readIndex >= 0 && readIndex < readbases.length) {
w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
}
readIndex++;
}
break;
}
// cont. -- reference skip
case N:
case D:
{
final int length = e.getLength();
for (int i = 0; i < length; ++i) {
w.writeEmptyElement(e.getOperator().name());
w.writeAttribute("ref-index", String.valueOf(refIndex));
if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
w.writeAttribute("ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
}
refIndex++;
}
break;
}
case M:
case EQ:
case X:
{
final int length = e.getLength();
for (int i = 0; i < length; ++i) {
w.writeEmptyElement(e.getOperator().name());
char baseRead = '\0';
if (readIndex >= 0 && readIndex < readbases.length) {
baseRead = (char) (rec.getReadBases()[readIndex]);
w.writeAttribute("read-index", String.valueOf(readIndex + 1));
w.writeAttribute("read-base", String.valueOf(baseRead));
}
w.writeAttribute("ref-index", String.valueOf(refIndex));
if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
char baseRef = genomicSequence.charAt(refIndex - 1);
w.writeAttribute("ref-base", String.valueOf(baseRef));
if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
w.writeAttribute("mismatch", "true");
}
}
refIndex++;
readIndex++;
}
break;
}
default:
throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
}
}
w.writeEndElement();
}
w.writeEndElement();
}
iter.close();
w.writeEndElement();
w.writeEndDocument();
w.flush();
pout.flush();
CloserUtil.close(w);
CloserUtil.close(pout);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(samFileReader);
CloserUtil.close(indexedFastaSequenceFile);
}
return 0;
}
use of htsjdk.samtools.SAMFlag in project jvarkit by lindenb.
the class BamToSql method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidxFile == null) {
LOG.error("ref sequence faidx not defined");
return -1;
}
SAMRecordIterator iter = null;
SamReader sfr = null;
PrintWriter out = null;
GenomicSequence genomicSequence = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
args = new ArrayList<String>(IOUtils.unrollFiles(args));
try {
out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
out.println("CREATE TABLE IF NOT EXISTS SamFile");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("filename TEXT");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Dictionary");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("name TEXT NOT NULL,");
out.println("length INT NOT NULL,");
out.println("tid INT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("groupId TEXT NOT NULL,");
out.println("sample TEXT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Read");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("name TEXT NOT NULL,");
out.println("flag INTEGER NOT NULL,");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
out.println(flg.name() + " INTEGER NOT NULL,");
}
}
out.println("rname TEXT,");
out.println("pos INTEGER,");
out.println("mapq INTEGER NOT NULL,");
out.println("cigar TEXT,");
out.println("rnext TEXT,");
out.println("pnext INTEGER,");
out.println("tlen INTEGER,");
out.println("sequence TEXT NOT NULL,");
out.println("qualities TEXT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("group_id INT,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Cigar");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("read_pos INT ,");
out.println("read_base TEXT,");
out.println("read_qual INT ,");
out.println("ref_pos INT ,");
out.println("ref_base TEXT,");
out.println("operator TEXT NOT NULL,");
out.println("read_id INT NOT NULL,");
out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
out.println(");");
out.println("begin transaction;");
int samIndex = 0;
do {
final String inputName;
if (samIndex == 0 && args.isEmpty()) {
sfr = openSamReader(null);
inputName = "<stdin>";
} else {
inputName = args.get(samIndex);
sfr = openSamReader(inputName);
}
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
throw new JvarkitException.FileFormatError("File header missing");
}
final SAMSequenceDictionary dict = header1.getSequenceDictionary();
if (dict == null) {
throw new JvarkitException.DictionaryMissing("No Dictionary in input");
}
final IntervalParser intervalParser = new IntervalParser(dict);
final Interval userInterval;
iter = null;
if (this.regionStr == null || this.regionStr.isEmpty()) {
LOG.warn("You're currently scanning the whole BAM ???!!!");
iter = sfr.iterator();
userInterval = null;
} else {
userInterval = intervalParser.parse(this.regionStr);
if (userInterval == null) {
throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
}
iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
}
out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
for (int i = 0; i < dict.size(); ++i) {
final SAMSequenceRecord ssr = dict.getSequence(i);
out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
}
for (final SAMReadGroupRecord g : header1.getReadGroups()) {
out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final StringBuilder sql = new StringBuilder();
sql.append("insert into Read(" + "name,flag,");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
sql.append(flg.name()).append(",");
}
}
sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
sql.append(quote(rec.getReadName())).append(",");
sql.append(rec.getFlags()).append(",");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
sql.append(",");
}
}
if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
sql.append("NULL,NULL");
} else {
sql.append(quote(rec.getReferenceName()));
sql.append(",");
sql.append(rec.getAlignmentStart());
}
sql.append(",");
sql.append(rec.getMappingQuality());
sql.append(",");
// cigar
if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
sql.append("NULL");
} else {
sql.append(quote(rec.getCigarString()));
}
sql.append(",");
// rnext
if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
sql.append("NULL,NULL");
} else {
sql.append(quote(rec.getMateReferenceName()));
sql.append(",");
sql.append(rec.getMateAlignmentStart());
}
sql.append(",");
// tlen
sql.append(rec.getInferredInsertSize());
sql.append(",");
// sequence
sql.append(quote(rec.getReadString()));
sql.append(",");
// qualities
sql.append(quote(rec.getBaseQualityString()));
sql.append(",");
if (rec.getReadGroup() == null) {
sql.append("NULL");
} else {
sql.append("G.id");
}
sql.append(",F.id FROM SamFile as F");
if (rec.getReadGroup() != null) {
sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
}
sql.append(" ORDER BY F.id DESC LIMIT 1;");
out.println(sql.toString());
if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
int ref = rec.getUnclippedStart();
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
int read = 0;
for (final CigarElement ce : rec.getCigar()) {
final CigarOperator op = ce.getOperator();
if (op.equals(CigarOperator.P))
continue;
for (int i = 0; i < ce.getLength(); ++i) {
sql.setLength(0);
boolean in_user_interval = true;
sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
sql.append("select '");
sql.append(op.name());
sql.append("',");
if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
in_user_interval = false;
}
switch(op) {
case I:
{
sql.append(read);
sql.append(",");
sql.append("'" + (char) bases[read] + "',");
sql.append("" + quals[read] + "");
sql.append(",");
sql.append("NULL,NULL");
read++;
break;
}
case D:
case N:
case // yes H (hard clip)
H:
{
sql.append("NULL,NULL,NULL,");
sql.append(ref);
sql.append(",'");
sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
sql.append("'");
ref++;
break;
}
case M:
case X:
case EQ:
case // yes S, soft clip
S:
{
sql.append(read);
sql.append(",");
sql.append("'" + (char) bases[read] + "',");
sql.append("" + quals[read] + "");
sql.append(",");
sql.append(ref);
sql.append(",'");
sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
sql.append("'");
ref++;
read++;
break;
}
default:
throw new IllegalStateException();
}
sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
if (in_user_interval)
out.println(sql.toString());
}
}
}
}
iter.close();
iter = null;
sfr.close();
sfr = null;
progress.finish();
samIndex++;
} while (samIndex < args.size());
out.println("COMMIT;");
out.flush();
out.close();
LOG.info("done");
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(out);
CloserUtil.close(indexedFastaSequenceFile);
}
}
use of htsjdk.samtools.SAMFlag in project jvarkit by lindenb.
the class BamStage method makeFlagPredicate.
/**
* build a Predicate for filtering on SAM FLAG using the checkboxes
*/
private Predicate<SAMRecord> makeFlagPredicate() {
java.util.function.Predicate<SAMRecord> recFilter = x -> true;
for (final SAMFlag flag : this.flag2filterInMenuItem.keySet()) {
CheckMenuItem cbox = this.flag2filterInMenuItem.get(flag);
if (!cbox.isSelected())
continue;
recFilter = recFilter.and(R -> flag.isSet(R.getFlags()));
}
for (final SAMFlag flag : this.flag2filterOutMenuItem.keySet()) {
CheckMenuItem cbox = this.flag2filterOutMenuItem.get(flag);
if (!cbox.isSelected())
continue;
recFilter = recFilter.and(R -> !flag.isSet(R.getFlags()));
}
return recFilter;
}
Aggregations