use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class LumpyMoreSamples method doWork.
@Override
public int doWork(final List<String> args) {
VcfIterator r = null;
VariantContextWriter vcw = null;
final Map<String, SamReader> sample2samreaders = new HashMap<>();
try {
r = super.openVcfIterator(oneFileOrNull(args));
final VCFHeader headerIn = r.getHeader();
final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
return -1;
}
final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
if (F.trim().isEmpty())
return;
final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
final SAMFileHeader samHeader = sr.getFileHeader();
final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
if (dict2 == null) {
throw new JvarkitException.BamDictionaryMissing(F);
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
}
for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
final String sample = rg.getSample();
if (StringUtil.isBlank(sample))
continue;
final SamReader reader = sample2samreaders.get(sample);
if (reader == null) {
sample2samreaders.put(sample, reader);
} else if (reader == sr) {
continue;
} else {
throw new JvarkitException.UserError("more than one sample per bam:" + F);
}
}
});
final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
outVcfSampleNames.addAll(sample2samreaders.keySet());
final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
headerOut.addMetaDataLine(SU2);
headerOut.addMetaDataLine(PE2);
headerOut.addMetaDataLine(SR2);
vcw = super.openVariantContextWriter(this.outputFile);
vcw.writeHeader(headerOut);
while (r.hasNext()) {
final VariantContext ctx = r.next();
final StructuralVariantType sttype = ctx.getStructuralVariantType();
if (sttype == null)
continue;
final int tid = dict.getSequenceIndex(ctx.getContig());
final Map<String, Genotype> genotypeMap = new HashMap<>();
ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
for (final String sample : sample2samreaders.keySet()) {
final SamReader samReader = sample2samreaders.get(sample);
final SupportingReads sr = new SupportingReads();
switch(sttype) {
case DEL:
{
int pos = ctx.getStart();
int[] ci = confidenceIntervalPos(ctx);
final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
int end = ctx.getEnd();
ci = confidenceIntervalEnd(ctx);
final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
final Cigar cigar = rec.getCigar();
if (cigar.isLeftClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
if (qi.overlaps(left)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
if (cigar.isRightClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
if (qi.overlaps(right)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
}
break;
}
default:
break;
}
final GenotypeBuilder gb;
if (genotypeMap.containsKey(sample)) {
gb = new GenotypeBuilder(genotypeMap.get(sample));
} else {
gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
}
gb.attribute(SR2.getID(), sr.splitReads);
gb.attribute(PE2.getID(), sr.pairedReads);
gb.attribute(SU2.getID(), 0);
genotypeMap.put(sample, gb.make());
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// add missing samples.
for (final String sampleName : outVcfSampleNames) {
if (genotypeMap.containsKey(sampleName))
continue;
genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
}
vcb.genotypes(genotypeMap.values());
vcw.add(vcb.make());
}
r.close();
r = null;
sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class BWAMemDigest method doWork.
@Override
public int doWork(List<String> args) {
IntervalTreeMap<Boolean> ignore = null;
DefaultMemOuput output = new DefaultMemOuput();
final float limitcigar = 0.15f;
SamReader r = null;
try {
r = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = r.getFileHeader();
if (IGNORE_BED != null) {
LOG.info("open " + IGNORE_BED);
ignore = new IntervalTreeMap<>();
BufferedReader in = IOUtils.openFileForBufferedReading(IGNORE_BED);
String line;
while ((line = in.readLine()) != null) {
if (line.isEmpty() || line.startsWith("#"))
continue;
String[] tokens = line.split("[\t]");
if (tokens.length < 3)
continue;
if (ignore.put(new Interval(tokens[0], Math.max(1, Integer.parseInt(tokens[1]) - (1 + IGNORE_EXTEND)), Integer.parseInt(tokens[2]) + IGNORE_EXTEND), Boolean.TRUE)) {
LOG.warn("BED:ignoring " + line);
}
}
in.close();
}
OtherCanonicalAlignFactory xPalignFactory = new OtherCanonicalAlignFactory(header);
SAMRecordIterator iter = r.iterator();
long readNum = 0L;
while (iter.hasNext()) {
SAMRecord record = iter.next();
++readNum;
if (!record.getReadPairedFlag())
continue;
if (record.getProperPairFlag())
continue;
if (record.getReadFailsVendorQualityCheckFlag())
continue;
if (record.getDuplicateReadFlag())
continue;
if (record.getReadUnmappedFlag())
continue;
if (ignore != null && ignore.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
LOG.info("ignore " + record);
continue;
}
float countM = 0f;
float countS = 0f;
for (CigarElement c : record.getCigar().getCigarElements()) {
switch(c.getOperator()) {
case M:
countM += c.getLength();
break;
case S:
countS += c.getLength();
break;
default:
break;
}
}
if (countM > 10 && ((countS / countM) > (1f - limitcigar) && (countS / countM) < (1f + limitcigar))) {
output.insertion(record, readNum, countS, countM);
}
for (OtherCanonicalAlign xp : xPalignFactory.getXPAligns(record)) {
if (ignore != null && ignore.containsOverlapping(new Interval(xp.getReferenceName(), xp.getAlignmentStart(), xp.getAlignmentStart()))) {
LOG.info("ignore " + record);
continue;
}
output.xp(record, readNum, xp);
}
if (record.getMateUnmappedFlag()) {
output.orphan(record, readNum);
continue;
}
if (ignore != null && ignore.containsOverlapping(new Interval(record.getMateReferenceName(), record.getMateAlignmentStart(), record.getMateAlignmentStart()))) {
LOG.info("ignore " + record);
continue;
}
if (record.getReferenceIndex() == record.getMateReferenceIndex()) {
output.deletion(record, readNum);
} else {
output.transloc(record, readNum);
}
}
} catch (Exception e) {
e.printStackTrace();
LOG.error(e);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(output);
}
return 0;
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class BamClipToInsertion method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
if (header1.getSortOrder() != SortOrder.coordinate) {
LOG.error("Input is not sorted on coordinate.");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment(getProgramName() + ":" + getVersion() + ":" + getProgramCommandLine());
header2.setSortOrder(SortOrder.unsorted);
sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
String curContig = null;
LinkedList<SamAndCigar> buffer = new LinkedList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
// ignore unmapped reads
if (rec.getReadUnmappedFlag()) {
sfw.addAlignment(rec);
continue;
}
}
if (rec == null || (curContig != null && !curContig.equals(rec.getReferenceName()))) {
for (final SamAndCigar r : buffer) sfw.addAlignment(r.getSAMRecord());
buffer.clear();
// we're done
if (rec == null)
break;
curContig = rec.getReferenceName();
}
final SamAndCigar sac = new SamAndCigar(rec);
if (!sac.containsIorS) {
sfw.addAlignment(rec);
continue;
}
buffer.add(sac);
int i = 0;
while (i < buffer.size()) {
final SamAndCigar ri = buffer.get(i);
if (ri.getSAMRecord().getUnclippedEnd() < rec.getUnclippedStart()) {
for (int j = 0; j < buffer.size(); ++j) {
if (i == j)
continue;
ri.merge(buffer.get(j));
}
sfw.addAlignment(ri.build());
buffer.remove(i);
} else {
++i;
}
}
}
progress.finish();
LOG.info("done");
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class BamTile method doWork.
@Override
public int doWork(final List<String> args) {
SAMRecordIterator iter = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
LOG.error("File header missing");
return -1;
}
if (header1.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
LOG.error("File header not sorted on coordinate");
return -1;
}
final SAMFileHeader header2 = header1.clone();
header2.addComment("BamTile:" + getVersion() + ":" + getProgramCommandLine());
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header2, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
iter = sfr.iterator();
final LinkedList<SAMRecord> buffer = new LinkedList<>();
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (this.filterOut.filterOut(rec))
continue;
if (!buffer.isEmpty()) {
final SAMRecord last = buffer.getLast();
if (this.no_overlap) {
if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getEnd() >= rec.getStart()) {
continue;
}
} else {
if (last.getReferenceIndex() == rec.getReferenceIndex() && last.getAlignmentStart() <= rec.getAlignmentStart() && last.getAlignmentEnd() >= rec.getAlignmentEnd()) {
continue;
}
}
}
}
if (rec == null || (!buffer.isEmpty() && buffer.getLast().getReferenceIndex() != rec.getReferenceIndex())) {
while (!buffer.isEmpty()) {
sfw.addAlignment(buffer.removeFirst());
}
if (rec == null)
break;
}
buffer.add(rec);
if (!this.no_overlap && buffer.size() > 2) {
final int index = buffer.size();
final SAMRecord prev = buffer.get(index - 3);
final SAMRecord curr = buffer.get(index - 2);
final SAMRecord next = buffer.get(index - 1);
if (prev.getAlignmentEnd() >= next.getAlignmentStart() || curr.getAlignmentEnd() <= prev.getAlignmentEnd()) {
buffer.remove(index - 2);
} else if (curr.getAlignmentStart() == prev.getAlignmentStart() && curr.getAlignmentEnd() > prev.getAlignmentEnd()) {
buffer.remove(index - 3);
}
}
while (buffer.size() > 3) {
sfw.addAlignment(buffer.removeFirst());
}
}
progress.finish();
sfw.close();
sfw = null;
iter.close();
iter = null;
sfr.close();
sfr = null;
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class BamToSql method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidxFile == null) {
LOG.error("ref sequence faidx not defined");
return -1;
}
SAMRecordIterator iter = null;
SamReader sfr = null;
PrintWriter out = null;
GenomicSequence genomicSequence = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
args = new ArrayList<String>(IOUtils.unrollFiles(args));
try {
out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.faidxFile);
out.println("CREATE TABLE IF NOT EXISTS SamFile");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("filename TEXT");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Dictionary");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("name TEXT NOT NULL,");
out.println("length INT NOT NULL,");
out.println("tid INT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS ReadGroup");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("groupId TEXT NOT NULL,");
out.println("sample TEXT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Read");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("name TEXT NOT NULL,");
out.println("flag INTEGER NOT NULL,");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
out.println(flg.name() + " INTEGER NOT NULL,");
}
}
out.println("rname TEXT,");
out.println("pos INTEGER,");
out.println("mapq INTEGER NOT NULL,");
out.println("cigar TEXT,");
out.println("rnext TEXT,");
out.println("pnext INTEGER,");
out.println("tlen INTEGER,");
out.println("sequence TEXT NOT NULL,");
out.println("qualities TEXT NOT NULL,");
out.println("samfile_id INT NOT NULL,");
out.println("group_id INT,");
out.println("FOREIGN KEY(samfile_id) REFERENCES SamFile(id),");
out.println("FOREIGN KEY(group_id) REFERENCES ReadGroup(id)");
out.println(");");
out.println("CREATE TABLE IF NOT EXISTS Cigar");
out.println("(");
out.println("id INTEGER PRIMARY KEY,");
out.println("read_pos INT ,");
out.println("read_base TEXT,");
out.println("read_qual INT ,");
out.println("ref_pos INT ,");
out.println("ref_base TEXT,");
out.println("operator TEXT NOT NULL,");
out.println("read_id INT NOT NULL,");
out.println("FOREIGN KEY(read_id) REFERENCES Read(id)");
out.println(");");
out.println("begin transaction;");
int samIndex = 0;
do {
final String inputName;
if (samIndex == 0 && args.isEmpty()) {
sfr = openSamReader(null);
inputName = "<stdin>";
} else {
inputName = args.get(samIndex);
sfr = openSamReader(inputName);
}
final SAMFileHeader header1 = sfr.getFileHeader();
if (header1 == null) {
throw new JvarkitException.FileFormatError("File header missing");
}
final SAMSequenceDictionary dict = header1.getSequenceDictionary();
if (dict == null) {
throw new JvarkitException.DictionaryMissing("No Dictionary in input");
}
final IntervalParser intervalParser = new IntervalParser(dict);
final Interval userInterval;
iter = null;
if (this.regionStr == null || this.regionStr.isEmpty()) {
LOG.warn("You're currently scanning the whole BAM ???!!!");
iter = sfr.iterator();
userInterval = null;
} else {
userInterval = intervalParser.parse(this.regionStr);
if (userInterval == null) {
throw new JvarkitException.UserError("cannot parse interval " + this.regionStr);
}
iter = sfr.query(userInterval.getContig(), userInterval.getStart(), userInterval.getEnd(), false);
}
out.println(String.join(" ", "insert into SamFile(filename) values(", quote(inputName), ");"));
for (int i = 0; i < dict.size(); ++i) {
final SAMSequenceRecord ssr = dict.getSequence(i);
out.println("insert into Dictionary(name,length,tid,samfile_id) select " + quote(inputName) + "," + ssr.getSequenceLength() + "," + i + ",max(id) from SamFile;");
}
for (final SAMReadGroupRecord g : header1.getReadGroups()) {
out.println("insert into ReadGroup(groupId,sample,samfile_id) select " + quote(g.getId()) + "," + quote(g.getSample()) + "," + "max(id) from SamFile;");
}
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header1);
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final StringBuilder sql = new StringBuilder();
sql.append("insert into Read(" + "name,flag,");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
sql.append(flg.name()).append(",");
}
}
sql.append("rname,pos,mapq,cigar,rnext,pnext,tlen,sequence,qualities,group_id,samfile_id) select ");
sql.append(quote(rec.getReadName())).append(",");
sql.append(rec.getFlags()).append(",");
if (this.printflag) {
for (final SAMFlag flg : SAMFlag.values()) {
sql.append(flg.isSet(rec.getFlags()) ? 1 : 0);
sql.append(",");
}
}
if (rec.getReferenceName() == null || rec.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
sql.append("NULL,NULL");
} else {
sql.append(quote(rec.getReferenceName()));
sql.append(",");
sql.append(rec.getAlignmentStart());
}
sql.append(",");
sql.append(rec.getMappingQuality());
sql.append(",");
// cigar
if (rec.getCigarString() == null || rec.getCigarString().equals(SAMRecord.NO_ALIGNMENT_CIGAR)) {
sql.append("NULL");
} else {
sql.append(quote(rec.getCigarString()));
}
sql.append(",");
// rnext
if (rec.getMateReferenceName() == null || rec.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
sql.append("NULL,NULL");
} else {
sql.append(quote(rec.getMateReferenceName()));
sql.append(",");
sql.append(rec.getMateAlignmentStart());
}
sql.append(",");
// tlen
sql.append(rec.getInferredInsertSize());
sql.append(",");
// sequence
sql.append(quote(rec.getReadString()));
sql.append(",");
// qualities
sql.append(quote(rec.getBaseQualityString()));
sql.append(",");
if (rec.getReadGroup() == null) {
sql.append("NULL");
} else {
sql.append("G.id");
}
sql.append(",F.id FROM SamFile as F");
if (rec.getReadGroup() != null) {
sql.append(" , ReadGroup as G where G.groupId=").append(quote(rec.getReadGroup().getId())).append(" and F.id = G.samfile_id ");
}
sql.append(" ORDER BY F.id DESC LIMIT 1;");
out.println(sql.toString());
if (this.printcigar && !rec.getReadUnmappedFlag() && rec.getCigar() != null) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
int ref = rec.getUnclippedStart();
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
int read = 0;
for (final CigarElement ce : rec.getCigar()) {
final CigarOperator op = ce.getOperator();
if (op.equals(CigarOperator.P))
continue;
for (int i = 0; i < ce.getLength(); ++i) {
sql.setLength(0);
boolean in_user_interval = true;
sql.append("insert into Cigar(operator,read_pos,read_base,read_qual,ref_pos,ref_base,read_id) ");
sql.append("select '");
sql.append(op.name());
sql.append("',");
if (userInterval != null && !(rec.getReferenceName().equals(userInterval.getContig()) && ref >= userInterval.getStart() && ref <= userInterval.getEnd())) {
in_user_interval = false;
}
switch(op) {
case I:
{
sql.append(read);
sql.append(",");
sql.append("'" + (char) bases[read] + "',");
sql.append("" + quals[read] + "");
sql.append(",");
sql.append("NULL,NULL");
read++;
break;
}
case D:
case N:
case // yes H (hard clip)
H:
{
sql.append("NULL,NULL,NULL,");
sql.append(ref);
sql.append(",'");
sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
sql.append("'");
ref++;
break;
}
case M:
case X:
case EQ:
case // yes S, soft clip
S:
{
sql.append(read);
sql.append(",");
sql.append("'" + (char) bases[read] + "',");
sql.append("" + quals[read] + "");
sql.append(",");
sql.append(ref);
sql.append(",'");
sql.append((ref < 1 || ref - 1 >= genomicSequence.length()) ? '*' : genomicSequence.charAt(ref - 1));
sql.append("'");
ref++;
read++;
break;
}
default:
throw new IllegalStateException();
}
sql.append(", id from Read ORDER BY id DESC LIMIT 1;");
if (in_user_interval)
out.println(sql.toString());
}
}
}
}
iter.close();
iter = null;
sfr.close();
sfr = null;
progress.finish();
samIndex++;
} while (samIndex < args.size());
out.println("COMMIT;");
out.flush();
out.close();
LOG.info("done");
return 0;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(sfr);
CloserUtil.close(out);
CloserUtil.close(indexedFastaSequenceFile);
}
}
Aggregations