use of com.github.lindenb.jvarkit.samtools.util.SimplePosition in project jvarkit by lindenb.
the class FindAllCoverageAtPosition method scan.
private void scan(final BufferedReader in, final Set<SimplePosition> mutations) throws Exception {
final String DEFAULT_SAMPLE_NAME = "(undefined)";
String line;
while ((line = in.readLine()) != null) {
if (out.checkError())
break;
if (StringUtils.isBlank(line) || line.startsWith("#"))
continue;
final Path f = Paths.get(line);
if (!Files.exists(f))
continue;
if (!Files.isRegularFile(f))
continue;
if (!Files.isReadable(f))
continue;
final String filename = f.getFileName().toString();
if (filename.endsWith(FileExtensions.CRAM)) {
if (referenceFileFile == null) {
LOG.warn("skipping " + f + " not reference was provided");
continue;
}
final SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(f);
if (dict == null)
continue;
if (!SequenceUtil.areSequenceDictionariesEqual(dict, this.fastaDict)) {
LOG.warn("skipping " + f + " because this is not the same sequence dictionary");
continue;
}
}
if (!(filename.endsWith(FileExtensions.BAM) || filename.endsWith(FileExtensions.CRAM)))
continue;
try {
try (SamReader samReader = this.samReaderFactory.open(f)) {
if (!samReader.hasIndex()) {
LOG.warn("no index for " + f);
continue;
}
final SAMFileHeader header = samReader.getFileHeader();
for (final SimplePosition src : mutations) {
final Map<String, CigarAndBases> sample2count = new TreeMap<String, CigarAndBases>();
for (final SAMReadGroupRecord rg : header.getReadGroups()) {
if (rg != null) {
String sn = this.groupBy.apply(rg);
if (sn != null && !sn.trim().isEmpty()) {
sample2count.put(sn, new CigarAndBases());
}
}
}
if (sample2count.isEmpty()) {
sample2count.put(DEFAULT_SAMPLE_NAME, new CigarAndBases());
}
final SimplePosition m = convertFromSamHeader(f, header, src);
if (m == null)
continue;
try (CloseableIterator<SAMRecord> iter = samReader.query(m.getContig(), Math.max(1, m.getPosition() - this.extend), m.getPosition() + this.extend, false)) {
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
if (rec.getMappingQuality() < this.mapq)
continue;
if (this.filter.filterOut(rec))
continue;
final Cigar cigar = rec.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
if (rec.getUnclippedEnd() < m.getPosition())
continue;
if (rec.getUnclippedStart() > m.getPosition())
continue;
if (rec.getReadBases() == SAMRecord.NULL_SEQUENCE)
continue;
final String readString = rec.getReadString().toUpperCase();
final String sampleName = this.groupBy.getPartion(rec, DEFAULT_SAMPLE_NAME);
CigarAndBases counter = sample2count.get(sampleName);
if (counter == null) {
counter = new CigarAndBases();
sample2count.put(sampleName, counter);
}
int ref = rec.getUnclippedStart();
int readPos = 0;
for (int k = 0; k < cigar.numCigarElements() && ref < m.getPosition() + 1; ++k) {
final CigarElement ce = cigar.getCigarElement(k);
final CigarOperator op = ce.getOperator();
switch(op) {
case P:
break;
case I:
{
if (ref == m.getPosition()) {
counter.operators.incr(op);
counter.bases.incr(INSERTION_CHAR);
}
readPos += ce.getLength();
break;
}
case D:
case N:
case M:
case X:
case EQ:
case H:
case S:
{
for (int i = 0; i < ce.getLength(); ++i) {
if (ref == m.getPosition()) {
counter.operators.incr(op);
switch(op) {
case H:
{
if (use_clipped_bases)
counter.bases.incr('N');
break;
}
case S:
{
if (use_clipped_bases)
counter.bases.incr(readString.charAt(readPos));
break;
}
case M:
case X:
case EQ:
counter.bases.incr(readString.charAt(readPos));
break;
case D:
case N:
counter.bases.incr(DELETION_CHAR);
break;
default:
break;
}
}
if (op.consumesReadBases())
++readPos;
ref++;
}
break;
}
default:
throw new RuntimeException("unknown operator:" + op);
}
}
}
}
for (final String sample : sample2count.keySet()) {
final CigarAndBases counter = sample2count.get(sample);
out.print(f);
out.print('\t');
out.print(m.getContig());
out.print('\t');
out.print(m.getPosition());
if (this.indexedFastaSequenceFile != null) {
out.print('\t');
out.print(getReferenceAt(m.getContig(), m.getPosition()));
}
out.print('\t');
out.print(sample);
out.print('\t');
out.print(counter.operators.count(CigarOperator.M) + counter.operators.count(CigarOperator.EQ) + counter.operators.count(CigarOperator.X) + (use_clipped_bases ? counter.operators.count(CigarOperator.H) + counter.operators.count(CigarOperator.S) : 0));
for (final CigarOperator op : CigarOperator.values()) {
out.print('\t');
out.print(counter.operators.count(op));
}
for (char c : BASES_To_PRINT) {
out.print('\t');
out.print(counter.bases.count(c));
}
out.println();
}
}
// end of loop mutations
}
// end of samReader
} catch (final Throwable err) {
LOG.error(err);
throw err;
} finally {
}
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimplePosition in project jvarkit by lindenb.
the class VcfGtfSplitter method testTranscript.
private boolean testTranscript(final Transcript transcript, final VariantContext ctx) {
if (!transcript.overlaps(ctx)) {
if (this.use_upstream) {
final SimplePosition pos = new SimplePosition(transcript.getContig(), transcript.isPositiveStrand() ? transcript.getStart() : transcript.getEnd());
if (ctx.withinDistanceOf(pos, this.xxxxstream_length))
return true;
}
if (this.use_downstream) {
final SimplePosition pos = new SimplePosition(transcript.getContig(), transcript.isPositiveStrand() ? transcript.getEnd() : transcript.getStart());
if (ctx.withinDistanceOf(pos, this.xxxxstream_length))
return true;
}
return false;
}
if (this.use_exon && transcript.hasExon() && transcript.getExons().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (this.use_intron && transcript.hasIntron() && transcript.getIntrons().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (this.use_cds && transcript.hasCDS() && transcript.getAllCds().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (this.use_utr5) {
if (transcript.isPositiveStrand() && transcript.getUTR5().isPresent() && transcript.getUTR5().get().overlaps(ctx))
return true;
if (transcript.isNegativeStrand() && transcript.getUTR3().isPresent() && transcript.getUTR3().get().overlaps(ctx))
return true;
}
if (this.use_utr3) {
if (transcript.isPositiveStrand() && transcript.getUTR3().isPresent() && transcript.getUTR3().get().overlaps(ctx))
return true;
if (transcript.isNegativeStrand() && transcript.getUTR5().isPresent() && transcript.getUTR5().get().overlaps(ctx))
return true;
}
if (this.use_cds_utr5) {
if (transcript.isPositiveStrand() && transcript.getUTR5().isPresent() && transcript.getUTR5().get().getIntervals().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (transcript.isNegativeStrand() && transcript.getUTR3().isPresent() && transcript.getUTR3().get().getIntervals().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
}
if (this.use_cds_utr3) {
if (transcript.isPositiveStrand() && transcript.getUTR3().isPresent() && transcript.getUTR3().get().getIntervals().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (transcript.isNegativeStrand() && transcript.getUTR5().isPresent() && transcript.getUTR5().get().getIntervals().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
}
if (this.use_stop && transcript.hasCodonStopDefined() && transcript.getCodonStop().get().getBlocks().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (this.use_start && transcript.hasCodonStartDefined() && transcript.getCodonStart().get().getBlocks().stream().anyMatch(FEAT -> FEAT.overlaps(ctx)))
return true;
if (this.use_splice && transcript.hasIntron()) {
for (final Intron intron : transcript.getIntrons()) {
final SimpleInterval splice1 = new SimpleInterval(intron.getContig(), intron.getStart() - 1, intron.getStart());
if (ctx.withinDistanceOf(splice1, this.split_length))
return true;
final SimpleInterval splice2 = new SimpleInterval(intron.getContig(), intron.getEnd(), intron.getEnd() + 1);
if (ctx.withinDistanceOf(splice2, this.split_length))
return true;
}
}
return false;
}
use of com.github.lindenb.jvarkit.samtools.util.SimplePosition in project jvarkit by lindenb.
the class FindAVariation method convertFromVcfHeader.
private Set<SimplePosition> convertFromVcfHeader(final String f, final VCFHeader h) {
final SAMSequenceDictionary dict = h.getSequenceDictionary();
if (dict == null || dict.isEmpty()) {
return this.mutations;
}
final Set<SimplePosition> copy = new HashSet<>(this.mutations.size());
final ContigNameConverter ctgCvt = ContigNameConverter.fromOneDictionary(dict);
for (final SimplePosition m : this.mutations) {
final String s = ctgCvt.apply(m.getContig());
if (StringUtils.isBlank(s)) {
LOG.warn("Cannot convert chrom " + m.getContig() + " in " + f);
continue;
}
copy.add(m.renameContig(s));
}
return copy;
}
use of com.github.lindenb.jvarkit.samtools.util.SimplePosition in project jvarkit by lindenb.
the class FindAVariation method doWork.
@Override
public int doWork(final List<String> args) {
BufferedReader r = null;
try {
for (final String f : this.positionFilesList) {
r = IOUtils.openURIForBufferedReading(f);
String line;
while ((line = r.readLine()) != null) {
if (line.isEmpty() || line.startsWith("#"))
continue;
final SimplePosition m = new SimplePosition(line);
this.mutations.add(m);
}
r.close();
}
for (final String s : this.positionsList) {
final SimplePosition m = new SimplePosition(s);
this.mutations.add(m);
}
this.out = super.openPathOrStdoutAsPrintWriter(this.outputFile);
this.out.println("#FILE\tCHROM\tstart\tend\tID\tREF\tsample\ttype\tALLELES\tAD\tDP\tGQ");
if (args.isEmpty()) {
try (BufferedReader br = new BufferedReader(new InputStreamReader(stdin()))) {
br.lines().forEach(L -> scanLine(L));
}
} else if (args.size() == 1 && args.get(0).endsWith(".list")) {
try (BufferedReader br = IOUtils.openPathForBufferedReading(Paths.get(args.get(0)))) {
br.lines().forEach(L -> scanLine(L));
}
} else {
args.forEach(L -> scanLine(L));
}
this.out.flush();
this.out.close();
this.out = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(out);
CloserUtil.close(r);
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimplePosition in project jvarkit by lindenb.
the class FindAllCoverageAtPosition method doWork.
@Override
public int doWork(final List<String> args) {
final Set<SimplePosition> mutations = new TreeSet<>();
if (this.extend < 1) {
LOG.error("-x should be >=1");
return -1;
}
try {
this.samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
if (this.referenceFileFile != null) {
this.indexedFastaSequenceFile = htsjdk.samtools.reference.ReferenceSequenceFileFactory.getReferenceSequenceFile(this.referenceFileFile);
this.samReaderFactory.referenceSequence(this.referenceFileFile);
this.fastaDict = SequenceDictionaryUtils.extractRequired(this.indexedFastaSequenceFile);
}
this.positionStrs.stream().flatMap(S -> Arrays.stream(S.split("[ \t;]+"))).filter(S -> !StringUtils.isBlank(S)).map(S -> new SimplePosition(S)).forEach(X -> mutations.add(X));
if (this.positionFile != null) {
if (this.positionFile.toString().endsWith(".bed") || this.positionFile.toString().endsWith(".bed.gz")) {
try (BedLineReader br = new BedLineReader(this.positionFile)) {
br.stream().filter(B -> B != null).forEach(bedLine -> {
for (int x = bedLine.getStart(); x <= bedLine.getEnd(); ++x) {
mutations.add(new SimplePosition(bedLine.getContig(), x));
}
});
}
} else {
try (BufferedReader r2 = IOUtils.openPathForBufferedReading(this.positionFile)) {
r2.lines().filter(L -> !StringUtils.isBlank(L)).filter(L -> !L.startsWith("#")).forEach(L -> mutations.add(new SimplePosition(L)));
}
}
}
if (mutations.isEmpty()) {
LOG.fatal("undefined position \'str\'");
return -1;
}
LOG.info("number of mutations " + mutations.size());
this.out = this.openPathOrStdoutAsPrintWriter(this.outputFile);
out.print("#File");
out.print('\t');
out.print("CHROM");
out.print('\t');
out.print("POS");
if (this.indexedFastaSequenceFile != null) {
out.print('\t');
out.print("REF");
}
out.print('\t');
out.print(this.groupBy.name().toUpperCase());
out.print('\t');
out.print("DEPTH");
for (final CigarOperator op : CigarOperator.values()) {
out.print('\t');
out.print(op.name());
}
for (char c : BASES_To_PRINT) {
out.print('\t');
out.print("Base(" + c + ")");
}
out.println();
if (args.isEmpty()) {
try (BufferedReader r = new BufferedReader(new InputStreamReader(stdin()))) {
scan(r, mutations);
}
} else {
for (final String filename : args) {
try (BufferedReader r = IOUtils.openURIForBufferedReading(filename)) {
scan(r, mutations);
}
}
}
this.out.flush();
return 0;
} catch (final Throwable err) {
LOG.severe(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.out);
}
}
Aggregations