use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class StructuralVariantComparator method testBNDDistance.
private boolean testBNDDistance(final Locatable a, final Locatable b) {
final Function<Locatable, Locatable> bnd2interval = LOC -> {
if (!(LOC instanceof VariantContext)) {
return new SimplePosition(LOC.getContig(), LOC.getStart());
}
final VariantContext CTX = VariantContext.class.cast(LOC);
// can be negative
int extend_5 = 0;
// can be negative
int extend_3 = 0;
if (CTX.hasAttribute("CIPOS")) {
try {
// can be negative
final List<Integer> xList = CTX.getAttributeAsIntList("CIPOS", 0);
if (xList.size() == 2) {
extend_5 = xList.get(0).intValue();
extend_3 = xList.get(1).intValue();
if (extend_5 > 0)
extend_5 = 0;
if (extend_3 < 0)
extend_3 = 0;
}
} catch (final Throwable err) {
extend_5 = 0;
extend_3 = 0;
}
}
return new SimpleInterval(CTX.getContig(), CTX.getStart() + extend_5, /* can be negative */
CTX.getEnd() + extend_3);
};
// we use SimplePosition because getEnd() can be large when the BND is on the same chromosome
return this.withinDistanceOf(bnd2interval.apply(a), bnd2interval.apply(b), this.bnd_max_distance);
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class SlidingWindowIterator method advance.
@Override
protected Entry<? extends Locatable, List<V>> advance() {
for (; ; ) {
if (!to_be_released.isEmpty()) {
final Window<V> w = to_be_released.pollFirst();
return new AbstractMap.SimpleEntry<>(w.interval, w.variants);
}
final V ctx = this.delegate.hasNext() ? this.delegate.next() : null;
// new contig?
if (ctx == null || !ctx.getContig().equals(prevContig)) {
to_be_released.addAll(this.buffer);
// EOF
if (ctx == null && to_be_released.isEmpty())
return null;
interval2win.clear();
buffer.clear();
}
// because to_be_released might be not empty
if (ctx == null)
continue;
this.prevContig = ctx.getContig();
// remove previous windows
int i = 0;
while (i < buffer.size()) {
final Window<V> w = buffer.get(i);
if (w.getEnd() < ctx.getStart()) {
to_be_released.add(w);
buffer.remove(i);
interval2win.remove(w.interval);
} else {
i++;
}
}
prevContig = ctx.getContig();
int x1 = ctx.getStart() - ctx.getStart() % this.window_shift;
while (x1 - this.window_shift + this.window_size >= ctx.getStart()) {
x1 -= this.window_shift;
}
for (; ; ) {
final SimpleInterval r = new SimpleInterval(ctx.getContig(), Math.max(1, x1), Math.max(1, x1 + this.window_size));
if (r.getStart() > ctx.getEnd())
break;
if (r.overlaps(ctx)) {
Window<V> w = interval2win.get(r);
if (w == null) {
w = new Window<V>(r);
interval2win.put(r, w);
buffer.add(w);
}
w.variants.add(ctx);
}
x1 += this.window_shift;
}
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class FindVariantInSamRecord method find.
public Match find(final SAMRecord record, final VariantContext ctx) {
final MatchImpl match = new MatchImpl(record, ctx);
final Locatable recloc = isUseClip() ? new SimpleInterval(record.getContig(), record.getUnclippedStart(), record.getUnclippedEnd()) : record;
if (!ctx.overlaps(recloc)) {
return match;
}
final byte[] bases = record.getReadBases();
if (bases == null || bases == SAMRecord.NULL_SEQUENCE) {
return match;
}
final Cigar cigar = record.getCigar();
if (cigar == null || cigar.isEmpty()) {
return match;
}
if (!isValidAllele(ctx.getReference())) {
return match;
}
final List<Allele> alts = ctx.getAlternateAlleles().stream().filter(A -> isValidAllele(A)).collect(Collectors.toList());
if (alts.isEmpty()) {
return match;
}
final int ref_allele_len = ctx.getReference().length();
final List<NormalizedDelVariant> dels = alts.stream().filter(A -> A.length() < ref_allele_len).map(A -> new NormalizedDelVariant(ctx, A)).collect(Collectors.toList());
final List<NormalizedInsVariant> ins = alts.stream().filter(A -> A.length() > ref_allele_len).map(A -> new NormalizedInsVariant(ctx, A)).collect(Collectors.toList());
final List<Allele> subst = new ArrayList<>(ctx.getNAlleles());
// add ref
subst.add(ctx.getReference());
// add alleles having same length as ref
subst.addAll(alts.stream().filter(A -> A.length() == ref_allele_len).collect(Collectors.toList()));
int read0 = 0;
int ref1 = record.getUnclippedStart();
for (final CigarElement ce : cigar) {
if (ref1 > ctx.getEnd())
break;
final CigarOperator op = ce.getOperator();
final int clen = ce.getLength();
switch(op) {
case P:
break;
case H:
ref1 += clen;
break;
case D:
case N:
{
// ALT AAAA<-- len -->
for (final NormalizedDelVariant norm : dels) {
// System.err.println("ref1:"+ref1+"=="+norm.refpos+" ctx.start="+ctx.getContig()+":"+ctx.getStart()+":"+ctx.getReference()+" norm.pos="+norm.refpos+" norm.size:"+norm.del_size+"=="+clen+" alt:"+norm.alt+" "+record.getStart()+":"+record.getCigarString());
if (clen == norm.del_size && norm.refpos == ref1) {
// System.err.println("OK");
match.allele = Optional.of(norm.alt);
match.read_pos = read0;
return match;
}
}
ref1 += clen;
break;
}
case I:
{
// ALT AAAAAAAAAAAAAAA
for (final NormalizedInsVariant norm : ins) {
// System.err.println("INS ref1:"+ref1+"=="+norm.refpos+" ctx.start="+ctx.getContig()+":"+ctx.getStart()+":"+ctx.getReference()+" norm.pos="+norm.refpos+" norm.size:"+norm.size()+"=="+clen+" alt:"+norm.alt+" "+record.getStart()+":"+record.getCigarString());
if (clen == norm.size() && norm.refpos == ref1) {
int j = 0;
for (j = 0; j < clen; ++j) {
int b1x = read0 + j;
if (b1x >= bases.length)
break;
// read base
final byte b1 = bases[b1x];
// allele base
final byte b2 = norm.at(j);
if (b1 != b2) {
break;
}
}
if (j == clen) {
match.allele = Optional.of(norm.alt);
return match;
}
}
}
read0 += clen;
break;
}
case M:
case X:
case EQ:
case S:
{
if (op.equals(CigarOperator.S) && !isUseClip()) {
read0 += clen;
ref1 += clen;
break;
}
for (int x = 0; x + ref_allele_len <= clen; ++x) {
if (ref1 + x != ctx.getStart())
continue;
for (final Allele a : subst) {
final byte[] allele_bases = a.getBases();
if (allele_bases.length != ref_allele_len)
throw new IllegalStateException();
int j = 0;
for (j = 0; j < ref_allele_len; ++j) {
// read base
final byte b1 = bases[read0 + x + j];
// allele base
final byte b2 = allele_bases[j];
if (b1 != b2) {
break;
}
}
if (j == ref_allele_len) {
match.allele = Optional.of(a);
match.read_pos = read0 + x;
return match;
}
}
}
read0 += clen;
ref1 += clen;
break;
}
default:
throw new IllegalArgumentException(op.toString());
}
}
return match;
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class VcfCadd method query.
private List<Record> query(final VariantContext loc) {
final String chromCadd = this.convertToCaddContigs.apply(loc.getContig());
if (StringUtils.isBlank(chromCadd))
return Collections.emptyList();
final Locatable query = new SimpleInterval(chromCadd, loc.getStart(), loc.getEnd());
if (this.lastInterval == null || !this.lastInterval.contains(query)) {
this.buffer.clear();
this.lastInterval = new SimpleInterval(chromCadd, Math.max(0, loc.getStart() - 1), Math.max(loc.getEnd(), loc.getStart() + this.buffer_distance));
Iterator<String> iter = this.tabix.iterator(this.lastInterval.getContig(), this.lastInterval.getStart(), this.lastInterval.getEnd());
while (iter.hasNext()) {
final String line = iter.next();
if (line.startsWith("#") || StringUtil.isBlank(line))
continue;
final List<String> tokens = TAB.splitAsStringList(line);
if (!AcidNucleics.isATGC(tokens.get(2))) {
LOG.warn("REF allele not suitable in line " + line + ". skipping");
continue;
}
if (!AcidNucleics.isATGC(tokens.get(this.column_index_for_Alt))) {
LOG.warn("ALT allele not suitable in line " + line + ". skipping");
continue;
}
if (!tokens.get(0).equals(chromCadd))
throw new IllegalStateException("Expected CADD contig " + chromCadd + " in " + line);
final Record rec = new Record(loc.getContig(), tokens);
this.buffer.add(rec);
}
}
return this.buffer.stream().filter(R -> R.getStart() == loc.getStart() && R.ref.equals(loc.getReference()) && loc.getAlternateAlleles().contains(R.alt)).collect(Collectors.toList());
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class CompareBams method doWork.
@Override
public int doWork(final List<String> args) {
this.inputBamsList.addAll(IOUtils.unrollPaths(args));
SortingCollection<Match> database = null;
SamReader samFileReader = null;
CloseableIterator<Match> iter = null;
try {
if (this.inputBamsList.size() < 2) {
LOG.error("Need more bams please, got " + this.inputBamsList.size());
return -1;
}
database = SortingCollection.newInstance(Match.class, new MatchCodec(), (A, B) -> matchCompare0(A, B), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
this.samSequenceDictAreTheSame = true;
database.setDestructiveIteration(true);
for (int currentSamFileIndex = 0; currentSamFileIndex < this.inputBamsList.size(); currentSamFileIndex++) {
final Path samFile = this.inputBamsList.get(currentSamFileIndex);
samFileReader = super.createSamReaderFactory().referenceSequence(this.refPath).open(samFile);
final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
if (dict == null || dict.isEmpty()) {
LOG.error("Empty Dict in " + samFile);
return -1;
}
if (!this.sequenceDictionaries.isEmpty() && !SequenceUtil.areSequenceDictionariesEqual(this.sequenceDictionaries.get(0), dict)) {
this.samSequenceDictAreTheSame = false;
LOG.warn("FOOL !! THE SEQUENCE DICTIONARIES ARE **NOT** THE SAME. I will try to compare anyway but it will be slower.");
}
this.sequenceDictionaries.add(dict);
final Optional<SimpleInterval> interval;
if (!StringUtils.isBlank(this.REGION)) {
interval = IntervalParserFactory.newInstance().dictionary(dict).make().apply(this.REGION);
if (!interval.isPresent()) {
LOG.error("Cannot parse " + REGION + " (bad syntax or not in dictionary)");
return -1;
}
} else {
interval = Optional.empty();
}
SAMRecordIterator it = null;
if (!interval.isPresent()) {
it = samFileReader.iterator();
} else {
it = samFileReader.queryOverlapping(interval.get().getContig(), interval.get().getStart(), interval.get().getEnd());
}
final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(dict).logger(LOG).build();
while (it.hasNext()) {
final SAMRecord rec = progress.apply(it.next());
if (!rec.getReadUnmappedFlag()) {
if (rec.getMappingQuality() < this.min_mapq)
continue;
if (!this.no_read_filtering && rec.isSecondaryOrSupplementary())
continue;
}
final Match m = new Match();
if (rec.getReadPairedFlag()) {
m.num_in_pair = (rec.getFirstOfPairFlag() ? 1 : 2);
} else {
m.num_in_pair = 0;
}
m.readName = rec.getReadName();
m.bamIndex = currentSamFileIndex;
m.flag = rec.getFlags();
m.cigar = rec.getCigarString();
if (m.cigar == null)
m.cigar = "";
if (rec.getReadUnmappedFlag()) {
m.tid = -1;
m.pos = -1;
} else {
m.tid = rec.getReferenceIndex();
m.pos = rec.getAlignmentStart();
}
database.add(m);
}
progress.close();
it.close();
samFileReader.close();
samFileReader = null;
}
database.doneAdding();
LOG.info("Writing results....");
this.out = super.openPathOrStdoutAsPrintWriter(this.outputFile);
// compute the differences for each read
this.out.print("#READ-Name\t");
for (int x = 0; x < this.inputBamsList.size(); ++x) {
for (int y = x + 1; y < this.inputBamsList.size(); ++y) {
if (!(x == 0 && y == 1))
this.out.print("|");
this.out.print(inputBamsList.get(x));
this.out.print(" ");
this.out.print(inputBamsList.get(y));
}
}
for (int x = 0; x < this.inputBamsList.size(); ++x) {
this.out.print("\t" + inputBamsList.get(x));
}
this.out.println();
/* create an array of set<Match> */
final Comparator<Match> match_comparator = (A, B) -> matchCompare1(A, B);
final List<Set<Match>> matches = new ArrayList<Set<CompareBams.Match>>(this.inputBamsList.size());
while (matches.size() < this.inputBamsList.size()) {
matches.add(new TreeSet<CompareBams.Match>(match_comparator));
}
iter = database.iterator();
String currReadName = null;
int curr_num_in_pair = -1;
for (; ; ) {
Match nextMatch = null;
if (iter.hasNext()) {
nextMatch = iter.next();
}
if (nextMatch == null || (currReadName != null && !currReadName.equals(nextMatch.readName)) || (curr_num_in_pair != -1 && curr_num_in_pair != nextMatch.num_in_pair)) {
if (currReadName != null) {
this.out.print(currReadName);
if (curr_num_in_pair > 0) {
this.out.print("/");
this.out.print(curr_num_in_pair);
}
this.out.print("\t");
for (int x = 0; x < this.inputBamsList.size(); ++x) {
final Set<Match> first = matches.get(x);
for (int y = x + 1; y < this.inputBamsList.size(); ++y) {
if (!(x == 0 && y == 1))
this.out.print("|");
final Set<Match> second = matches.get(y);
if (same(first, second)) {
this.out.print("EQ");
} else {
this.out.print("NE");
}
}
}
for (int x = 0; x < this.inputBamsList.size(); ++x) {
this.out.print("\t");
print(matches.get(x), sequenceDictionaries.get(x));
}
this.out.println();
}
if (nextMatch == null)
break;
for (final Set<Match> set : matches) set.clear();
}
currReadName = nextMatch.readName;
curr_num_in_pair = nextMatch.num_in_pair;
matches.get(nextMatch.bamIndex).add(nextMatch);
if (this.out.checkError())
break;
}
iter.close();
this.out.flush();
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
if (database != null)
database.cleanup();
CloserUtil.close(samFileReader);
CloserUtil.close(this.out);
this.out = null;
}
}
Aggregations