use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class CoveragePlotter method doWork.
@Override
public int doWork(final List<String> args) {
ArchiveFactory archive = null;
PrintWriter manifest = null;
try {
if (extend < 1.0) {
LOG.error("extend is lower than 1 :" + this.extend);
return -1;
}
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(this.refPath);
final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().referenceSequence(CoveragePlotter.this.refPath).validationStringency(ValidationStringency.LENIENT);
final List<Path> inputBams = IOUtils.unrollPaths(this.bamsPath);
if (inputBams.isEmpty()) {
LOG.error("input bam file missing.");
return -1;
}
Iterator<? extends Locatable> iter;
final String input = oneFileOrNull(args);
if (input == null) {
final BedLineCodec codec = new BedLineCodec();
final LineIterator liter = new LineIterator(stdin());
iter = new AbstractIterator<Locatable>() {
@Override
protected Locatable advance() {
while (liter.hasNext()) {
final String line = liter.next();
final BedLine bed = codec.decode(line);
if (bed == null) {
continue;
}
return bed;
}
liter.close();
return null;
}
};
} else {
iter = IntervalListProvider.from(input).dictionary(dict).stream().iterator();
}
final BufferedImage image = new BufferedImage(this.dimension.width, this.dimension.height, BufferedImage.TYPE_INT_ARGB);
final BufferedImage offscreen = new BufferedImage(this.dimension.width, this.dimension.height, BufferedImage.TYPE_INT_ARGB);
final double y_mid = this.dimension.getHeight() / 2.0;
final ToDoubleFunction<Double> normToPixelY = NORM -> this.dimension.getHeight() - NORM * y_mid;
final DiscreteMedian<Integer> discreteMedian = new DiscreteMedian<>();
manifest = (this.manifestPath == null ? new PrintWriter(new NullOuputStream()) : IOUtils.openPathForPrintWriter(this.manifestPath));
archive = ArchiveFactory.open(this.outputFile);
while (iter.hasNext()) {
final Locatable the_locatable = iter.next();
String label = "";
if (the_locatable instanceof BedLine) {
final BedLine bedline = BedLine.class.cast(the_locatable);
label = bedline.getOrDefault(3, label);
}
final SimpleInterval rawRegion = new SimpleInterval(the_locatable);
final SimpleInterval extendedRegion;
/* extend interval */
if (this.extend > 1) {
final int L1 = rawRegion.getLengthOnReference();
final int L2 = (int) Math.ceil(L1 * this.extend);
final int mid = rawRegion.getCenter().getPosition();
int x0 = mid - L2 / 2;
if (x0 < 0)
x0 = 1;
int x1 = mid + L2 / 2;
final SAMSequenceRecord ssr = dict.getSequence(rawRegion.getContig());
if (ssr != null)
x1 = Math.min(ssr.getSequenceLength(), x1);
if (x0 > x1)
continue;
extendedRegion = new SimpleInterval(rawRegion.getContig(), x0, x1);
} else {
extendedRegion = rawRegion;
}
final ToDoubleFunction<Integer> pos2pixel = POS -> (POS - extendedRegion.getStart()) / (double) extendedRegion.getLengthOnReference() * this.dimension.getWidth();
final String theGeneName = this.getGenes(extendedRegion).filter(G -> G.getType().equals("gene")).map(G -> G.getAttribute("gene_name")).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(null);
final String outputFilename = this.prefix + extendedRegion.getContig() + "_" + extendedRegion.getStart() + "_" + extendedRegion.getEnd() + (StringUtils.isBlank(label) ? "" : "." + label.replaceAll("[^A-Za-z\\-\\.0-9]+", "_")) + (StringUtils.isBlank(theGeneName) || !StringUtils.isBlank(label) ? "" : "." + theGeneName.replaceAll("[^A-Za-z\\-\\.0-9]+", "_")) + ".png";
final Graphics2D g2 = offscreen.createGraphics();
g2.setColor(Color.BLACK);
g2.setComposite(AlphaComposite.getInstance(AlphaComposite.CLEAR));
g2.fillRect(0, 0, this.dimension.width, this.dimension.height);
g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER));
g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
g2.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
final Graphics2D g = image.createGraphics();
g.setColor(Color.WHITE);
g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
g.fillRect(0, 0, this.dimension.width, this.dimension.height);
// draw exons
getGenes(extendedRegion).filter(G -> G.getType().equals("exon")).forEach(EX -> {
g.setColor(new Color(240, 240, 240));
double x1 = pos2pixel.applyAsDouble(EX.getStart());
double x2 = pos2pixel.applyAsDouble(EX.getEnd());
g.fill(new Rectangle2D.Double(x1, 0, (x2 - x1), this.dimension.height));
});
int y = (int) (this.dimension.height / 2.0);
g.setColor(Color.BLUE);
g.drawLine(0, y, image.getWidth(), y);
y = (int) (this.dimension.height / 4.0);
g.setColor(Color.CYAN);
g.drawLine(0, y, image.getWidth(), y);
y = (int) (3.0 * this.dimension.height / 4.0);
g.drawLine(0, y, image.getWidth(), y);
g.setColor(Color.DARK_GRAY);
g.drawRect(0, 0, this.dimension.width - 1, this.dimension.height - 1);
drawGenes(g, new Rectangle(0, 0, image.getWidth(), image.getHeight()), extendedRegion);
drawKnownCnv(g, new Rectangle(0, 0, image.getWidth(), image.getHeight()), extendedRegion);
if (this.extend > 1) {
g.setColor(Color.GREEN);
int x = (int) pos2pixel.applyAsDouble(rawRegion.getStart());
g.drawLine(x, 0, x, image.getHeight());
x = (int) pos2pixel.applyAsDouble(rawRegion.getEnd());
g.drawLine(x, 0, x, image.getHeight());
}
final int[] depth = new int[extendedRegion.getLengthOnReference()];
final int[] copy = new int[depth.length];
final BitSet blackListedPositions = new BitSet(depth.length);
final Map<String, Point2D> sample2maxPoint = new HashMap<>(inputBams.size());
boolean drawAbove = false;
// fill black listed regions
if (this.blackListedPath != null) {
try (TabixReader tbr = new TabixReader(this.blackListedPath.toString())) {
final ContigNameConverter cvt = ContigNameConverter.fromContigSet(tbr.getChromosomes());
final String ctg = cvt.apply(extendedRegion.getContig());
if (!StringUtils.isBlank(ctg)) {
final BedLineCodec codec = new BedLineCodec();
final TabixReader.Iterator tbxr = tbr.query(ctg, extendedRegion.getStart(), extendedRegion.getEnd());
for (; ; ) {
final String line = tbxr.next();
if (line == null)
break;
final BedLine bed = codec.decode(line);
if (bed == null)
continue;
int p1 = Math.max(bed.getStart(), extendedRegion.getStart());
while (p1 <= extendedRegion.getEnd() && p1 <= bed.getEnd()) {
blackListedPositions.set(p1 - extendedRegion.getStart());
++p1;
}
}
}
} catch (Throwable err) {
LOG.warn(err);
}
}
if (this.skip_original_interval_for_median && this.extend > 1) {
for (int i = the_locatable.getStart(); i <= the_locatable.getEnd(); i++) {
blackListedPositions.set(i - extendedRegion.getStart());
}
}
final Set<String> sampleNames = new TreeSet<>();
for (final Path path : inputBams) {
try (SamReader sr = samReaderFactory.open(path)) {
final SAMFileHeader header = sr.getFileHeader();
final String sample = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtil.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
sampleNames.add(sample);
/* sample has deletion */
int count_has_dp_le_0_5 = 0;
/* sample has dup */
int count_has_dp_ge_1_5 = 0;
/* longest arc */
int longest_arc = 0;
SequenceUtil.assertSequenceDictionariesEqual(dict, header.getSequenceDictionary());
Arrays.fill(depth, 0);
try (CloseableIterator<SAMRecord> siter = sr.queryOverlapping(extendedRegion.getContig(), extendedRegion.getStart(), extendedRegion.getEnd())) {
while (siter.hasNext()) {
final SAMRecord rec = siter.next();
if (rec.getReadUnmappedFlag())
continue;
if (!SAMRecordDefaultFilter.accept(rec, this.min_mapq))
continue;
int ref = rec.getStart();
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
if (this.alpha_arc > 0.0 && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && !rec.getProperPairFlag() && rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) && (!this.arc_only_rr_ff || (rec.getReadNegativeStrandFlag() == rec.getMateNegativeStrandFlag()))) {
longest_arc = Math.max(longest_arc, Math.abs(rec.getMateAlignmentStart() - rec.getAlignmentStart()));
final double xstart = pos2pixel.applyAsDouble(rec.getAlignmentStart());
final double xend = pos2pixel.applyAsDouble(rec.getMateAlignmentStart());
final double len = (xend - xstart);
if (Math.abs(len) > this.min_invert && Math.abs(len) < this.max_invert) {
final double y2 = y_mid + (drawAbove ? -1 : 1) * Math.min(y_mid, Math.abs(len / 2.0));
final Composite oldComposite = g2.getComposite();
g2.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, (float) this.alpha_arc));
g2.setColor(getColor("arc", Color.ORANGE));
final GeneralPath curve = new GeneralPath();
curve.moveTo(xstart, y_mid);
curve.quadTo(xstart + len / 2.0, y2, xend, y_mid);
g2.draw(curve);
g2.setComposite(oldComposite);
drawAbove = !drawAbove;
}
}
for (CigarElement ce : cigar) {
final CigarOperator op = ce.getOperator();
final int len = ce.getLength();
if (op.consumesReferenceBases()) {
if (op.consumesReadBases()) {
for (int i = 0; i < len; i++) {
final int pos = ref + i;
if (pos < extendedRegion.getStart())
continue;
if (pos > extendedRegion.getEnd())
break;
depth[pos - extendedRegion.getStart()]++;
}
}
ref += len;
}
}
}
// loop cigar
}
if (extendedRegion.getLengthOnReference() > image.getWidth() && extendedRegion.getLengthOnReference() > this.window_smooth_size) {
// smooth
final int bases_per_pixel = this.window_smooth_size;
System.arraycopy(depth, 0, copy, 0, depth.length);
for (int i = 0; i < depth.length && bases_per_pixel > 1; i++) {
double t = 0;
int count = 0;
for (int j = Math.max(0, i - bases_per_pixel); j <= i + bases_per_pixel && j < depth.length; j++) {
t += copy[j];
count++;
}
if (count == 0)
continue;
depth[i] = (int) (t / count);
}
}
discreteMedian.clear();
for (int i = 0; i < depth.length; i++) {
if (depth[i] > this.max_depth)
continue;
if (blackListedPositions.get(i))
continue;
discreteMedian.add(depth[i]);
}
final double median = discreteMedian.getMedian().orElse(0);
if (median <= 0) {
final String msg = "Skipping " + sample + " " + extendedRegion + " because median is 0";
LOG.warning(msg);
manifest.println("#" + msg);
continue;
}
Point2D max_position = null;
double max_distance_to_1 = 0.0;
final GeneralPath line = new GeneralPath();
for (int x = 0; x < image.getWidth(); x++) {
discreteMedian.clear();
int pos1 = (int) Math.floor(((x + 0) / (double) image.getWidth()) * depth.length);
final int pos2 = (int) Math.ceil(((x + 0) / (double) image.getWidth()) * depth.length);
while (pos1 <= pos2 && pos1 < depth.length) {
discreteMedian.add(depth[pos1]);
pos1++;
}
final double average = discreteMedian.getMedian().orElse(0);
final double normDepth = (average / median);
if (normDepth <= 0.5)
count_has_dp_le_0_5++;
if (normDepth >= 1.5)
count_has_dp_ge_1_5++;
final double y2 = normToPixelY.applyAsDouble(normDepth);
double distance_to_1 = Math.abs(normDepth - 1.0);
if (distance_to_1 > 0.3 && (max_position == null || distance_to_1 > max_distance_to_1)) {
max_distance_to_1 = distance_to_1;
max_position = new Point2D.Double(x, y2);
}
if (x == 0) {
line.moveTo(x, y2);
} else {
line.lineTo(x, y2);
}
}
g.setColor(max_distance_to_1 < 0.1 ? Color.lightGray : Color.GRAY);
final Stroke oldStroke = g.getStroke();
final Composite oldComposite = g.getComposite();
g.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, (float) this.alpha));
g.setStroke(new BasicStroke(0.5f, BasicStroke.CAP_BUTT, BasicStroke.JOIN_ROUND));
g.draw(line);
g.setStroke(oldStroke);
g.setComposite(oldComposite);
if (max_position != null)
sample2maxPoint.put(sample, max_position);
// fill manifest
manifest.print(rawRegion.getContig());
manifest.print("\t");
manifest.print(rawRegion.getStart() - 1);
manifest.print("\t");
manifest.print(rawRegion.getEnd());
manifest.print("\t");
manifest.print(extendedRegion.getStart() - 1);
manifest.print("\t");
manifest.print(extendedRegion.getEnd());
manifest.print("\t");
manifest.print(outputFilename);
manifest.print("\t");
manifest.print(StringUtils.isBlank(theGeneName) ? "." : theGeneName);
manifest.print("\t");
manifest.print(sample);
manifest.print("\t");
manifest.print(count_has_dp_le_0_5);
manifest.print("\t");
manifest.print((int) ((count_has_dp_le_0_5 / (double) extendedRegion.getLengthOnReference()) * 100.0));
manifest.print("\t");
manifest.print(count_has_dp_ge_1_5);
manifest.print("\t");
manifest.print((int) ((count_has_dp_ge_1_5 / (double) extendedRegion.getLengthOnReference()) * 100.0));
manifest.print("\t");
manifest.print(median);
manifest.print("\t");
manifest.print(discreteMedian.getStandardDeviation().orElse(-99999));
manifest.print("\t");
manifest.print(longest_arc);
manifest.println();
}
// end loop over samples
}
g2.dispose();
g.drawImage(offscreen, 0, 0, null);
g.setColor(Color.BLACK);
final String label_samples = sampleNames.size() > 10 ? "N=" + StringUtils.niceInt(sampleNames.size()) : String.join(";", sampleNames);
final Set<String> all_genes = getGenes(extendedRegion).filter(G -> G.getType().equals("gene")).map(G -> G.getAttribute("gene_name")).filter(F -> !StringUtils.isBlank(F)).collect(Collectors.toCollection(TreeSet::new));
g.drawString(extendedRegion.toNiceString() + " Length:" + StringUtils.niceInt(extendedRegion.getLengthOnReference()) + " Sample(s):" + label_samples + " " + label + " Gene(s):" + (all_genes.size() > 20 ? "N=" + StringUtils.niceInt(all_genes.size()) : String.join(";", all_genes)), 10, 10);
if (!sample2maxPoint.isEmpty()) {
/**
* draw sample names
*/
g.setColor(getColor("sample", Color.BLUE));
final int sampleFontSize = 7;
final Font oldFont = g.getFont();
g.setFont(new Font(oldFont.getName(), Font.PLAIN, sampleFontSize));
for (final String sample : sample2maxPoint.keySet()) {
final Point2D pt = sample2maxPoint.get(sample);
double sny = pt.getY();
if (sny > y_mid)
sny += sampleFontSize;
g.drawString(sample, (int) pt.getX(), (int) Math.min(this.dimension.height - sampleFontSize, Math.max(sampleFontSize, sny)));
}
g.setFont(oldFont);
}
g.dispose();
try (OutputStream out = archive.openOuputStream(outputFilename)) {
ImageIO.write(image, "PNG", out);
out.flush();
}
}
// end while iter
archive.close();
archive = null;
manifest.flush();
manifest.close();
manifest = null;
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(manifest);
CloserUtil.close(archive);
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class SvToSVG method doWork.
@Override
public int doWork(final List<String> args) {
/* parse interval */
if (this.intervalStrList.isEmpty()) {
LOG.error("interval(s) undefined");
return -1;
}
this.drawinAreaWidth = Math.max(100, this.drawinAreaWidth);
try {
if (this.fastaFile != null) {
this.indexedFastaSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(this.fastaFile);
}
if (this.vcfFile != null) {
this.vcfFileReader = VCFReaderFactory.makeDefault().open(this.vcfFile, true);
}
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setNamespaceAware(true);
final DocumentBuilder db = dbf.newDocumentBuilder();
this.document = db.newDocument();
final List<Path> bamFiles = IOUtils.unrollPaths(args);
if (bamFiles.isEmpty()) {
LOG.error("bam(s) undefined");
return -1;
}
final SamReaderFactory srf = super.createSamReaderFactory().referenceSequence(this.fastaFile);
/* loop over each bam file */
for (final Path bamFile : bamFiles) {
try (final SamReader sr = srf.open(bamFile)) {
final SAMFileHeader header = sr.getFileHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
final Function<String, Optional<SimpleInterval>> parser = IntervalParserFactory.newInstance().dictionary(dict).make();
final Set<String> samples = header.getReadGroups().stream().map(G -> G.getSample()).filter(S -> !StringUtil.isBlank(S)).collect(Collectors.toSet());
if (samples.size() != 1) {
LOG.error("expected on sample in " + bamFile + " but got " + samples.size() + " " + samples);
return -1;
}
final Sample sample = new Sample(samples.iterator().next());
this.sampleList.add(sample);
/* loop over each region for that sample */
for (final String intervalStr : this.intervalStrList) {
LOG.info("scanning " + intervalStr + " for " + bamFile);
final SimpleInterval interval = parser.apply(intervalStr).orElseThrow(IntervalParserFactory.exception(intervalStr));
if (interval == null) {
LOG.error("Cannot parse " + intervalStr + " for " + bamFile);
return -1;
}
/* create new region */
final Sample.Region region = sample.new Region(interval);
if (!sample.regions.isEmpty() && region.getLengthOnReference() != sample.regions.get(0).getLengthOnReference()) {
LOG.warning("intervals should all have the same length");
}
sample.regions.add(region);
try (CloseableIterator<SAMRecord> iter = sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false)) {
while (iter.hasNext()) {
final SAMRecord record = iter.next();
boolean trace = record.getReadName().equals(DEBUG_READ);
if (!SAMRecordDefaultFilter.accept(record, this.min_mapq))
continue;
if (trace)
LOG.debug("got1");
final Cigar cigar = record.getCigar();
if (cigar == null || cigar.isEmpty())
continue;
if (trace)
LOG.debug("got2");
if (remove_simple_reads) {
if ((!cigar.isClipped() && SAMUtils.getOtherCanonicalAlignments(record).isEmpty() && (record.getReadPairedFlag() && record.getProperPairFlag())))
continue;
}
if (!record.getContig().equals(interval.getContig()))
continue;
if (trace)
LOG.debug("got3");
final Sample.ShortRead shortRead = sample.new DefaultShortRead(region, record);
if (shortRead.getEnd() < interval.getStart())
continue;
if (shortRead.getStart() > interval.getEnd())
continue;
if (trace)
LOG.debug("got4" + record.getSAMString());
if (!sample.sampleName.equals(shortRead.getSampleName()))
continue;
region.beforePileup.add(shortRead);
}
}
/* en iterator */
}
/* end loop interval */
}
/* end open SAM */
}
this.sampleList.forEach(S -> S.regions.forEach(R -> R.addSplitReads()));
this.sampleList.forEach(S -> S.regions.forEach(R -> R.pileup()));
/* sort per sample */
this.sampleList.sort((A, B) -> A.sampleName.compareTo(B.sampleName));
/* sort per region */
this.sampleList.stream().forEach(SL -> SL.regions.sort((A, B) -> {
final SmartComparator sc = new SmartComparator();
int i = sc.compare(A.getContig(), B.getContig());
if (i != 0)
return i;
return A.getStart() - B.getStart();
}));
buildDocument();
final Transformer tr = TransformerFactory.newInstance().newTransformer();
final Result result;
if (this.outputFile != null) {
result = new StreamResult(this.outputFile);
} else {
result = new StreamResult(stdout());
}
tr.transform(new DOMSource(this.document), result);
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
CloserUtil.close(this.vcfFileReader);
this.document = null;
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class SvToSVG method buildSample.
private Element buildSample(final Sample sample, final DocumentFragment animationLayer) {
final Element sampleRoot = element("g");
double curr_y = sample.getY();
final Element sampleLabel = element("text", sample.sampleName);
sampleLabel.setAttribute("x", "5");
sampleLabel.setAttribute("y", format(curr_y + 14));
sampleLabel.setAttribute("class", "samplename");
sampleRoot.appendChild(sampleLabel);
curr_y += 20;
// loop over regions
for (final Sample.Region region : sample.regions) {
region.y = curr_y;
// genomic sequence will be used to test if there is a mismatch. No garantee it exists.
final GenomicSequence genomicSequence;
if (this.indexedFastaSequenceFile != null) {
final ContigNameConverter ctgConver = ContigNameConverter.fromOneDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
final String sname = ctgConver.apply(region.getContig());
if (StringUtil.isBlank(sname)) {
genomicSequence = null;
} else {
genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, sname);
}
} else {
genomicSequence = null;
}
final Element regionRoot = element("g");
sampleRoot.appendChild(regionRoot);
final Element rgnLabel = element("text", new SimpleInterval(region).toNiceString() + " Length:" + this.niceIntFormat.format((region.getLengthOnReference())));
rgnLabel.setAttribute("x", "5");
rgnLabel.setAttribute("y", format(curr_y + 12));
rgnLabel.setAttribute("class", "samplename");
regionRoot.appendChild(rgnLabel);
curr_y += 20;
final double y_top_region = curr_y;
if (this.coverageHeight > 0) {
/* draw coverage */
final TreeMap<Integer, Long> pos2cov = region.shortReadStream().filter(R -> R.isDefaultShortRead()).flatMapToInt(R -> R.getRecord().getAlignmentBlocks().stream().flatMapToInt(RB -> java.util.stream.IntStream.rangeClosed(RB.getReferenceStart(), RB.getReferenceStart() + RB.getLength()))).filter(P -> P >= region.getStart()).filter(P -> P <= region.getEnd()).mapToObj(P -> P).collect(Collectors.groupingBy(Function.identity(), () -> new TreeMap<>(), Collectors.counting()));
;
final long max_cov = pos2cov.values().stream().mapToLong(L -> L.longValue()).max().orElse(1L);
final Element covPath = element("path");
covPath.setAttribute("class", "coverage");
regionRoot.appendChild(covPath);
final StringBuilder sb = new StringBuilder();
sb.append("M 0 " + format(curr_y + this.coverageHeight));
for (int k = 0; k < this.drawinAreaWidth; k++) {
final int pos1 = region.getStart() + (int) (((k + 0) / (double) this.drawinAreaWidth) * (double) region.getLengthOnReference());
final int pos2 = region.getStart() + (int) (((k + 1) / (double) this.drawinAreaWidth) * (double) region.getLengthOnReference());
final double dp = IntStream.range(pos1, pos2).filter(P -> pos2cov.containsKey(P)).mapToLong(P -> pos2cov.get(P)).max().orElseGet(() -> 0L);
final double dpy = curr_y + this.coverageHeight - (dp / (double) max_cov) * (double) this.coverageHeight;
sb.append(" L " + format(k) + " " + format(dpy));
}
sb.append(" L " + format(this.drawinAreaWidth) + " " + format(curr_y + this.coverageHeight));
sb.append(" Z");
covPath.setAttribute("d", sb.toString());
covPath.appendChild(element("title", "Coverage. Max:" + niceIntFormat.format(max_cov)));
curr_y += this.coverageHeight;
curr_y += 2;
}
/* print all lines */
for (int nLine = 0; nLine < region.lines.size(); ++nLine) {
final List<Sample.ShortRead> line = region.lines.get(nLine);
// loop over records on that line
for (final Sample.ShortRead shortRead : line) {
boolean trace = shortRead.getRecord().getReadName().equals(DEBUG_READ);
shortRead.y = curr_y;
int ref1 = shortRead.getStart();
final double leftX = shortRead.getPixelStart();
final double midy = this.featureHeight / 2.0;
final double maxy = this.featureHeight;
final Element g1 = element("g");
final Element g3;
if (shortRead.isSplitRead()) {
final Sample.SplitRead splitRead = Sample.SplitRead.class.cast(shortRead);
// final double dx = -0.5*shortRead.getPixelLength();
// final double dy = -0.5*shortRead.getHeight();
splitRead.g_rotate = element("g");
splitRead.g_rotate.setAttribute("transform", "rotate(0)");
g1.appendChild(splitRead.g_rotate);
g3 = element("g");
g3.setAttribute("transform", "translate(0,0)");
splitRead.g_rotate.appendChild(g3);
splitRead.g_translate2 = g3;
} else {
g3 = g1;
}
g1.setAttribute("transform", "translate(" + format(shortRead.getPixelStart()) + "," + format(shortRead.getY()) + ")");
// readElement.setAttribute("style","transform-origin:center");
if (shortRead.isSplitRead()) {
// always in front
animationLayer.appendChild(g1);
} else {
regionRoot.insertBefore(g1, regionRoot.getFirstChild());
}
final BiPredicate<Integer, Integer> isMismatch = (readpos0, refpos1) -> {
if (genomicSequence == null)
return false;
if (refpos1 < 1 || refpos1 > genomicSequence.length())
return false;
char refC = Character.toUpperCase(genomicSequence.charAt(refpos1 - 1));
if (refC == 'N')
return false;
final byte[] bases = shortRead.getRecord().getReadBases();
if (bases == null || bases == SAMRecord.NULL_SEQUENCE)
return false;
if (readpos0 < 0 || readpos0 >= bases.length) {
LOG.warn("P=" + readpos0 + " 0<x<" + bases.length);
return false;
}
char readC = (char) Character.toUpperCase(bases[readpos0]);
if (readC == 'N')
return false;
return readC != refC;
};
shortRead.g_translate1 = g1;
int readpos0 = 0;
int readpos0_clipped = 0;
final Element title = element("title", shortRead.getRecord().getPairedReadName() + " " + shortRead.getRecord().getCigarString() + " " + (shortRead.getRecord().getReadNegativeStrandFlag() ? "-" : "+"));
g3.appendChild(title);
final DocumentFragment insertionsFragment = this.document.createDocumentFragment();
final Cigar cigar = shortRead.getRecord().getCigar();
for (int cigarIdx = 0; cigarIdx < cigar.numCigarElements(); cigarIdx++) {
final CigarElement ce = cigar.getCigarElement(cigarIdx);
final CigarOperator op = ce.getOperator();
int next_ref = ref1;
int next_read = readpos0;
switch(op) {
case I:
{
next_read += ce.getLength();
readpos0_clipped += ce.getLength();
if (!(ref1 + ce.getLength() < region.getStart() || ref1 > region.getEnd())) {
final double ce_length = region.baseToPixel(region.getStart() + ce.getLength());
final Element rectInsert = element("rect");
rectInsert.setAttribute("class", "insert");
rectInsert.setAttribute("x", format(region.baseToPixel(ref1) - leftX));
rectInsert.setAttribute("y", format(0));
rectInsert.setAttribute("width", format(this.svgDuration > 0 && ce_length > 1 ? ce_length : 1));
rectInsert.setAttribute("height", format(this.featureHeight));
rectInsert.appendChild(element("title", this.niceIntFormat.format(ce.getLength())));
if (this.svgDuration > 0 && ce_length > 1) {
final Element anim = element("animate");
rectInsert.appendChild(anim);
anim.setAttribute("attributeType", "XML");
anim.setAttribute("attributeName", "width");
anim.setAttribute("begin", "0s");
anim.setAttribute("from", format(ce_length));
anim.setAttribute("to", "1");
anim.setAttribute("dur", String.valueOf(this.svgDuration) + "s");
anim.setAttribute("repeatCount", this.svgRepeatCount);
anim.setAttribute("fill", "freeze");
}
insertionsFragment.appendChild(rectInsert);
}
break;
}
case P:
continue;
case S:
case H:
case M:
case X:
case EQ:
{
next_ref += ce.getLength();
if (!op.equals(CigarOperator.H)) {
next_read += ce.getLength();
}
if (!(next_ref < region.getStart() || ref1 > region.getEnd())) {
final double distance_pix = region.baseToPixel(next_ref) - region.baseToPixel(ref1);
final StringBuilder sb = new StringBuilder();
final Element path = element("path");
path.setAttribute("class", "op" + op.name() + (shortRead.isDefaultShortRead() ? "" : "x"));
if (trace)
path.setAttribute("style", "fill:blue;");
if (!op.isClipping() && shortRead.isDefaultShortRead() && shortRead.getRecord().getReadPairedFlag() && !shortRead.getRecord().getProperPairFlag()) {
if (!shortRead.getContig().equals(shortRead.getRecord().getMateReferenceName())) {
path.setAttribute("style", "fill:orchid;");
} else {
path.setAttribute("style", "fill:lightblue;");
}
}
// arrow <--
if (cigarIdx == 0 && shortRead.isNegativeStrand()) {
sb.append("M ").append(format(region.baseToPixel(ref1) - leftX)).append(',').append(0);
sb.append(" h ").append(format(distance_pix));
sb.append(" v ").append(format(maxy));
sb.append(" h ").append(format(-(distance_pix)));
sb.append(" l ").append(format(-arrow_w)).append(',').append(-featureHeight / 2.0);
sb.append(" Z");
} else // arrow -->
if (cigarIdx + 1 == cigar.numCigarElements() && !shortRead.isNegativeStrand()) {
sb.append("M ").append(format(region.baseToPixel(ref1) - leftX + distance_pix)).append(',').append(0);
sb.append(" h ").append(format(-(distance_pix)));
sb.append(" v ").append(format(maxy));
sb.append(" h ").append(format(distance_pix));
sb.append(" l ").append(format(arrow_w)).append(',').append(-featureHeight / 2.0);
sb.append(" Z");
} else {
sb.append("M ").append(format(region.baseToPixel(ref1) - leftX)).append(',').append(0);
sb.append(" h ").append(format(distance_pix));
sb.append(" v ").append(format(maxy));
sb.append(" h ").append(format(-(distance_pix)));
sb.append(" Z");
}
path.setAttribute("d", sb.toString());
g3.appendChild(path);
if (op.isAlignment() && genomicSequence != null && !hideMismatch) {
for (int x = 0; x < ce.getLength(); ++x) {
if (!isMismatch.test(readpos0_clipped + x, ref1 + x))
continue;
if (ref1 + x < region.getStart())
continue;
if (ref1 + x > region.getEnd())
continue;
final Element rectMismatch = element("rect");
rectMismatch.setAttribute("class", "mismatch");
rectMismatch.setAttribute("x", format(region.baseToPixel(ref1 + x) - leftX));
rectMismatch.setAttribute("y", format(0));
rectMismatch.setAttribute("width", format((region.baseToPixel(ref1 + x + 1) - region.baseToPixel(ref1 + x))));
rectMismatch.setAttribute("height", format(this.featureHeight));
g3.appendChild(rectMismatch);
}
}
}
if (!op.isClipping()) {
readpos0_clipped += ce.getLength();
}
break;
}
case D:
case N:
{
next_ref += ce.getLength();
if (!(next_ref < region.getStart() || ref1 > region.getEnd())) {
final double distance_pix = region.baseToPixel(next_ref) - region.baseToPixel(ref1);
final Element lineE = element("line");
lineE.setAttribute("class", "opD");
lineE.setAttribute("x1", format(region.baseToPixel(ref1) - leftX));
lineE.setAttribute("y1", format(midy));
lineE.setAttribute("x2", format(region.baseToPixel(ref1) - leftX + distance_pix));
lineE.setAttribute("y2", format(midy));
g3.insertBefore(lineE, g3.getFirstChild());
}
break;
}
default:
throw new IllegalStateException(op.name());
}
ref1 = next_ref;
readpos0 = next_read;
}
g3.appendChild(insertionsFragment);
}
curr_y += (this.featureHeight + 3);
}
for (int x = 1; x < 10; ++x) {
final double xx = (this.drawinAreaWidth / 10.0) * x;
final Element line = element("line");
line.setAttribute("class", "ruler");
line.setAttribute("x1", format(xx));
line.setAttribute("y1", format(y_top_region));
line.setAttribute("x2", format(xx));
line.setAttribute("y2", format(curr_y));
line.appendChild(element("title", niceIntFormat.format(region.getStart() + (int) (region.getLengthOnReference() / 10.0) * x)));
regionRoot.insertBefore(line, regionRoot.getFirstChild());
}
/**
* print variants
*/
if (this.vcfFileReader != null) {
final VCFHeader header = this.vcfFileReader.getHeader();
final SAMSequenceDictionary vcfdict = header.getSequenceDictionary();
final ContigNameConverter ctgConver = (vcfdict == null ? null : ContigNameConverter.fromOneDictionary(vcfdict));
final String sname = ctgConver == null ? null : ctgConver.apply(region.getContig());
if (!StringUtil.isBlank(sname)) {
final CloseableIterator<VariantContext> iter = this.vcfFileReader.query(region.getContig(), region.getStart(), region.getEnd());
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (!ctx.isVariant())
continue;
final double x1 = Math.max(0, region.baseToPixel(ctx.getStart()));
final double x2 = Math.min(region.baseToPixel(ctx.getEnd() + 1), this.drawinAreaWidth);
final Genotype g = ctx.getGenotype(sample.sampleName);
final Element rect = element("rect");
rect.setAttribute("class", "variant" + (g == null ? "" : g.getType().name()));
rect.setAttribute("x", format(x1));
rect.setAttribute("y", format(y_top_region));
rect.setAttribute("width", format(x2 - x1));
rect.setAttribute("height", format(curr_y - y_top_region));
rect.appendChild(element("title", niceIntFormat.format(ctx.getStart()) + "-" + niceIntFormat.format(ctx.getEnd()) + " " + ctx.getReference().getDisplayString()));
regionRoot.insertBefore(rect, regionRoot.getFirstChild());
}
iter.close();
}
}
region.height = curr_y - region.y;
}
sample.height = curr_y - sample.y;
final Element frame = element("rect");
frame.setAttribute("class", "frame");
frame.setAttribute("x", format(0));
frame.setAttribute("y", format(sample.getY()));
frame.setAttribute("width", format(drawinAreaWidth));
frame.setAttribute("height", format(sample.getHeight()));
sampleRoot.appendChild(frame);
return sampleRoot;
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class BamStats05 method readBedFile.
private Map<String, List<SimpleInterval>> readBedFile(final Path bedFile) throws IOException {
final Map<String, List<SimpleInterval>> gene2interval = new TreeMap<String, List<SimpleInterval>>();
BufferedReader bedIn = null;
try {
bedIn = IOUtils.openPathForBufferedReading(bedFile);
final BedLineCodec codec = new BedLineCodec();
String line = null;
while ((line = bedIn.readLine()) != null) {
if (StringUtils.isBlank(line) || line.startsWith("#"))
continue;
final BedLine bedLine = codec.decode(line);
if (bedLine == null)
continue;
if (bedLine.getColumnCount() < 4) {
throw new IOException("bad bed line (expected 4 columns CHROM/START/END/GENE) in " + line + " " + bedFile);
}
final String chrom = bedLine.getContig();
final int chromStart1 = bedLine.getStart();
final int chromEnd1 = bedLine.getEnd();
final String gene = bedLine.get(3);
if (StringUtils.isBlank(gene))
throw new IOException("bad bed gene in " + line + " " + bedFile);
List<SimpleInterval> intervals = gene2interval.get(gene);
if (intervals == null) {
intervals = new ArrayList<>();
gene2interval.put(gene, intervals);
} else if (!intervals.get(0).getContig().equals(chrom)) {
throw new IOException("more than one chromosome for gene:" + gene + " " + intervals.get(0) + " and " + bedLine.toInterval());
} else {
if (!this.mergeOverlapping) {
intervals.stream().filter(R -> R.overlaps(bedLine)).findFirst().ifPresent(R -> {
throw new IllegalArgumentException("overlapping region: " + bedLine + " and " + R + "\nUse option --merge to merge overlapping regions.");
});
}
}
intervals.add(new SimpleInterval(chrom, chromStart1, chromEnd1));
if (this.mergeOverlapping) {
intervals.sort((A, B) -> Integer.compare(A.getStart(), B.getStart()));
int x = 0;
while (x + 1 < intervals.size()) {
final SimpleInterval i1 = intervals.get(x + 0);
final SimpleInterval i2 = intervals.get(x + 1);
if (i1.overlaps(i2)) {
intervals.remove(x + 1);
intervals.set(x + 0, i1.merge(i2));
} else {
x++;
}
}
}
}
bedIn.close();
return gene2interval;
} finally {
CloserUtil.close(bedIn);
}
}
use of com.github.lindenb.jvarkit.samtools.util.SimpleInterval in project jvarkit by lindenb.
the class BamXtremDepth method bitSetToLocatables.
private List<Locatable> bitSetToLocatables(final SAMSequenceRecord ssr, final BitSet bitSet) {
final List<Locatable> L = new ArrayList<>();
int i = 0;
while (i < bitSet.size()) {
i = bitSet.nextSetBit(i);
if (i == -1)
break;
int j = i + 1;
while (j < bitSet.size() && bitSet.get(j)) {
j++;
}
// bitSet is zero based
L.add(new SimpleInterval(ssr.getContig(), i + 1, j));
i = j;
}
return L;
}
Aggregations