use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class GtfRetroCopy method doWork.
@Override
public int doWork(final List<String> args) {
VCFReader vcfFileReader = null;
VariantContextWriter vcw0 = null;
try {
/* load the reference genome */
/* create a contig name converter from the REF */
final Set<String> knownGeneIds;
if (this.knownPath != null) {
try (BufferedReader br = IOUtils.openPathForBufferedReading(this.knownPath)) {
knownGeneIds = br.lines().filter(L -> !StringUtils.isBlank(L)).map(S -> S.trim()).filter(S -> !(S.equals("-") || S.equals(".") || S.startsWith("#"))).collect(Collectors.toSet());
}
} else {
knownGeneIds = Collections.emptySet();
}
// open the sam file
final String input = oneAndOnlyOneFile(args);
vcfFileReader = VCFReaderFactory.makeDefault().open(Paths.get(input), true);
final VCFHeader header = vcfFileReader.getHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
final Comparator<String> contigCmp = dict == null ? (A, B) -> A.compareTo(B) : new ContigDictComparator(dict);
final Comparator<Gene> geneCmp = (A, B) -> {
int i = contigCmp.compare(A.getContig(), B.getContig());
if (i != 0)
return i;
i = Integer.compare(A.getStart(), B.getStart());
if (i != 0)
return i;
return Integer.compare(A.getEnd(), B.getEnd());
};
final GtfReader gtfReader = new GtfReader(this.gtfPath);
if (dict != null && !dict.isEmpty()) {
this.writingVcf.dictionary(dict);
gtfReader.setContigNameConverter(ContigNameConverter.fromOneDictionary(dict));
}
final List<Gene> genes = gtfReader.getAllGenes().stream().filter(G -> G.getTranscripts().stream().count() > 0L).filter(G -> G.getTranscripts().stream().anyMatch(T -> T.getIntronCount() >= this.min_intron_count)).sorted(geneCmp).collect(Collectors.toList());
gtfReader.close();
/**
* build vcf header
*/
final Set<VCFHeaderLine> metaData = new HashSet<>();
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_QUALITY_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY, true));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY, true));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY, true));
metaData.add(new VCFInfoHeaderLine(VCFConstants.SVTYPE, 1, VCFHeaderLineType.String, "Variation type"));
metaData.add(new VCFInfoHeaderLine("SVLEN", 1, VCFHeaderLineType.Integer, "Variation Length"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_BOUNDS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Introns boundaries"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_SIZES, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Introns sizes"));
metaData.add(new VCFFilterHeaderLine(ATT_NOT_ALL_INTRONS, "Not all introns were found retrocopied"));
for (final String att : ENSEMBL_TRANSCRIPT_ATTS) {
metaData.add(new VCFInfoHeaderLine(att, 1, VCFHeaderLineType.String, "Value for the attribute '" + att + "' in the gtf"));
}
// metaData.add(new VCFFormatHeaderLine(ATT_COUNT_SUPPORTING_READS, 2,VCFHeaderLineType.Integer,"Count supporting reads [intron-left/intron-right]"));
// metaData.add(new VCFInfoHeaderLine(ATT_RETRO_DESC, VCFHeaderLineCount.UNBOUNDED,VCFHeaderLineType.String,
// "Retrocopy attributes: transcript-id|strand|exon-left|exon-left-bases|exon-right-bases|exon-right"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_COUNT, 1, VCFHeaderLineType.Integer, "Number of introns for the Transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_CANDIDATE_COUNT, 1, VCFHeaderLineType.Integer, "Number of introns found retrocopied for the transcript"));
metaData.add(new VCFInfoHeaderLine(ATT_INTRONS_CANDIDATE_FRACTION, 1, VCFHeaderLineType.Float, "Fraction of introns found retrocopied for the transcript"));
metaData.add(new VCFFilterHeaderLine(ATT_NOT_ALL_INTRONS, "Not all introns were found retrocopied"));
metaData.add(new VCFFilterHeaderLine(ATT_KNOWN, "Known RetroGenes. " + (this.knownPath == null ? "" : " Source: " + this.knownPath)));
final VCFHeader header2 = new VCFHeader(header);
metaData.stream().forEach(H -> header2.addMetaDataLine(H));
JVarkitVersion.getInstance().addMetaData(this, header2);
final Allele ref = Allele.create((byte) 'N', true);
final Allele alt = Allele.create("<RETROCOPY>", false);
/* open vcf for writing*/
vcw0 = this.writingVcf.open(this.outputFile);
vcw0.writeHeader(header2);
final ProgressFactory.Watcher<Gene> progress = ProgressFactory.newInstance().logger(LOG).dictionary(dict).build();
for (final Gene gene : genes) {
progress.apply(gene);
final List<VariantContext> variants = new ArrayList<>();
final CloseableIterator<VariantContext> iter2 = vcfFileReader.query(gene.getContig(), gene.getStart(), gene.getEnd());
while (iter2.hasNext()) {
final VariantContext ctx = iter2.next();
// SNV
if (ctx.getStart() == ctx.getEnd())
continue;
StructuralVariantType svType = ctx.getStructuralVariantType();
if (StructuralVariantType.BND.equals(svType))
continue;
if (StructuralVariantType.INS.equals(svType))
continue;
variants.add(ctx);
}
iter2.close();
if (variants.isEmpty())
continue;
for (final Transcript transcript : gene.getTranscripts()) {
if (!transcript.hasIntron())
continue;
if (transcript.getIntronCount() < this.min_intron_count)
continue;
final Counter<String> samples = new Counter<>();
for (final Intron intron : transcript.getIntrons()) {
for (final VariantContext ctx : variants) {
if (!isWithinDistance(intron.getStart(), ctx.getStart()))
continue;
if (!isWithinDistance(intron.getEnd(), ctx.getEnd()))
continue;
if (ctx.hasGenotypes()) {
for (final Genotype g : ctx.getGenotypes()) {
if (g.isNoCall() || g.isHomRef())
continue;
samples.incr(g.getSampleName());
}
} else {
samples.incr("*");
}
}
// end iter2
}
// end intron
final long max_count = samples.stream().mapToLong(E -> E.getValue()).max().orElse(0L);
if (max_count == 0)
continue;
if (this.only_all_introns && max_count != transcript.getIntronCount())
continue;
// ok good candidate
final VariantContextBuilder vcb = new VariantContextBuilder();
vcb.chr(transcript.getContig());
vcb.start(transcript.getStart());
vcb.stop(transcript.getEnd());
switch(this.idKey) {
case gene_name:
final String gn = transcript.getGene().getGeneName();
vcb.id(StringUtils.isBlank(gn) ? transcript.getId() : gn);
break;
case gene_id:
vcb.id(transcript.getGene().getId());
break;
case transcript_id:
vcb.id(transcript.getId());
break;
default:
throw new IllegalStateException();
}
final List<Allele> alleles = Arrays.asList(ref, alt);
// vcb.attribute(VCFConstants.ALLELE_NUMBER_KEY,2);
// vcb.attribute(VCFConstants.ALLELE_COUNT_KEY,1);
// vcb.attribute(VCFConstants.ALLELE_FREQUENCY_KEY,0.5);
vcb.attribute(VCFConstants.SVTYPE, "DEL");
vcb.attribute(VCFConstants.END_KEY, transcript.getEnd());
vcb.attribute("SVLEN", transcript.getLengthOnReference());
vcb.attribute(ATT_INTRONS_BOUNDS, transcript.getIntrons().stream().map(S -> "" + S.getStart() + "-" + S.getEnd()).collect(Collectors.toList()));
vcb.attribute(ATT_INTRONS_SIZES, transcript.getIntrons().stream().mapToInt(S -> S.getLengthOnReference()).toArray());
for (final String att : ENSEMBL_TRANSCRIPT_ATTS) {
final String v = transcript.getProperties().get(att);
if (StringUtils.isBlank(v))
continue;
vcb.attribute(att, v);
}
vcb.alleles(alleles);
boolean pass_filter = true;
// introns sequences
vcb.attribute(ATT_INTRONS_CANDIDATE_COUNT, max_count);
vcb.attribute(ATT_INTRONS_COUNT, transcript.getIntronCount());
vcb.attribute(ATT_INTRONS_CANDIDATE_FRACTION, max_count / (float) transcript.getIntronCount());
if (transcript.getIntronCount() != max_count) {
vcb.filter(ATT_NOT_ALL_INTRONS);
pass_filter = false;
}
if (knownGeneIds.contains(transcript.getGene().getId())) {
vcb.filter(ATT_KNOWN);
pass_filter = false;
}
if (header.hasGenotypingData()) {
final List<Genotype> genotypes = new ArrayList<>();
for (final String sn : header.getSampleNamesInOrder()) {
final List<Allele> gtalleles;
if (samples.count(sn) == 0L) {
gtalleles = Arrays.asList(ref, ref);
} else {
gtalleles = Arrays.asList(ref, alt);
}
final GenotypeBuilder gb = new GenotypeBuilder(sn, gtalleles);
genotypes.add(gb.make());
}
vcb.genotypes(genotypes);
}
if (pass_filter)
vcb.passFilters();
vcw0.add(vcb.make());
}
}
progress.close();
vcw0.close();
vcfFileReader.close();
vcfFileReader = null;
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(vcfFileReader);
CloserUtil.close(vcw0);
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class SortVcfOnInfo method doVcfToVcf.
@Override
protected int doVcfToVcf(String inputName, VCFIterator r, VariantContextWriter w) {
SortingCollection<VariantContext> sorted = null;
try {
final Comparator<VariantContext> cmp2;
final VCFHeader header = r.getHeader();
if (this.infoField != null && this.infoField.equals("<ID>")) {
cmp2 = (A, B) -> compareID(A, B);
} else if (this.infoField != null && this.infoField.equals("<QUAL>")) {
cmp2 = (A, B) -> compareQUAL(A, B);
} else {
this.infoDecl = header.getInfoHeaderLine(this.infoField);
if (this.infoDecl == null) {
LOG.error("VCF doesn't contain the INFO field :" + infoField + ". Available:" + header.getInfoHeaderLines().stream().map(H -> H.getID()).collect(Collectors.joining(" ")));
return -1;
}
cmp2 = (V1, V2) -> compareVariants(V1, V2);
}
final Comparator<VariantContext> cmp = (reverse_it ? cmp2.reversed() : cmp2);
JVarkitVersion.getInstance().addMetaData(getClass().getSimpleName(), header);
sorted = SortingCollection.newInstance(VariantContext.class, new VCFRecordCodec(header), cmp, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorted.setDestructiveIteration(true);
while (r.hasNext()) {
sorted.add(r.next());
}
CloserUtil.close(r);
r = null;
sorted.doneAdding();
w.writeHeader(header);
try (CloseableIterator<VariantContext> iter = sorted.iterator()) {
while (iter.hasNext()) {
w.add(iter.next());
}
}
return 0;
} catch (final Throwable err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
try {
if (sorted != null)
sorted.cleanup();
} catch (Exception err) {
}
CloserUtil.close(w);
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class BamToSVG method plotSVG.
private void plotSVG(final ArchiveFactory archive, final Path path) throws IOException, XMLStreamException {
XMLStreamWriter w = null;
final SamReaderFactory sfrf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).referenceSequence(this.referenceFile);
final Context context = new Context();
try (SamReader samReader = sfrf.open(path)) {
final SAMFileHeader header = samReader.getFileHeader();
final SAMSequenceDictionary dict = SequenceDictionaryUtils.extractRequired(header);
SequenceUtil.assertSequenceDictionariesEqual(dict, this.referenceDict);
context.sampleName = header.getReadGroups().stream().map(RG -> RG.getSample()).filter(S -> !StringUtils.isBlank(S)).findFirst().orElse(IOUtils.getFilenameWithoutCommonSuffixes(path));
// get a chance to get clipping close to bounds
final int extend_for_clip = 100;
// read SamRecord
try (CloseableIterator<SAMRecord> iter = samReader.query(this.interval.getContig(), Math.max(1, this.interval.getStart() - extend_for_clip), this.interval.getEnd() + extend_for_clip, false)) {
final Pileup<SAMRecord> pileup = new Pileup<>((A, B) -> !CoordMath.overlaps(left(A) - 1, right(A) + 1, left(B), right(B)));
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
// if( this.samRecordFilter.filterOut(rec)) continue;
if (!SAMRecordDefaultFilter.accept(rec, this.mappingQuality))
continue;
if (!rec.getReferenceName().equals(this.interval.getContig()))
continue;
if (right(rec) < this.interval.getStart())
continue;
if (left(rec) > this.interval.getEnd())
continue;
pileup.add(rec);
}
context.lines = pileup.getRows();
}
if (this.vcf != null) {
context.readVariantFile(vcf);
}
context.featureWidth = this.drawinAreaWidth / (double) ((this.interval.getEnd() - this.interval.getStart()) + 1);
context.featureHeight = Math.min(Math.max(5.0, context.featureWidth), 30);
context.HEIGHT_RULER = (int) (StringUtils.niceInt(this.interval.getEnd()).length() * context.featureHeight + 5);
LOG.info("Feature height:" + context.featureHeight);
final String buildName = SequenceDictionaryUtils.getBuildName(this.referenceDict).orElse("");
final String filename = this.prefix + buildName + (buildName.isEmpty() ? "" : ".") + this.interval.getContig() + "_" + this.interval.getStart() + "_" + this.interval.getEnd() + "." + context.sampleName + ".svg";
LOG.info("writing " + filename);
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
try (OutputStream fout = archive.openOuputStream(filename)) {
w = xof.createXMLStreamWriter(fout, "UTF-8");
w.writeStartDocument("UTF-8", "1.0");
printDocument(w, context);
w.writeEndDocument();
w.flush();
w.close();
fout.flush();
}
}
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class Biostar145820 method doWork.
@Override
public int doWork(final List<String> args) {
SamReader samReader = null;
SAMRecordIterator iter = null;
SAMFileWriter samWriter = null;
final Random random = new Random(this.seed == -1L ? System.currentTimeMillis() : this.seed);
CloseableIterator<RandSamRecord> iter2 = null;
try {
final String input = oneFileOrNull(args);
final SamReaderFactory srf = super.createSamReaderFactory().validationStringency(ValidationStringency.LENIENT);
if (this.refPath != null)
srf.referenceSequence(this.refPath);
samReader = srf.open(input == null ? SamInputResource.of(stdin()) : SamInputResource.of(input));
final SAMFileHeader header = samReader.getFileHeader().clone();
header.setSortOrder(SortOrder.unsorted);
header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine());
final ProgressFactory.Watcher<SAMRecord> progress = ProgressFactory.newInstance().dictionary(samReader.getFileHeader()).logger(LOG).build();
iter = samReader.iterator();
final SortingCollection<RandSamRecord> sorter = SortingCollection.newInstance(RandSamRecord.class, new RandSamRecordCodec(header), new RandSamRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
sorter.setDestructiveIteration(true);
while (iter.hasNext()) {
final SAMRecord rec = progress.apply(iter.next());
if (this.filter.filterOut(rec)) {
continue;
}
final RandSamRecord r = new RandSamRecord();
r.l_rand_index = random.nextLong();
r.samRecord = rec;
sorter.add(r);
}
iter.close();
iter = null;
sorter.doneAdding();
iter2 = sorter.iterator();
samWriter = writingBamArgs.openSamWriter(this.outputFile, header, true);
final SAMFileWriter finalw = samWriter;
iter2.stream().limit(this.count < 0L ? Long.MAX_VALUE - 1 : this.count).map(R -> R.samRecord).forEach(R -> finalw.addAlignment(R));
iter2.close();
iter2 = null;
sorter.cleanup();
progress.close();
} catch (final Throwable e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(iter2);
CloserUtil.close(samReader);
CloserUtil.close(samWriter);
}
return 0;
}
use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
the class SvToSVG method buildSample.
private Element buildSample(final Sample sample, final DocumentFragment animationLayer) {
final Element sampleRoot = element("g");
double curr_y = sample.getY();
final Element sampleLabel = element("text", sample.sampleName);
sampleLabel.setAttribute("x", "5");
sampleLabel.setAttribute("y", format(curr_y + 14));
sampleLabel.setAttribute("class", "samplename");
sampleRoot.appendChild(sampleLabel);
curr_y += 20;
// loop over regions
for (final Sample.Region region : sample.regions) {
region.y = curr_y;
// genomic sequence will be used to test if there is a mismatch. No garantee it exists.
final GenomicSequence genomicSequence;
if (this.indexedFastaSequenceFile != null) {
final ContigNameConverter ctgConver = ContigNameConverter.fromOneDictionary(this.indexedFastaSequenceFile.getSequenceDictionary());
final String sname = ctgConver.apply(region.getContig());
if (StringUtil.isBlank(sname)) {
genomicSequence = null;
} else {
genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, sname);
}
} else {
genomicSequence = null;
}
final Element regionRoot = element("g");
sampleRoot.appendChild(regionRoot);
final Element rgnLabel = element("text", new SimpleInterval(region).toNiceString() + " Length:" + this.niceIntFormat.format((region.getLengthOnReference())));
rgnLabel.setAttribute("x", "5");
rgnLabel.setAttribute("y", format(curr_y + 12));
rgnLabel.setAttribute("class", "samplename");
regionRoot.appendChild(rgnLabel);
curr_y += 20;
final double y_top_region = curr_y;
if (this.coverageHeight > 0) {
/* draw coverage */
final TreeMap<Integer, Long> pos2cov = region.shortReadStream().filter(R -> R.isDefaultShortRead()).flatMapToInt(R -> R.getRecord().getAlignmentBlocks().stream().flatMapToInt(RB -> java.util.stream.IntStream.rangeClosed(RB.getReferenceStart(), RB.getReferenceStart() + RB.getLength()))).filter(P -> P >= region.getStart()).filter(P -> P <= region.getEnd()).mapToObj(P -> P).collect(Collectors.groupingBy(Function.identity(), () -> new TreeMap<>(), Collectors.counting()));
;
final long max_cov = pos2cov.values().stream().mapToLong(L -> L.longValue()).max().orElse(1L);
final Element covPath = element("path");
covPath.setAttribute("class", "coverage");
regionRoot.appendChild(covPath);
final StringBuilder sb = new StringBuilder();
sb.append("M 0 " + format(curr_y + this.coverageHeight));
for (int k = 0; k < this.drawinAreaWidth; k++) {
final int pos1 = region.getStart() + (int) (((k + 0) / (double) this.drawinAreaWidth) * (double) region.getLengthOnReference());
final int pos2 = region.getStart() + (int) (((k + 1) / (double) this.drawinAreaWidth) * (double) region.getLengthOnReference());
final double dp = IntStream.range(pos1, pos2).filter(P -> pos2cov.containsKey(P)).mapToLong(P -> pos2cov.get(P)).max().orElseGet(() -> 0L);
final double dpy = curr_y + this.coverageHeight - (dp / (double) max_cov) * (double) this.coverageHeight;
sb.append(" L " + format(k) + " " + format(dpy));
}
sb.append(" L " + format(this.drawinAreaWidth) + " " + format(curr_y + this.coverageHeight));
sb.append(" Z");
covPath.setAttribute("d", sb.toString());
covPath.appendChild(element("title", "Coverage. Max:" + niceIntFormat.format(max_cov)));
curr_y += this.coverageHeight;
curr_y += 2;
}
/* print all lines */
for (int nLine = 0; nLine < region.lines.size(); ++nLine) {
final List<Sample.ShortRead> line = region.lines.get(nLine);
// loop over records on that line
for (final Sample.ShortRead shortRead : line) {
boolean trace = shortRead.getRecord().getReadName().equals(DEBUG_READ);
shortRead.y = curr_y;
int ref1 = shortRead.getStart();
final double leftX = shortRead.getPixelStart();
final double midy = this.featureHeight / 2.0;
final double maxy = this.featureHeight;
final Element g1 = element("g");
final Element g3;
if (shortRead.isSplitRead()) {
final Sample.SplitRead splitRead = Sample.SplitRead.class.cast(shortRead);
// final double dx = -0.5*shortRead.getPixelLength();
// final double dy = -0.5*shortRead.getHeight();
splitRead.g_rotate = element("g");
splitRead.g_rotate.setAttribute("transform", "rotate(0)");
g1.appendChild(splitRead.g_rotate);
g3 = element("g");
g3.setAttribute("transform", "translate(0,0)");
splitRead.g_rotate.appendChild(g3);
splitRead.g_translate2 = g3;
} else {
g3 = g1;
}
g1.setAttribute("transform", "translate(" + format(shortRead.getPixelStart()) + "," + format(shortRead.getY()) + ")");
// readElement.setAttribute("style","transform-origin:center");
if (shortRead.isSplitRead()) {
// always in front
animationLayer.appendChild(g1);
} else {
regionRoot.insertBefore(g1, regionRoot.getFirstChild());
}
final BiPredicate<Integer, Integer> isMismatch = (readpos0, refpos1) -> {
if (genomicSequence == null)
return false;
if (refpos1 < 1 || refpos1 > genomicSequence.length())
return false;
char refC = Character.toUpperCase(genomicSequence.charAt(refpos1 - 1));
if (refC == 'N')
return false;
final byte[] bases = shortRead.getRecord().getReadBases();
if (bases == null || bases == SAMRecord.NULL_SEQUENCE)
return false;
if (readpos0 < 0 || readpos0 >= bases.length) {
LOG.warn("P=" + readpos0 + " 0<x<" + bases.length);
return false;
}
char readC = (char) Character.toUpperCase(bases[readpos0]);
if (readC == 'N')
return false;
return readC != refC;
};
shortRead.g_translate1 = g1;
int readpos0 = 0;
int readpos0_clipped = 0;
final Element title = element("title", shortRead.getRecord().getPairedReadName() + " " + shortRead.getRecord().getCigarString() + " " + (shortRead.getRecord().getReadNegativeStrandFlag() ? "-" : "+"));
g3.appendChild(title);
final DocumentFragment insertionsFragment = this.document.createDocumentFragment();
final Cigar cigar = shortRead.getRecord().getCigar();
for (int cigarIdx = 0; cigarIdx < cigar.numCigarElements(); cigarIdx++) {
final CigarElement ce = cigar.getCigarElement(cigarIdx);
final CigarOperator op = ce.getOperator();
int next_ref = ref1;
int next_read = readpos0;
switch(op) {
case I:
{
next_read += ce.getLength();
readpos0_clipped += ce.getLength();
if (!(ref1 + ce.getLength() < region.getStart() || ref1 > region.getEnd())) {
final double ce_length = region.baseToPixel(region.getStart() + ce.getLength());
final Element rectInsert = element("rect");
rectInsert.setAttribute("class", "insert");
rectInsert.setAttribute("x", format(region.baseToPixel(ref1) - leftX));
rectInsert.setAttribute("y", format(0));
rectInsert.setAttribute("width", format(this.svgDuration > 0 && ce_length > 1 ? ce_length : 1));
rectInsert.setAttribute("height", format(this.featureHeight));
rectInsert.appendChild(element("title", this.niceIntFormat.format(ce.getLength())));
if (this.svgDuration > 0 && ce_length > 1) {
final Element anim = element("animate");
rectInsert.appendChild(anim);
anim.setAttribute("attributeType", "XML");
anim.setAttribute("attributeName", "width");
anim.setAttribute("begin", "0s");
anim.setAttribute("from", format(ce_length));
anim.setAttribute("to", "1");
anim.setAttribute("dur", String.valueOf(this.svgDuration) + "s");
anim.setAttribute("repeatCount", this.svgRepeatCount);
anim.setAttribute("fill", "freeze");
}
insertionsFragment.appendChild(rectInsert);
}
break;
}
case P:
continue;
case S:
case H:
case M:
case X:
case EQ:
{
next_ref += ce.getLength();
if (!op.equals(CigarOperator.H)) {
next_read += ce.getLength();
}
if (!(next_ref < region.getStart() || ref1 > region.getEnd())) {
final double distance_pix = region.baseToPixel(next_ref) - region.baseToPixel(ref1);
final StringBuilder sb = new StringBuilder();
final Element path = element("path");
path.setAttribute("class", "op" + op.name() + (shortRead.isDefaultShortRead() ? "" : "x"));
if (trace)
path.setAttribute("style", "fill:blue;");
if (!op.isClipping() && shortRead.isDefaultShortRead() && shortRead.getRecord().getReadPairedFlag() && !shortRead.getRecord().getProperPairFlag()) {
if (!shortRead.getContig().equals(shortRead.getRecord().getMateReferenceName())) {
path.setAttribute("style", "fill:orchid;");
} else {
path.setAttribute("style", "fill:lightblue;");
}
}
// arrow <--
if (cigarIdx == 0 && shortRead.isNegativeStrand()) {
sb.append("M ").append(format(region.baseToPixel(ref1) - leftX)).append(',').append(0);
sb.append(" h ").append(format(distance_pix));
sb.append(" v ").append(format(maxy));
sb.append(" h ").append(format(-(distance_pix)));
sb.append(" l ").append(format(-arrow_w)).append(',').append(-featureHeight / 2.0);
sb.append(" Z");
} else // arrow -->
if (cigarIdx + 1 == cigar.numCigarElements() && !shortRead.isNegativeStrand()) {
sb.append("M ").append(format(region.baseToPixel(ref1) - leftX + distance_pix)).append(',').append(0);
sb.append(" h ").append(format(-(distance_pix)));
sb.append(" v ").append(format(maxy));
sb.append(" h ").append(format(distance_pix));
sb.append(" l ").append(format(arrow_w)).append(',').append(-featureHeight / 2.0);
sb.append(" Z");
} else {
sb.append("M ").append(format(region.baseToPixel(ref1) - leftX)).append(',').append(0);
sb.append(" h ").append(format(distance_pix));
sb.append(" v ").append(format(maxy));
sb.append(" h ").append(format(-(distance_pix)));
sb.append(" Z");
}
path.setAttribute("d", sb.toString());
g3.appendChild(path);
if (op.isAlignment() && genomicSequence != null && !hideMismatch) {
for (int x = 0; x < ce.getLength(); ++x) {
if (!isMismatch.test(readpos0_clipped + x, ref1 + x))
continue;
if (ref1 + x < region.getStart())
continue;
if (ref1 + x > region.getEnd())
continue;
final Element rectMismatch = element("rect");
rectMismatch.setAttribute("class", "mismatch");
rectMismatch.setAttribute("x", format(region.baseToPixel(ref1 + x) - leftX));
rectMismatch.setAttribute("y", format(0));
rectMismatch.setAttribute("width", format((region.baseToPixel(ref1 + x + 1) - region.baseToPixel(ref1 + x))));
rectMismatch.setAttribute("height", format(this.featureHeight));
g3.appendChild(rectMismatch);
}
}
}
if (!op.isClipping()) {
readpos0_clipped += ce.getLength();
}
break;
}
case D:
case N:
{
next_ref += ce.getLength();
if (!(next_ref < region.getStart() || ref1 > region.getEnd())) {
final double distance_pix = region.baseToPixel(next_ref) - region.baseToPixel(ref1);
final Element lineE = element("line");
lineE.setAttribute("class", "opD");
lineE.setAttribute("x1", format(region.baseToPixel(ref1) - leftX));
lineE.setAttribute("y1", format(midy));
lineE.setAttribute("x2", format(region.baseToPixel(ref1) - leftX + distance_pix));
lineE.setAttribute("y2", format(midy));
g3.insertBefore(lineE, g3.getFirstChild());
}
break;
}
default:
throw new IllegalStateException(op.name());
}
ref1 = next_ref;
readpos0 = next_read;
}
g3.appendChild(insertionsFragment);
}
curr_y += (this.featureHeight + 3);
}
for (int x = 1; x < 10; ++x) {
final double xx = (this.drawinAreaWidth / 10.0) * x;
final Element line = element("line");
line.setAttribute("class", "ruler");
line.setAttribute("x1", format(xx));
line.setAttribute("y1", format(y_top_region));
line.setAttribute("x2", format(xx));
line.setAttribute("y2", format(curr_y));
line.appendChild(element("title", niceIntFormat.format(region.getStart() + (int) (region.getLengthOnReference() / 10.0) * x)));
regionRoot.insertBefore(line, regionRoot.getFirstChild());
}
/**
* print variants
*/
if (this.vcfFileReader != null) {
final VCFHeader header = this.vcfFileReader.getHeader();
final SAMSequenceDictionary vcfdict = header.getSequenceDictionary();
final ContigNameConverter ctgConver = (vcfdict == null ? null : ContigNameConverter.fromOneDictionary(vcfdict));
final String sname = ctgConver == null ? null : ctgConver.apply(region.getContig());
if (!StringUtil.isBlank(sname)) {
final CloseableIterator<VariantContext> iter = this.vcfFileReader.query(region.getContig(), region.getStart(), region.getEnd());
while (iter.hasNext()) {
final VariantContext ctx = iter.next();
if (!ctx.isVariant())
continue;
final double x1 = Math.max(0, region.baseToPixel(ctx.getStart()));
final double x2 = Math.min(region.baseToPixel(ctx.getEnd() + 1), this.drawinAreaWidth);
final Genotype g = ctx.getGenotype(sample.sampleName);
final Element rect = element("rect");
rect.setAttribute("class", "variant" + (g == null ? "" : g.getType().name()));
rect.setAttribute("x", format(x1));
rect.setAttribute("y", format(y_top_region));
rect.setAttribute("width", format(x2 - x1));
rect.setAttribute("height", format(curr_y - y_top_region));
rect.appendChild(element("title", niceIntFormat.format(ctx.getStart()) + "-" + niceIntFormat.format(ctx.getEnd()) + " " + ctx.getReference().getDisplayString()));
regionRoot.insertBefore(rect, regionRoot.getFirstChild());
}
iter.close();
}
}
region.height = curr_y - region.y;
}
sample.height = curr_y - sample.y;
final Element frame = element("rect");
frame.setAttribute("class", "frame");
frame.setAttribute("x", format(0));
frame.setAttribute("y", format(sample.getY()));
frame.setAttribute("width", format(drawinAreaWidth));
frame.setAttribute("height", format(sample.getHeight()));
sampleRoot.appendChild(frame);
return sampleRoot;
}
Aggregations