use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.
the class CytobandToSvg method doWork.
@Override
public int doWork(final List<String> args) {
XMLStreamWriter w = null;
FileOutputStream fout = null;
try {
final List<Cytoband> cytobands;
if (StringUtil.isBlank(this.cytobandsUri) || this.cytobandsUri.equals("-")) {
cytobands = parseCytobands(IOUtils.openStdinForBufferedReader());
} else {
cytobands = parseCytobands(IOUtils.openURIForBufferedReading(this.cytobandsUri));
}
cytobands.stream().map(C -> C.getContig()).collect(Collectors.toCollection(LinkedHashSet::new)).stream().map(C -> new Contig(cytobands.stream().filter(T -> T.getContig().equals(C)).sorted((A, B) -> Integer.compare(A.getStart(), B.getStart())).collect(Collectors.toList()))).forEach(C -> this.name2contig.put(C.getContig(), C));
;
this.max_contig_length = this.name2contig.values().stream().mapToLong(C -> C.getSequenceLength()).max().orElse(0L);
if (this.name2contig.isEmpty() || this.max_contig_length <= 0) {
LOG.error("no contig found");
return -1;
}
final Rectangle drawingArea = new Rectangle(0, 0, this.width, this.height);
final XMLOutputFactory xof = XMLOutputFactory.newFactory();
if (this.outputFile == null) {
w = xof.createXMLStreamWriter(stdout(), "UTF-8");
} else {
fout = new FileOutputStream(this.outputFile);
w = xof.createXMLStreamWriter(fout, "UTF-8");
}
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("svg");
w.writeAttribute("width", format(drawingArea.width));
w.writeAttribute("height", format(drawingArea.height));
w.writeAttribute("version", "1.1");
w.writeDefaultNamespace(SVG.NS);
w.writeNamespace("xlink", XLINK.NS);
w.writeStartElement("title");
w.writeCharacters(StringUtil.isBlank(this.title) ? CytobandToSvg.class.getName() : this.title);
w.writeEndElement();
w.writeStartElement("description");
w.writeCharacters("Cmd:" + getProgramCommandLine() + "\n");
w.writeCharacters("Version:" + getVersion() + "\n");
w.writeCharacters("Author: Pierre Lindenbaum\n");
w.writeEndElement();
w.writeStartElement("defs");
w.writeStartElement("linearGradient");
w.writeAttribute("id", "grad01");
w.writeAttribute("x1", "0%");
w.writeAttribute("x2", "100%");
w.writeAttribute("y1", "50%");
w.writeAttribute("y2", "50%");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "0%");
w.writeAttribute("style", "stop-color:white;stop-opacity:1;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "50%");
w.writeAttribute("style", "stop-color:white;stop-opacity:0;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "75%");
w.writeAttribute("style", "stop-color:black;stop-opacity:0;");
w.writeEmptyElement("stop");
w.writeAttribute("offset", "100%");
w.writeAttribute("style", "stop-color:black;stop-opacity:0.9;");
w.writeEndElement();
w.writeStartElement("filter");
w.writeAttribute("id", "filter01");
w.writeEmptyElement("feGaussianBlur");
w.writeAttribute("in", "SourceGraphic");
w.writeAttribute("stdDeviation", "10");
// filter
w.writeEndElement();
double x = drawingArea.getX();
double contig_width = drawingArea.getWidth() / (double) this.name2contig.size();
for (final Contig contig : this.name2contig.values()) {
contig.bounds.x = x;
contig.bounds.y = drawingArea.getY();
contig.bounds.width = contig_width;
contig.bounds.height = drawingArea.getHeight();
contig.def(w);
x += contig_width;
}
w.writeEndElement();
w.writeStartElement("style");
w.writeCharacters(".ctgborderp {fill:url(#grad01);stroke:green;}" + ".ctgborderq {fill:url(#grad01);stroke:green;}" + ".ctglabel {text-anchor:middle;stroke:none;fill:darkgrey;font: bold 10px Verdana, Helvetica, Arial, sans-serif;}" + ".cytoband {fill:silver;stroke:none;}" + ".bedlabel {stroke:red;fill:none;text-anchor:start;font: normal 7px Verdana, Helvetica, Arial, sans-serif;}" + ".maintitle {stroke:none;fill:darkgrey;text-anchor:middle;font: normal 12px Verdana, Helvetica, Arial, sans-serif;}" + ".ctgback {fill:gainsboro;stroke:none;filter:url(#filter01);}");
// style
w.writeEndElement();
w.writeStartElement("g");
w.writeAttribute("id", "body");
/* title */
if (!StringUtil.isBlank(this.title)) {
w.writeStartElement("text");
w.writeAttribute("class", "maintitle");
w.writeAttribute("x", format(this.width / 2.0));
w.writeAttribute("y", format(12));
w.writeCharacters(this.title);
w.writeEndElement();
}
w.writeStartElement("g");
w.writeAttribute("id", "karyotype");
for (final Contig contig : this.name2contig.values()) {
contig.paint(w);
}
// g@id=karyotype
w.writeEndElement();
final ContigNameConverter ctgNameConverter = ContigNameConverter.fromContigSet(this.name2contig.keySet());
ctgNameConverter.setOnNotFound(ContigNameConverter.OnNotFound.SKIP);
for (final String filename : args) {
final Pattern tab = Pattern.compile("[\t]");
w.writeStartElement("g");
w.writeAttribute("id", "input" + filename);
BufferedReader br = IOUtils.openURIForBufferedReading(filename);
String line;
while ((line = br.readLine()) != null) {
if (line == null || StringUtil.isBlank(line) || BedLine.isBedHeader(line))
continue;
final String[] tokens = tab.split(line, 4);
if (tokens.length < 3)
continue;
final String ctgName = ctgNameConverter.apply(tokens[0]);
if (StringUtil.isBlank(ctgName))
continue;
final Contig ctg = this.name2contig.get(ctgName);
if (ctg == null)
continue;
final Rectangle2D.Double r = ctg.getContigBounds();
final int chromStart = Integer.parseInt(tokens[1]);
final int chromEnd = Integer.parseInt(tokens[2]);
final Map<String, String> attributes = new HashMap<>();
if (tokens.length > 3) {
final StreamTokenizer st = new StreamTokenizer(new StringReader(tokens[3]));
st.wordChars('_', '_');
String key = null;
while (st.nextToken() != StreamTokenizer.TT_EOF) {
String s = null;
switch(st.ttype) {
case StreamTokenizer.TT_NUMBER:
s = String.valueOf(st.nval);
break;
case '"':
case '\'':
case StreamTokenizer.TT_WORD:
s = st.sval;
break;
case ';':
break;
default:
break;
}
if (s == null)
continue;
if (key == null) {
key = s;
} else {
attributes.put(key, s);
key = null;
}
}
}
w.writeStartElement("g");
if (attributes.containsKey("href")) {
w.writeStartElement("a");
w.writeAttribute("href", attributes.get("href"));
}
w.writeStartElement("rect");
w.writeAttribute("style", "stroke:red;");
w.writeAttribute("x", format(r.getMaxX() + 2));
w.writeAttribute("width", format(2));
w.writeAttribute("y", format(ctg.pos2pixel(chromStart, r)));
w.writeAttribute("height", format(ctg.pos2pixel(chromEnd, r) - ctg.pos2pixel(chromStart, r)));
if (attributes.containsKey("title")) {
w.writeStartElement("title");
w.writeCharacters(attributes.get("title"));
w.writeEndElement();
}
w.writeEndElement();
if (attributes.containsKey("label")) {
w.writeStartElement("text");
w.writeAttribute("class", "bedlabel");
w.writeAttribute("x", format(r.getMaxX() + 4));
w.writeAttribute("y", format(3 + ctg.pos2pixel(chromStart, r)));
w.writeCharacters(attributes.get("label"));
w.writeEndElement();
}
if (attributes.containsKey("href")) {
w.writeEndElement();
}
// g
w.writeEndElement();
}
br.close();
w.writeEndElement();
}
// g@id=body
w.writeEndElement();
// svg
w.writeEndElement();
w.writeEndDocument();
w.flush();
w.close();
if (fout != null) {
fout.flush();
fout.close();
fout = null;
}
return 0;
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(fout);
}
}
use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.
the class FindAllCoverageAtPosition method convertFromSamHeader.
private Mutation convertFromSamHeader(final File f, SAMFileHeader h, final Mutation src) {
SAMSequenceDictionary dict = h.getSequenceDictionary();
if (dict == null) {
LOG.warn("No dictionary in " + h);
return null;
}
final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
converter.setOnNotFound(ContigNameConverter.OnNotFound.SKIP);
final String ctg = converter.apply(src.chrom);
if (ctg == null)
return null;
if (ctg != null && ctg.equals(src.chrom))
return src;
return new Mutation(ctg, src.pos);
}
use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.
the class VcfPeekVcf method doVcfToVcf.
/**
* public for knime
*/
@Override
public int doVcfToVcf(final String inputName, final VcfIterator vcfIn, final VariantContextWriter out) {
try {
final Set<String> unmatchedcontigs = new HashSet<>();
final VCFHeader h = vcfIn.getHeader();
final VCFHeader h2 = new VCFHeader(h);
super.addMetaData(h2);
final Map<String, VCFInfoHeaderLine> databaseTags = new HashMap<String, VCFInfoHeaderLine>();
final VCFHeader databaseHeader = this.indexedVcfFileReader.getFileHeader();
final ContigNameConverter nameConverter = (h.getSequenceDictionary() != null && !h.getSequenceDictionary().isEmpty() && databaseHeader.getSequenceDictionary() != null && !databaseHeader.getSequenceDictionary().isEmpty() ? ContigNameConverter.fromDictionaries(h.getSequenceDictionary(), databaseHeader.getSequenceDictionary()) : ContigNameConverter.getIdentity()).setOnNotFound(this.onContigNotFound);
;
for (final String key : this.peek_info_tags) {
VCFInfoHeaderLine hinfo = databaseHeader.getInfoHeaderLine(key);
if (hinfo == null) {
final String msg = "INFO name=" + key + " missing in " + this.resourceVcfFile;
if (this.missingIdIsError) {
LOG.warn(msg);
continue;
} else {
LOG.error(msg);
return -1;
}
}
switch(hinfo.getCountType()) {
case G:
throw new JvarkitException.UserError("Cannot handle VCFHeaderLineCount.G for " + hinfo.getID());
default:
databaseTags.put(hinfo.getID(), hinfo);
break;
}
hinfo = VCFUtils.renameVCFInfoHeaderLine(hinfo, this.peekTagPrefix + key);
if (h2.getInfoHeaderLine(hinfo.getID()) != null) {
throw new JvarkitException.UserError("key " + this.peekTagPrefix + key + " already defined in VCF header");
}
h2.addMetaDataLine(hinfo);
;
}
out.writeHeader(h2);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(h).logger(LOG);
while (vcfIn.hasNext()) {
final VariantContext ctx = progress.watch(vcfIn.next());
final String outContig = nameConverter.apply(ctx.getContig());
if (outContig == null) {
unmatchedcontigs.add(ctx.getContig());
continue;
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
CloseableIterator<VariantContext> iter = this.indexedVcfFileReader.query(outContig, Math.max(0, ctx.getStart() - 1), (ctx.getEnd() + 1));
while (iter.hasNext()) {
final VariantContext ctx2 = iter.next();
if (!outContig.equals(ctx2.getContig()))
continue;
if (ctx.getStart() != ctx2.getStart())
continue;
if (!ctx.getReference().equals(ctx2.getReference()))
continue;
final boolean okAllele;
switch(altAlleleMatcher) {
case all:
{
okAllele = ctx.getAlternateAlleles().stream().filter(A -> ctx2.hasAlternateAllele(A)).count() == ctx.getAlternateAlleles().size();
break;
}
case at_least_one:
{
okAllele = ctx.getAlternateAlleles().stream().filter(A -> ctx2.hasAlternateAllele(A)).findAny().isPresent();
break;
}
case none:
okAllele = true;
break;
default:
throw new IllegalStateException(altAlleleMatcher.name());
}
if (!okAllele)
continue;
if (this.peekId && ctx2.hasID()) {
vcb.id(ctx2.getID());
}
boolean somethingWasChanged = false;
for (final String key : databaseTags.keySet()) {
if (!ctx2.hasAttribute(key))
continue;
final VCFInfoHeaderLine dbHeader = databaseTags.get(key);
switch(dbHeader.getCountType()) {
case A:
{
final List<Object> newatt = new ArrayList<>();
final List<Object> ctx2att = ctx2.getAttributeAsList(key);
for (int i = 0; i < ctx.getAlternateAlleles().size(); ++i) {
final Allele ctxalt = ctx.getAlternateAllele(i);
int index2 = ctx2.getAlternateAlleles().indexOf(ctxalt);
if (index2 == -1 || index2 >= ctx2att.size()) {
newatt.add(null);
} else {
newatt.add(ctx2att.get(index2));
}
}
if (newatt.stream().filter(Obj -> !(Obj == null || VCFConstants.EMPTY_INFO_FIELD.equals(Obj))).count() > 0) {
vcb.attribute(this.peekTagPrefix + key, newatt);
somethingWasChanged = true;
}
break;
}
case R:
{
final List<Object> newatt = new ArrayList<>();
final List<Object> ctx2att = ctx2.getAttributeAsList(key);
for (int i = 0; i < ctx.getAlleles().size(); ++i) {
final Allele ctxalt = ctx.getAlleles().get(i);
int index2 = ctx2.getAlleleIndex(ctxalt);
if (index2 == -1 || index2 >= ctx2att.size()) {
newatt.add(null);
} else {
newatt.add(ctx2att.get(index2));
}
}
if (newatt.stream().filter(Obj -> !(Obj == null || VCFConstants.EMPTY_INFO_FIELD.equals(Obj))).count() > 0) {
vcb.attribute(this.peekTagPrefix + key, newatt);
somethingWasChanged = true;
}
break;
}
default:
{
final Object o = ctx2.getAttribute(key);
vcb.attribute(this.peekTagPrefix + key, o);
somethingWasChanged = true;
break;
}
}
}
if (somethingWasChanged)
break;
}
iter.close();
iter = null;
out.add(vcb.make());
if (out.checkError())
break;
}
progress.finish();
if (!unmatchedcontigs.isEmpty()) {
LOG.debug("Unmatched contigs: " + unmatchedcontigs.stream().collect(Collectors.joining("; ")));
}
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
}
}
Aggregations