Search in sources :

Example 11 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class Biostar175929 method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("fasta reference was not defined.");
        return -1;
    }
    IndexedFastaSequenceFile reference = null;
    VcfIterator iter = null;
    try {
        reference = new IndexedFastaSequenceFile(this.faidx);
        iter = super.openVcfIterator(oneFileOrNull(args));
        this.pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        final List<VariantContext> variants = new ArrayList<>();
        for (; ; ) {
            VariantContext ctx = null;
            if (iter.hasNext()) {
                ctx = iter.next();
            }
            if (ctx == null || (!variants.isEmpty() && !ctx.getContig().equals(variants.get(0).getContig()))) {
                if (!variants.isEmpty()) {
                    LOG.info("chrom:" + variants.get(0).getContig() + " N=" + variants.size());
                    final GenomicSequence genomic = new GenomicSequence(reference, variants.get(0).getContig());
                    final StringBuilder title = new StringBuilder();
                    final StringBuilder sequence = new StringBuilder();
                    recursive(genomic, variants, 0, title, sequence);
                    variants.clear();
                }
                if (ctx == null)
                    break;
            }
            variants.add(ctx);
        }
        iter.close();
        iter = null;
        this.pw.flush();
        this.pw.close();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(reference);
        CloserUtil.close(iter);
        CloserUtil.close(pw);
    }
}
Also used : VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile)

Example 12 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class Biostar59647 method doWork.

@Override
public int doWork(final List<String> args) {
    IndexedFastaSequenceFile indexedFastaSequenceFile = null;
    SamReader samFileReader = null;
    PrintStream pout;
    try {
        GenomicSequence genomicSequence = null;
        indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
        samFileReader = null;
        final String bamFile = oneFileOrNull(args);
        samFileReader = super.openSamReader(bamFile);
        if (!SequenceUtil.areSequenceDictionariesEqual(indexedFastaSequenceFile.getSequenceDictionary(), samFileReader.getFileHeader().getSequenceDictionary())) {
            LOG.warning("Not the same sequence dictionaries");
        }
        final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
        pout = (outputFile == null ? stdout() : new PrintStream(this.outputFile));
        final XMLStreamWriter w = xmlfactory.createXMLStreamWriter(pout, "UTF-8");
        w.writeStartDocument("UTF-8", "1.0");
        w.writeStartElement("sam");
        w.writeAttribute("bam", (bamFile == null ? "stdin" : bamFile));
        w.writeAttribute("ref", refFile.getPath());
        w.writeComment(getProgramCommandLine());
        final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(samFileReader.getFileHeader().getSequenceDictionary());
        final SAMRecordIterator iter = samFileReader.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = iter.next();
            progess.watch(rec);
            final byte[] readbases = rec.getReadBases();
            w.writeStartElement("read");
            w.writeStartElement("name");
            w.writeCharacters(rec.getReadName());
            w.writeEndElement();
            w.writeStartElement("sequence");
            w.writeCharacters(new String(readbases));
            w.writeEndElement();
            w.writeStartElement("flags");
            for (SAMFlag f : SAMFlag.values()) {
                w.writeAttribute(f.name(), String.valueOf(f.isSet(rec.getFlags())));
            }
            w.writeCharacters(String.valueOf(rec.getFlags()));
            // flags
            w.writeEndElement();
            if (!rec.getReadUnmappedFlag()) {
                w.writeStartElement("qual");
                w.writeCharacters(String.valueOf(rec.getMappingQuality()));
                w.writeEndElement();
                w.writeStartElement("chrom");
                w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
                w.writeCharacters(rec.getReferenceName());
                w.writeEndElement();
                w.writeStartElement("pos");
                w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
                w.writeEndElement();
                w.writeStartElement("cigar");
                w.writeCharacters(rec.getCigarString());
                w.writeEndElement();
            }
            if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                w.writeStartElement("mate-chrom");
                w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
                w.writeCharacters(rec.getMateReferenceName());
                w.writeEndElement();
                w.writeStartElement("mate-pos");
                w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
                w.writeEndElement();
            }
            if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
                    genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
                }
                w.writeStartElement("align");
                int readIndex = 0;
                int refIndex = rec.getAlignmentStart();
                for (final CigarElement e : rec.getCigar().getCigarElements()) {
                    switch(e.getOperator()) {
                        // ignore hard clips
                        case H:
                            break;
                        // ignore pads
                        case P:
                            break;
                        // cont.
                        case I:
                        case S:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                                    if (readIndex >= 0 && readIndex < readbases.length) {
                                        w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
                                    }
                                    readIndex++;
                                }
                                break;
                            }
                        // cont. -- reference skip
                        case N:
                        case D:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                                        w.writeAttribute("ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
                                    }
                                    refIndex++;
                                }
                                break;
                            }
                        case M:
                        case EQ:
                        case X:
                            {
                                final int length = e.getLength();
                                for (int i = 0; i < length; ++i) {
                                    w.writeEmptyElement(e.getOperator().name());
                                    char baseRead = '\0';
                                    if (readIndex >= 0 && readIndex < readbases.length) {
                                        baseRead = (char) (rec.getReadBases()[readIndex]);
                                        w.writeAttribute("read-index", String.valueOf(readIndex + 1));
                                        w.writeAttribute("read-base", String.valueOf(baseRead));
                                    }
                                    w.writeAttribute("ref-index", String.valueOf(refIndex));
                                    if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
                                        char baseRef = genomicSequence.charAt(refIndex - 1);
                                        w.writeAttribute("ref-base", String.valueOf(baseRef));
                                        if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
                                            w.writeAttribute("mismatch", "true");
                                        }
                                    }
                                    refIndex++;
                                    readIndex++;
                                }
                                break;
                            }
                        default:
                            throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
                    }
                }
                w.writeEndElement();
            }
            w.writeEndElement();
        }
        iter.close();
        w.writeEndElement();
        w.writeEndDocument();
        w.flush();
        pout.flush();
        CloserUtil.close(w);
        CloserUtil.close(pout);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(samFileReader);
        CloserUtil.close(indexedFastaSequenceFile);
    }
    return 0;
}
Also used : PrintStream(java.io.PrintStream) XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFlag(htsjdk.samtools.SAMFlag) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) CigarElement(htsjdk.samtools.CigarElement) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) SAMRecord(htsjdk.samtools.SAMRecord)

Example 13 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class ExtendReferenceWithReads method doWork.

@Override
public int doWork(List<String> args) {
    if (this.faidx == null) {
        LOG.error("No REF defined");
        return -1;
    }
    this.samReaders.clear();
    PrintStream out = null;
    try {
        this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
        SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null) {
            LOG.error("Reference file is missing a sequence dictionary (use picard)");
            return -1;
        }
        final SamReaderFactory srf = super.createSamReaderFactory();
        srf.setOption(Option.CACHE_FILE_BASED_INDEXES, true);
        for (final String uri : IOUtils.unrollFiles(args)) {
            LOG.info("opening BAM " + uri);
            final SamReader sr = srf.open(SamInputResource.of(uri));
            /* doesn't work with remote ?? */
            if (!sr.hasIndex()) {
                LOG.error("file " + uri + " is not indexed");
                return -1;
            }
            final SAMFileHeader header = sr.getFileHeader();
            if (!header.getSortOrder().equals(SortOrder.coordinate)) {
                LOG.error("file " + uri + " not sorted on coordinate but " + header.getSortOrder());
                return -1;
            }
            final SAMSequenceDictionary dict2 = header.getSequenceDictionary();
            if (dict2 == null) {
                LOG.error("file " + uri + " needs a sequence dictionary");
                return -1;
            }
            samReaders.add(sr);
        }
        if (samReaders.isEmpty()) {
            LOG.error("No BAM defined");
            return -1;
        }
        out = super.openFileOrStdoutAsPrintStream(this.outputFile);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        for (final SAMSequenceRecord ssr : dict.getSequences()) {
            final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
            int chromStart = 0;
            int nPrinted = 0;
            out.print(">");
            out.print(ssr.getSequenceName());
            for (final Rescue side : Rescue.values()) {
                switch(side) {
                    case LEFT:
                        /* look before position 0 of chromosome */
                        {
                            final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, -1, -1, side);
                            int newstart = 0;
                            for (final Integer pos : pos2bases.keySet()) {
                                if (pos >= 0)
                                    continue;
                                newstart = Math.min(newstart, pos);
                            }
                            while (newstart < 0) {
                                final Counter<Byte> count = pos2bases.get(newstart);
                                if (nPrinted % 60 == 0)
                                    out.println();
                                out.print(consensus(count));
                                newstart++;
                                ++nPrinted;
                            }
                            break;
                        }
                    case RIGHT:
                        /* look after position > length(chromosome) */
                        {
                            final Map<Integer, Counter<Byte>> pos2bases = this.scanRegion(ssr, -1, -1, side);
                            int newend = ssr.getSequenceLength();
                            for (final Integer pos : pos2bases.keySet()) {
                                if (pos < ssr.getSequenceLength())
                                    continue;
                                newend = Math.max(newend, pos + 1);
                            }
                            for (int i = ssr.getSequenceLength(); i < newend; i++) {
                                final Counter<Byte> count = pos2bases.get(i);
                                if (nPrinted % 60 == 0)
                                    out.println();
                                out.print(consensus(count));
                                ++nPrinted;
                            }
                            break;
                        }
                    case CENTER:
                        /* 0 to chromosome-length */
                        {
                            chromStart = 0;
                            while (chromStart < genomic.length()) {
                                final char base = Character.toUpperCase(genomic.charAt(chromStart));
                                if (base != 'N') {
                                    progress.watch(ssr.getSequenceName(), chromStart);
                                    if (nPrinted % 60 == 0)
                                        out.println();
                                    out.print(base);
                                    ++chromStart;
                                    ++nPrinted;
                                    continue;
                                }
                                int chromEnd = chromStart + 1;
                                while (chromEnd < genomic.length() && Character.toUpperCase(genomic.charAt(chromEnd)) == 'N') {
                                    ++chromEnd;
                                }
                                if (chromEnd - chromStart < this.minLenNNNNContig) {
                                    while (chromStart < chromEnd) {
                                        progress.watch(ssr.getSequenceName(), chromStart);
                                        if (nPrinted % 60 == 0)
                                            out.println();
                                        out.print(base);
                                        ++chromStart;
                                        ++nPrinted;
                                    }
                                    continue;
                                }
                                final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, chromStart, chromEnd, side);
                                while (chromStart < chromEnd) {
                                    final Counter<Byte> count = pos2bases.get(chromStart);
                                    if (nPrinted % 60 == 0)
                                        out.println();
                                    if (count == null) {
                                        out.print('N');
                                    } else {
                                        out.print(consensus(count));
                                    }
                                    ++chromStart;
                                    ++nPrinted;
                                    continue;
                                }
                            }
                            break;
                        }
                }
            // end switch type
            }
            out.println();
        }
        progress.finish();
        out.flush();
        out.close();
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        for (final SamReader r : samReaders) CloserUtil.close(r);
        samReaders.clear();
    }
}
Also used : PrintStream(java.io.PrintStream) SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) Counter(com.github.lindenb.jvarkit.util.Counter) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Example 14 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class FindAllCoverageAtPosition method doWork.

@Override
public int doWork(final List<String> args) {
    final Set<Mutation> mutations = new TreeSet<>();
    BufferedReader r = null;
    try {
        if (this.referenceFileFile != null) {
            this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFileFile);
        }
        mutations.addAll(Arrays.asList(this.positionStr.split("[  ]")).stream().filter(S -> !S.trim().isEmpty()).map(S -> new Mutation(S)).collect(Collectors.toSet()));
        for (final String s : positionStr.split("[  ]")) {
            if (s.trim().isEmpty())
                continue;
            mutations.add(new Mutation(s));
        }
        if (this.positionFile != null) {
            String line;
            r = IOUtils.openFileForBufferedReading(positionFile);
            if (positionFile.getName().endsWith(".bed")) {
                final BedLineCodec codec = new BedLineCodec();
                while ((line = r.readLine()) != null) {
                    final BedLine bedLine = codec.decode(line);
                    if (bedLine == null)
                        continue;
                    for (int x = bedLine.getStart(); x <= bedLine.getEnd(); ++x) {
                        mutations.add(new Mutation(bedLine.getContig(), x));
                    }
                }
            } else {
                while ((line = r.readLine()) != null) {
                    if (line.trim().isEmpty() || line.startsWith("#"))
                        continue;
                    mutations.add(new Mutation(line));
                }
            }
            r.close();
            r = null;
        }
        if (mutations.isEmpty()) {
            LOG.fatal("undefined position \'str\'");
            return -1;
        }
        LOG.info("number of mutations " + mutations.size());
        this.out = this.openFileOrStdoutAsPrintWriter(this.outputFile);
        out.print("#File");
        out.print('\t');
        out.print("CHROM");
        out.print('\t');
        out.print("POS");
        if (this.indexedFastaSequenceFile != null) {
            out.print('\t');
            out.print("REF");
        }
        out.print('\t');
        out.print(this.groupBy.name().toUpperCase());
        out.print('\t');
        out.print("DEPTH");
        for (final CigarOperator op : CigarOperator.values()) {
            out.print('\t');
            out.print(op.name());
        }
        for (char c : BASES_To_PRINT) {
            out.print('\t');
            out.print("Base(" + c + ")");
        }
        out.println();
        if (args.isEmpty()) {
            LOG.info("Reading from stdin");
            r = new BufferedReader(new InputStreamReader(stdin()));
            scan(r, mutations);
            r.close();
            r = null;
        } else {
            for (final String filename : args) {
                LOG.info("Reading from " + filename);
                r = IOUtils.openURIForBufferedReading(filename);
                scan(r, mutations);
                r.close();
                r = null;
            }
        }
        this.out.flush();
        return 0;
    } catch (Exception err) {
        LOG.severe(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.out);
        CloserUtil.close(r);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) CigarOperator(htsjdk.samtools.CigarOperator) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Counter(com.github.lindenb.jvarkit.util.Counter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) TreeMap(java.util.TreeMap) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) InputStreamReader(java.io.InputStreamReader) CigarOperator(htsjdk.samtools.CigarOperator) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) TreeSet(java.util.TreeSet) BufferedReader(java.io.BufferedReader)

Example 15 with IndexedFastaSequenceFile

use of htsjdk.samtools.reference.IndexedFastaSequenceFile in project jvarkit by lindenb.

the class GBrowserHtml method doWork.

@Override
public int doWork(List<String> args) {
    final int DEFAULT_EXTEND_INTERVAL = 0;
    SamReader samReader = null;
    ZipOutputStream zout = null;
    BufferedReader bufReader = null;
    PrintWriter paramsWriter = null;
    JsonWriter paramsJsonWriter = null;
    String line;
    IndexedFastaSequenceFile faidx = null;
    long snapshot_id = 0L;
    final String inputName = oneFileOrNull(args);
    try {
        final SamJsonWriterFactory samJsonWriterFactory = SamJsonWriterFactory.newInstance().printHeader(true).printAttributes(false).printMate(true).printReadQualities(false).closeStreamAtEnd(false);
        if (this.outputFile != null) {
            if (!this.outputFile.getName().endsWith(".zip")) {
                LOG.error("Output file should end with *.zip");
                return -1;
            }
        }
        bufReader = (inputName == null ? IOUtils.openStreamForBufferedReader(stdin()) : IOUtils.openURIForBufferedReading(inputName));
        final File tmpFile1 = File.createTempFile("gbrowse.", ".tmp");
        tmpFile1.deleteOnExit();
        final File tmpFile2 = File.createTempFile("gbrowse.", ".tmp");
        tmpFile2.deleteOnExit();
        paramsWriter = new PrintWriter(tmpFile2);
        paramsWriter.print("var config=");
        paramsWriter.flush();
        paramsJsonWriter = new JsonWriter(paramsWriter);
        paramsJsonWriter.beginArray();
        zout = new ZipOutputStream(super.openFileOrStdoutAsStream(this.outputFile));
        File bamFile = null;
        File faidxFile = null;
        String sampleName = null;
        int extend_interval = DEFAULT_EXTEND_INTERVAL;
        String title = null;
        Interval interval = null;
        while ((line = bufReader.readLine()) != null) {
            LOG.info(line);
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final int eq = line.indexOf("=");
            final String key = (eq == -1 ? "" : line.substring(0, eq).toLowerCase().trim());
            final String value = (eq == -1 ? "" : line.substring(eq + 1).trim());
            if (key.equals("bam")) {
                if (samReader != null)
                    samReader.close();
                samReader = null;
                bamFile = (value.isEmpty() ? null : new File(value));
            } else if (key.equals("sample")) {
                sampleName = (value.isEmpty() ? null : value);
            } else if (key.equals("title")) {
                title = (value.isEmpty() ? null : value);
            } else if (key.equals("ref") || key.equals("fasta")) {
                if (faidx != null)
                    faidx.close();
                faidx = null;
                faidxFile = (value.isEmpty() ? null : new File(value));
            } else if (key.equals("extend")) {
                extend_interval = (value.isEmpty() ? DEFAULT_EXTEND_INTERVAL : Integer.parseInt(value));
            } else if (key.equals("position") || key.equals("location") || key.equals("interval") || key.equals("goto")) {
                Pattern pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)");
                Matcher matcher = pat1.matcher(value);
                if (matcher.matches()) {
                    String c = matcher.group(1);
                    int pos = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
                    interval = new Interval(c, Math.max(1, pos - extend_interval), pos + extend_interval);
                    continue;
                }
                pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\-([\\d,]+)");
                matcher = pat1.matcher(value);
                if (matcher.matches()) {
                    String c = matcher.group(1);
                    int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
                    int E = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
                    if (B > E) {
                        LOG.error("bad interval :" + line);
                        return -1;
                    }
                    interval = new Interval(c, Math.max(1, B - extend_interval), E + extend_interval);
                    continue;
                }
                pat1 = Pattern.compile("([^\\:]+)\\:([\\d,]+)\\+([\\d,]+)");
                matcher = pat1.matcher(value);
                if (matcher.matches()) {
                    String c = matcher.group(1);
                    int B = Integer.parseInt(matcher.group(2).replaceAll("[,]", ""));
                    int x = Integer.parseInt(matcher.group(3).replaceAll("[,]", ""));
                    interval = new Interval(c, Math.max(1, B - (x + extend_interval)), B + (x + extend_interval));
                    continue;
                }
                LOG.error("bad interval :" + line);
                return -1;
            } else if (line.toLowerCase().equals("snapshot")) {
                if (interval == null) {
                    LOG.error("No interval defined!");
                    continue;
                }
                if (bamFile == null) {
                    LOG.error("No BAM file defined!");
                    continue;
                }
                ++snapshot_id;
                LOG.info("open samFile " + bamFile);
                if (samReader == null) {
                    samReader = super.createSamReaderFactory().open(bamFile);
                }
                FileWriter jsonFileWriter = new FileWriter(tmpFile1);
                JsonWriter jsw = new JsonWriter(jsonFileWriter);
                jsw.beginObject();
                jsw.name("interval");
                jsw.beginObject();
                jsw.name("contig");
                jsw.value(interval.getContig());
                jsw.name("start");
                jsw.value(interval.getStart());
                jsw.name("end");
                jsw.value(interval.getEnd());
                jsw.endObject();
                if (faidxFile != null) {
                    if (faidx == null) {
                        faidx = new IndexedFastaSequenceFile(faidxFile);
                    }
                    ReferenceSequence dna = faidx.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
                    jsw.name("reference");
                    jsw.value(dna.getBaseString());
                }
                jsw.name("sam");
                SAMFileWriter samFileWriter = samJsonWriterFactory.open(samReader.getFileHeader(), jsw);
                SAMRecordIterator samRecIter = samReader.queryOverlapping(interval.getContig(), interval.getStart(), interval.getEnd());
                while (samRecIter.hasNext()) {
                    final SAMRecord rec = samRecIter.next();
                    if (sampleName != null && !sampleName.isEmpty()) {
                        SAMReadGroupRecord srg = rec.getReadGroup();
                        if (srg == null)
                            continue;
                        if (!sampleName.equals(srg.getSample()))
                            continue;
                    }
                    if (rec.getReadUnmappedFlag())
                        continue;
                    samFileWriter.addAlignment(rec);
                }
                samRecIter.close();
                samFileWriter.close();
                jsw.endObject();
                jsw.flush();
                jsw.close();
                jsonFileWriter.close();
                ZipEntry entry = new ZipEntry(this.prefix + "_snapshot." + String.format("%05d", snapshot_id) + ".json");
                zout.putNextEntry(entry);
                IOUtils.copyTo(tmpFile1, zout);
                zout.closeEntry();
                tmpFile1.delete();
                paramsJsonWriter.beginObject();
                paramsJsonWriter.name("title");
                paramsJsonWriter.value(title == null ? interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + (sampleName == null ? "" : " " + sampleName) : title);
                paramsJsonWriter.name("interval");
                paramsJsonWriter.beginObject();
                paramsJsonWriter.name("contig");
                paramsJsonWriter.value(interval.getContig());
                paramsJsonWriter.name("start");
                paramsJsonWriter.value(interval.getStart());
                paramsJsonWriter.name("end");
                paramsJsonWriter.value(interval.getEnd());
                paramsJsonWriter.endObject();
                if (sampleName != null && !sampleName.isEmpty()) {
                    paramsJsonWriter.name("sample");
                    paramsJsonWriter.value(sampleName);
                }
                paramsJsonWriter.name("href");
                paramsJsonWriter.value("_snapshot." + String.format("%05d", snapshot_id) + ".json");
                paramsJsonWriter.endObject();
            } else if (line.toLowerCase().equals("exit") || line.toLowerCase().equals("quit")) {
                break;
            }
        }
        bufReader.close();
        bufReader = null;
        for (final String jssrc : new String[] { "gbrowse", "hershey", "samtools", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
            InputStream is = this.getClass().getResourceAsStream("/META-INF/js/" + jssrc + ".js");
            if (is == null) {
                LOG.error("Cannot read resource /META-INF/js/" + jssrc + ".js");
                return -1;
            }
            ZipEntry entry = new ZipEntry(this.prefix + jssrc + ".js");
            entry.setComment("JAVASCRIPT SOURCE " + jssrc);
            zout.putNextEntry(entry);
            IOUtils.copyTo(is, zout);
            CloserUtil.close(is);
            zout.closeEntry();
        }
        // save params.js
        paramsJsonWriter.endArray();
        paramsJsonWriter.flush();
        paramsWriter.println(";");
        paramsWriter.flush();
        paramsJsonWriter.close();
        paramsWriter.close();
        zout.putNextEntry(new ZipEntry(this.prefix + "config.js"));
        IOUtils.copyTo(tmpFile2, zout);
        zout.closeEntry();
        tmpFile2.delete();
        // save index.html
        zout.putNextEntry(new ZipEntry(this.prefix + "index.html"));
        XMLOutputFactory xof = XMLOutputFactory.newFactory();
        XMLStreamWriter w = xof.createXMLStreamWriter(zout, "UTF-8");
        w.writeStartElement("html");
        w.writeStartElement("head");
        w.writeEmptyElement("meta");
        w.writeAttribute("http-equiv", "Content-Type");
        w.writeAttribute("content", "text/html; charset=utf-8");
        w.writeEmptyElement("meta");
        w.writeAttribute("http-equiv", "author");
        w.writeAttribute("content", "Pierre Lindenbaum Phd @yokofakun");
        w.writeStartElement("title");
        w.writeCharacters(getProgramName() + ":" + getVersion());
        // title
        w.writeEndElement();
        w.writeStartElement("style");
        w.writeAttribute("type", "text/css");
        w.writeCharacters("body	{ color:rgb(50,50,50); margin:20px; padding:20px; font: 12pt Arial, Helvetica, sans-serif; }\n");
        w.writeCharacters("label { text-align:right;	 }\n");
        w.writeCharacters("button	{ border: 1px solid; background-image:-moz-linear-gradient( top, gray, lightgray ); }\n");
        w.writeCharacters("canvas { image-rendering:auto;}\n");
        w.writeCharacters(".me 	{ padding-top:100px; font-size:80%; }\n");
        // style
        w.writeEndElement();
        for (final String src : new String[] { "samtools", "gbrowse", "hershey", "config", "com.github.lindenb.jvarkit.tools.misc.GBrowserHtml" }) {
            w.writeStartElement("script");
            w.writeAttribute("type", "text/javascript");
            w.writeAttribute("language", "text/javascript");
            w.writeAttribute("src", src + ".js");
            w.writeCharacters("");
            // script
            w.writeEndElement();
        }
        // head
        w.writeEndElement();
        w.writeStartElement("body");
        w.writeStartElement("div");
        w.writeStartElement("button");
        w.writeAttribute("onclick", "changemenu(-1)");
        w.writeCharacters("prev");
        // button
        w.writeEndElement();
        w.writeStartElement("select");
        w.writeAttribute("id", "menu");
        // menu
        w.writeEndElement();
        w.writeStartElement("button");
        w.writeAttribute("onclick", "changemenu(+1)");
        w.writeCharacters("next");
        // button
        w.writeEndElement();
        // div
        w.writeEndElement();
        w.writeStartElement("div");
        w.writeAttribute("id", "flags");
        // div
        w.writeEndElement();
        w.writeStartElement("div");
        w.writeAttribute("style", "text-align:center;");
        w.writeStartElement("div");
        w.writeAttribute("style", "font-size:200%;margin:10px;");
        w.writeAttribute("id", "browserTitle");
        // div
        w.writeEndElement();
        w.writeStartElement("div");
        w.writeEmptyElement("canvas");
        w.writeAttribute("id", "canvasdoc");
        w.writeAttribute("width", "100");
        w.writeAttribute("height", "100");
        // div
        w.writeEndElement();
        // div
        w.writeEndElement();
        w.writeEmptyElement("hr");
        w.writeStartElement("div");
        w.writeAttribute("class", "me");
        w.writeCharacters("Pierre Lindenbaum PhD. ");
        w.writeStartElement("a");
        w.writeAttribute("href", "https://github.com/lindenb/jvarkit");
        w.writeCharacters("https://github.com/lindenb/jvarkit");
        w.writeEndElement();
        w.writeCharacters(". Tested with Firefox 45.0");
        w.writeEndElement();
        // body
        w.writeEndElement();
        // html
        w.writeEndElement();
        w.flush();
        w.close();
        w = null;
        zout.closeEntry();
        zout.finish();
        zout.close();
        return RETURN_OK;
    } catch (Throwable err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(paramsJsonWriter);
        CloserUtil.close(paramsWriter);
        CloserUtil.close(zout);
        CloserUtil.close(bufReader);
        CloserUtil.close(samReader);
        CloserUtil.close(faidx);
    }
}
Also used : XMLOutputFactory(javax.xml.stream.XMLOutputFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Matcher(java.util.regex.Matcher) SamJsonWriterFactory(com.github.lindenb.jvarkit.util.samtools.SamJsonWriterFactory) FileWriter(java.io.FileWriter) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ZipEntry(java.util.zip.ZipEntry) ReferenceSequence(htsjdk.samtools.reference.ReferenceSequence) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) SamReader(htsjdk.samtools.SamReader) XMLStreamWriter(javax.xml.stream.XMLStreamWriter) PrintWriter(java.io.PrintWriter) Pattern(java.util.regex.Pattern) SAMFileWriter(htsjdk.samtools.SAMFileWriter) InputStream(java.io.InputStream) JsonWriter(com.google.gson.stream.JsonWriter) ZipOutputStream(java.util.zip.ZipOutputStream) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval)

Aggregations

IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)57 File (java.io.File)34 SamReader (htsjdk.samtools.SamReader)22 SAMRecord (htsjdk.samtools.SAMRecord)20 GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence)16 SAMFileHeader (htsjdk.samtools.SAMFileHeader)16 ArrayList (java.util.ArrayList)16 IOException (java.io.IOException)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)14 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)13 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)11 CigarElement (htsjdk.samtools.CigarElement)11 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)11 List (java.util.List)11 FileNotFoundException (java.io.FileNotFoundException)10 BufferedReader (java.io.BufferedReader)9 Collectors (java.util.stream.Collectors)9 Cigar (htsjdk.samtools.Cigar)8 CigarOperator (htsjdk.samtools.CigarOperator)7