Search in sources :

Example 1 with BedLine

use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.

the class BamStats01 method run.

private void run(final String filename, SamReader samFileReader) throws IOException {
    Map<String, Histogram> sample2hist = new HashMap<String, BamStats01.Histogram>();
    SAMSequenceDictionary currDict = samFileReader.getFileHeader().getSequenceDictionary();
    if (samSequenceDictionary == null) {
        samSequenceDictionary = currDict;
    }
    if (this.bedFile != null) {
        if (!SequenceUtil.areSequenceDictionariesEqual(currDict, samSequenceDictionary)) {
            samFileReader.close();
            throw new IOException("incompatible sequence dictionaries." + filename);
        }
        if (intervalTreeMap == null) {
            final BedLineCodec bedCodec = new BedLineCodec();
            intervalTreeMap = new IntervalTreeMap<>();
            LOG.info("opening " + this.bedFile);
            String line;
            final BufferedReader bedIn = IOUtils.openFileForBufferedReading(bedFile);
            while ((line = bedIn.readLine()) != null) {
                final BedLine bedLine = bedCodec.decode(line);
                if (bedLine == null)
                    continue;
                int seqIndex = currDict.getSequenceIndex(bedLine.getContig());
                if (seqIndex == -1) {
                    throw new IOException("unknown chromosome from dict in  in " + line + " " + this.bedFile);
                }
                intervalTreeMap.put(bedLine.toInterval(), Boolean.TRUE);
            }
            bedIn.close();
            LOG.info("done reading " + this.bedFile);
        }
    }
    this.chrX_index = -1;
    this.chrY_index = -1;
    for (final SAMSequenceRecord rec : currDict.getSequences()) {
        final String chromName = rec.getSequenceName().toLowerCase();
        if (chromName.equals("x") || chromName.equals("chrx")) {
            this.chrX_index = rec.getSequenceIndex();
        } else if (chromName.equals("y") || chromName.equals("chry")) {
            this.chrY_index = rec.getSequenceIndex();
        }
    }
    final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(currDict);
    final SAMRecordIterator iter = samFileReader.iterator();
    while (iter.hasNext()) {
        final SAMRecord rec = progess.watch(iter.next());
        String sampleName = groupBy.getPartion(rec);
        if (sampleName == null || sampleName.isEmpty())
            sampleName = "undefined";
        Histogram hist = sample2hist.get(sampleName);
        if (hist == null) {
            hist = new Histogram();
            sample2hist.put(sampleName, hist);
        }
        hist.histograms[Category2.ALL.ordinal()].watch(rec);
        if (intervalTreeMap == null)
            continue;
        if (rec.getReadUnmappedFlag()) {
            continue;
        }
        if (!intervalTreeMap.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd()))) {
            hist.histograms[Category2.OFF_TARGET.ordinal()].watch(rec);
        } else {
            hist.histograms[Category2.IN_TARGET.ordinal()].watch(rec);
        }
    }
    progess.finish();
    samFileReader.close();
    samFileReader = null;
    for (final String sampleName : sample2hist.keySet()) {
        final Histogram hist = sample2hist.get(sampleName);
        out.print(filename + "\t" + sampleName);
        for (final Category2 cat2 : Category2.values()) {
            for (// je je suis libertineuuh, je suis une cat1
            final Category cat1 : // je je suis libertineuuh, je suis une cat1
            Category.values()) {
                out.print("\t");
                out.print(hist.histograms[cat2.ordinal()].counts[cat1.ordinal()]);
            }
            if (intervalTreeMap == null)
                break;
        }
        out.println();
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) IOException(java.io.IOException) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) Interval(htsjdk.samtools.util.Interval)

Example 2 with BedLine

use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.

the class Biostar178713 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.outputFile == null || !outputFile.getName().endsWith(".zip")) {
        LOG.error("output file option  must be declared and must en with .zip");
        return -1;
    }
    final Set<String> inputs = IOUtils.unrollFiles(args);
    List<BedLine> bedLines = new ArrayList<>();
    FileOutputStream fos = null;
    ZipOutputStream zout = null;
    try {
        if (inputs.isEmpty()) {
            LOG.info("reading bed from stdin");
            LineIterator r = IOUtils.openStreamForLineIterator(stdin());
            this.readBed(bedLines, r);
            CloserUtil.close(r);
        } else
            for (final String input : inputs) {
                LOG.info("reading bed from " + input);
                LineIterator r = IOUtils.openURIForLineIterator(input);
                this.readBed(bedLines, r);
                CloserUtil.close(r);
            }
        LOG.info("sorting " + bedLines.size());
        Collections.sort(bedLines, new Comparator<BedLine>() {

            @Override
            public int compare(BedLine o1, BedLine o2) {
                int i = o1.getContig().compareTo(o2.getContig());
                if (i != 0)
                    return i;
                i = o1.getStart() - o2.getStart();
                if (i != 0)
                    return i;
                i = o1.getEnd() - o2.getEnd();
                return i;
            }
        });
        if (bedLines.isEmpty()) {
            LOG.error("no bed line found");
            return -1;
        }
        LOG.info("creating zip " + this.outputFile);
        fos = new FileOutputStream(this.outputFile);
        zout = new ZipOutputStream(fos);
        int chunk = 0;
        while (!bedLines.isEmpty()) {
            ++chunk;
            final ZipEntry entry = new ZipEntry("bed" + String.format("%03d", chunk) + ".bed");
            LOG.info("creating " + entry.getName());
            zout.putNextEntry(entry);
            PrintWriter pw = new PrintWriter(zout);
            BedLine prev = bedLines.get(0);
            bedLines.remove(0);
            pw.println(prev.join());
            int n_in_entry = 1;
            int i = 0;
            while (i < bedLines.size()) {
                final BedLine curr = bedLines.get(i);
                final double distance = curr.getStart() - prev.getEnd();
                if (!prev.getContig().equals(curr.getContig()) || distance > this.distancebed) {
                    pw.println(curr.join());
                    prev = curr;
                    bedLines.remove(i);
                    ++n_in_entry;
                } else {
                    ++i;
                }
            }
            pw.flush();
            zout.closeEntry();
            LOG.info("closing " + entry.getName() + " N=" + n_in_entry);
        }
        zout.finish();
        zout.close();
        return RETURN_OK;
    } catch (Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(fos);
    }
}
Also used : ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) LineIterator(htsjdk.tribble.readers.LineIterator) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) PrintWriter(java.io.PrintWriter)

Example 3 with BedLine

use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.

the class KnimeVariantHelper method parseBedAsBooleanIntervalTreeMap.

public IntervalTreeMap<Boolean> parseBedAsBooleanIntervalTreeMap(final String bedUri) throws IOException {
    if (bedUri == null || bedUri.isEmpty())
        throw new IllegalArgumentException("bad bed uri");
    BufferedReader r = null;
    final IntervalTreeMap<Boolean> treeMap = new IntervalTreeMap<>();
    try {
        r = IOUtils.openURIForBufferedReading(bedUri);
        final BedLineCodec codec = new BedLineCodec();
        r.lines().forEach(L -> {
            final BedLine bedline = codec.decode(L);
            if (bedline == null) {
                LOG.warn("Ignoring line in BED (doesn't look like a BED line):" + L);
                return;
            }
            treeMap.put(bedline.toInterval(), Boolean.TRUE);
        });
        return treeMap;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine)

Example 4 with BedLine

use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.

the class Biostar77828 method doWork.

@Override
public int doWork(List<String> args) {
    PrintStream pw = null;
    try {
        LOG.info("load BED");
        BufferedReader in = super.openBufferedReader(oneFileOrNull(args));
        final BedLineCodec codec = new BedLineCodec();
        String line;
        while ((line = in.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final BedLine bedLine = codec.decode(line);
            if (bedLine == null)
                continue;
            if (bedLine.getColumnCount() < 3)
                throw new IOException("bad BED input " + bedLine);
            final Segment seg = new Segment(bedLine.getContig(), bedLine.getStart() - 1, bedLine.getEnd());
            if (seg.size() == 0)
                continue;
            this.effective_genome_size += seg.size();
            this.all_segments.add(seg);
        }
        pw = super.openFileOrStdoutAsPrintStream(this.outputFile);
        Solution best = null;
        for (long generation = 0; generation < this.N_ITERATIONS; ++generation) {
            if (generation % 100000 == 0)
                LOG.info("generation:" + generation + "/" + this.N_ITERATIONS + " " + (int) ((generation / (double) this.N_ITERATIONS) * 100.0) + "% " + String.valueOf(best));
            Solution sol = createSolution();
            sol.generation = generation;
            if (best == null || sol.compareTo(best) < 0) {
                best = sol;
            /*
	    			if(LOG.isDebugEnabled())
	    				{
	    				LOG.info("%%generation:"+generation);
	    				best.print(stderr());
	    				}*/
            }
        }
        if (best != null) {
            best.print(pw);
        }
        pw.flush();
        pw.close();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(pw);
    }
}
Also used : PrintStream(java.io.PrintStream) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) IOException(java.io.IOException) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine)

Example 5 with BedLine

use of com.github.lindenb.jvarkit.util.bio.bed.BedLine in project jvarkit by lindenb.

the class FindAllCoverageAtPosition method doWork.

@Override
public int doWork(final List<String> args) {
    final Set<Mutation> mutations = new TreeSet<>();
    BufferedReader r = null;
    try {
        if (this.referenceFileFile != null) {
            this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(this.referenceFileFile);
        }
        mutations.addAll(Arrays.asList(this.positionStr.split("[  ]")).stream().filter(S -> !S.trim().isEmpty()).map(S -> new Mutation(S)).collect(Collectors.toSet()));
        for (final String s : positionStr.split("[  ]")) {
            if (s.trim().isEmpty())
                continue;
            mutations.add(new Mutation(s));
        }
        if (this.positionFile != null) {
            String line;
            r = IOUtils.openFileForBufferedReading(positionFile);
            if (positionFile.getName().endsWith(".bed")) {
                final BedLineCodec codec = new BedLineCodec();
                while ((line = r.readLine()) != null) {
                    final BedLine bedLine = codec.decode(line);
                    if (bedLine == null)
                        continue;
                    for (int x = bedLine.getStart(); x <= bedLine.getEnd(); ++x) {
                        mutations.add(new Mutation(bedLine.getContig(), x));
                    }
                }
            } else {
                while ((line = r.readLine()) != null) {
                    if (line.trim().isEmpty() || line.startsWith("#"))
                        continue;
                    mutations.add(new Mutation(line));
                }
            }
            r.close();
            r = null;
        }
        if (mutations.isEmpty()) {
            LOG.fatal("undefined position \'str\'");
            return -1;
        }
        LOG.info("number of mutations " + mutations.size());
        this.out = this.openFileOrStdoutAsPrintWriter(this.outputFile);
        out.print("#File");
        out.print('\t');
        out.print("CHROM");
        out.print('\t');
        out.print("POS");
        if (this.indexedFastaSequenceFile != null) {
            out.print('\t');
            out.print("REF");
        }
        out.print('\t');
        out.print(this.groupBy.name().toUpperCase());
        out.print('\t');
        out.print("DEPTH");
        for (final CigarOperator op : CigarOperator.values()) {
            out.print('\t');
            out.print(op.name());
        }
        for (char c : BASES_To_PRINT) {
            out.print('\t');
            out.print("Base(" + c + ")");
        }
        out.println();
        if (args.isEmpty()) {
            LOG.info("Reading from stdin");
            r = new BufferedReader(new InputStreamReader(stdin()));
            scan(r, mutations);
            r.close();
            r = null;
        } else {
            for (final String filename : args) {
                LOG.info("Reading from " + filename);
                r = IOUtils.openURIForBufferedReading(filename);
                scan(r, mutations);
                r.close();
                r = null;
            }
        }
        this.out.flush();
        return 0;
    } catch (Exception err) {
        LOG.severe(err);
        return -1;
    } finally {
        CloserUtil.close(this.indexedFastaSequenceFile);
        CloserUtil.close(this.out);
        CloserUtil.close(r);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Arrays(java.util.Arrays) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) CigarOperator(htsjdk.samtools.CigarOperator) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ValidationStringency(htsjdk.samtools.ValidationStringency) TreeSet(java.util.TreeSet) StringUtil(htsjdk.samtools.util.StringUtil) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) Counter(com.github.lindenb.jvarkit.util.Counter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) TreeMap(java.util.TreeMap) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BufferedReader(java.io.BufferedReader) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SamReaderFactory(htsjdk.samtools.SamReaderFactory) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) InputStreamReader(java.io.InputStreamReader) CigarOperator(htsjdk.samtools.CigarOperator) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) TreeSet(java.util.TreeSet) BufferedReader(java.io.BufferedReader)

Aggregations

BedLine (com.github.lindenb.jvarkit.util.bio.bed.BedLine)19 BedLineCodec (com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec)16 BufferedReader (java.io.BufferedReader)12 Interval (htsjdk.samtools.util.Interval)9 IOException (java.io.IOException)8 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)7 SamReader (htsjdk.samtools.SamReader)7 File (java.io.File)7 ArrayList (java.util.ArrayList)7 SAMFileHeader (htsjdk.samtools.SAMFileHeader)6 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)5 SAMRecord (htsjdk.samtools.SAMRecord)5 Parameter (com.beust.jcommander.Parameter)4 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)4 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)4 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)4 Program (com.github.lindenb.jvarkit.util.jcommander.Program)4 Logger (com.github.lindenb.jvarkit.util.log.Logger)4 Cigar (htsjdk.samtools.Cigar)4 CigarElement (htsjdk.samtools.CigarElement)4