Search in sources :

Example 11 with IntUnaryOperator

use of java.util.function.IntUnaryOperator in project jvarkit by lindenb.

the class BamStats04 method doWork.

/**
 * Computes coverage statistics for every interval of the BED file, for each
 * partition (as defined by {@code this.partition}) found in the input BAM files,
 * and writes one tab-delimited row per interval and partition.
 *
 * <p>Steps:
 * <ol>
 *   <li>Validate options: the BED file must exist and at least one BAM must be given;
 *       when no minimum coverage was specified, {@code 0} is used.</li>
 *   <li>Open every BAM: each one must be indexed, have a header and a sequence
 *       dictionary, and all dictionaries must be equal to the first one.</li>
 *   <li>Optionally open the reference genome ({@code this.faidxUri}) to report a GC percent.</li>
 *   <li>For each BED line, query the overlapping reads of every BAM, dispatch them to
 *       an {@code IntervalStat} per partition, then print min/max depth and, for each
 *       minimum coverage, mean depth, median depth, number of uncovered bases and
 *       percentage of covered bases.</li>
 * </ol>
 *
 * @param args the BAM files (expanded with {@code IOUtils.unrollFiles})
 * @return {@code RETURN_OK} on success, {@code -1} on any error
 */
@Override
public int doWork(final List<String> args) {
    if (this.bedFile == null || !this.bedFile.exists()) {
        LOG.error("undefined option -B (bed file)");
        return -1;
    }
    if (args.isEmpty()) {
        LOG.error("Bam files missing");
        return -1;
    }
    if (this.minCoverages.isEmpty()) {
        this.minCoverages.add(0);
    }
    // partition name used for reads / files without a usable read group
    final String NO_PARTITION = "N/A";
    BufferedReader bedIn = null;
    final List<SamReader> samReaders = new ArrayList<>(args.size());
    PrintWriter pw = null;
    ReferenceGenome referenceGenome = null;
    ReferenceContig referenceContig = null;
    try {
        final BedLineCodec codec = new BedLineCodec();
        // sorted set: every interval gets a row for each known partition
        final Set<String> all_partitions = new TreeSet<>();
        bedIn = IOUtils.openFileForBufferedReading(this.bedFile);
        // dictionary of the first BAM; every other BAM must match it
        SAMSequenceDictionary dict = null;
        for (final String filename : IOUtils.unrollFiles(args)) {
            LOG.info(filename);
            final SamReader samReader = super.openSamReader(filename);
            // until the reader is added to samReaders, it must be closed here on failure
            if (!samReader.hasIndex()) {
                LOG.error(filename + " is not indexed");
                samReader.close();
                return -1;
            }
            final SAMFileHeader samFileheader = samReader.getFileHeader();
            if (samFileheader == null) {
                LOG.error("SAM file is missing a header " + filename);
                samReader.close();
                return -1;
            }
            final List<SAMReadGroupRecord> readGroups = samFileheader.getReadGroups();
            if (readGroups == null || readGroups.isEmpty()) {
                LOG.warn("No Read group (RG) in the header of " + filename);
                all_partitions.add(NO_PARTITION);
            } else {
                for (final SAMReadGroupRecord rg : readGroups) {
                    all_partitions.add(this.partition.apply(rg, NO_PARTITION));
                }
            }
            final SAMSequenceDictionary d = samFileheader.getSequenceDictionary();
            if (d == null) {
                samReader.close();
                LOG.error(JvarkitException.BamDictionaryMissing.getMessage(filename));
                return -1;
            }
            // from now on the reader is closed by the finally block
            samReaders.add(samReader);
            if (dict == null) {
                dict = d;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(d, dict)) {
                // all the BAMs must share the same reference dictionary
                LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(d, dict));
                return -1;
            }
        }
        if (samReaders.isEmpty()) {
            LOG.error("No Bam defined");
            return -1;
        }
        if (!StringUtil.isBlank(this.faidxUri)) {
            referenceGenome = new ReferenceGenomeFactory().open(this.faidxUri);
        }
        pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // header line; the gc_percent column only exists when a reference was provided
        pw.print("#chrom\tstart\tend\tlength\t" + this.partition.name() + (referenceGenome == null ? "" : "\tgc_percent"));
        pw.print("\tmincov\tmaxcov");
        for (final int MIN_COVERAGE : this.minCoverages) {
            pw.print("\tmeancov_" + MIN_COVERAGE + "\tmediancov_" + MIN_COVERAGE + "\tnocoveragebp_" + MIN_COVERAGE + "\tpercentcovered_" + MIN_COVERAGE);
        }
        pw.println();
        String line = null;
        while ((line = bedIn.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final BedLine bedLine = codec.decode(line);
            if (bedLine == null)
                continue;
            if (dict.getSequence(bedLine.getContig()) == null) {
                LOG.error("Unknown contig in " + line);
                return -1;
            }
            if (bedLine.getStart() > bedLine.getEnd()) {
                LOG.info("ignoring " + bedLine);
                continue;
            }
            // only fetch the reference contig again when the BED contig changes
            if (referenceGenome != null && (referenceContig == null || !referenceContig.hasName(bedLine.getContig()))) {
                referenceContig = referenceGenome.getContig(bedLine.getContig());
            }
            final Map<String, IntervalStat> sample2stats = new HashMap<>(all_partitions.size());
            for (final String rgId : all_partitions) {
                sample2stats.put(rgId, new IntervalStat(bedLine));
            }
            for (final SamReader samReader : samReaders) {
                /*
                 * queryOverlapping:
                 *   start - 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
                 *   end   - 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
                 */
                final SAMRecordIterator r = samReader.queryOverlapping(bedLine.getContig(), bedLine.getStart(), bedLine.getEnd());
                try {
                    while (r.hasNext()) {
                        final SAMRecord rec = r.next();
                        if (rec.getReadUnmappedFlag())
                            continue;
                        if (this.filter.filterOut(rec))
                            continue;
                        if (!rec.getReferenceName().equals(bedLine.getContig()))
                            continue;
                        final String partition;
                        final SAMReadGroupRecord group = rec.getReadGroup();
                        if (group == null) {
                            partition = NO_PARTITION;
                        } else {
                            final String name = this.partition.apply(group);
                            partition = (StringUtil.isBlank(name) ? NO_PARTITION : name);
                        }
                        // a partition that was not declared in any header is created on the fly
                        IntervalStat stat = sample2stats.get(partition);
                        if (stat == null) {
                            stat = new IntervalStat(bedLine);
                            sample2stats.put(partition, stat);
                        }
                        stat.visit(rec);
                    }
                } finally {
                    // always release the iterator, even if a record could not be processed
                    r.close();
                }
            }
            // end of loop over sam Readers
            final OptionalInt gcPercentInt = (referenceContig == null ? OptionalInt.empty() : referenceContig.getGCPercent(bedLine.getStart() - 1, bedLine.getEnd()).getGCPercentAsInteger());
            for (final String partitionName : sample2stats.keySet()) {
                final IntervalStat stat = sample2stats.get(partitionName);
                // sorted ascending: first element is the min depth, last one is the max depth
                Arrays.sort(stat.counts);
                pw.print(bedLine.getContig() + "\t" + (bedLine.getStart() - 1) + "\t" + (bedLine.getEnd()) + "\t" + stat.counts.length + "\t" + partitionName);
                if (referenceGenome != null) {
                    // the column is always printed; it is left empty when no GC percent is available
                    pw.print("\t");
                    if (gcPercentInt.isPresent())
                        pw.print(gcPercentInt.getAsInt());
                }
                pw.print("\t" + stat.counts[0] + "\t" + stat.counts[stat.counts.length - 1]);
                for (final int MIN_COVERAGE : this.minCoverages) {
                    /* map depth to 0 if depth <= MIN_COVERAGE */
                    final IntUnaryOperator depthAdjuster = (D) -> (D <= MIN_COVERAGE ? 0 : D);
                    final int count_no_coverage = (int) Arrays.stream(stat.counts).filter(D -> depthAdjuster.applyAsInt(D) <= 0).count();
                    final double mean = Percentile.average().evaluate(Arrays.stream(stat.counts).map(depthAdjuster));
                    final double median_depth = Percentile.median().evaluate(Arrays.stream(stat.counts).map(depthAdjuster));
                    pw.print("\t" + mean + "\t" + median_depth + "\t" + count_no_coverage + "\t" + (int) (((stat.counts.length - count_no_coverage) / (double) stat.counts.length) * 100.0));
                }
                pw.println();
            }
        }
        pw.flush();
        pw.close();
        // already closed: prevents a second close in the finally block
        pw = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(referenceGenome);
        CloserUtil.close(pw);
        CloserUtil.close(bedIn);
        CloserUtil.close(samReaders);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Arrays(java.util.Arrays) SequenceUtil(htsjdk.samtools.util.SequenceUtil) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IntUnaryOperator(java.util.function.IntUnaryOperator) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) CigarElement(htsjdk.samtools.CigarElement) CigarOperator(htsjdk.samtools.CigarOperator) HashMap(java.util.HashMap) SAMRecordPartition(com.github.lindenb.jvarkit.util.samtools.SAMRecordPartition) OptionalInt(java.util.OptionalInt) SAMFileHeader(htsjdk.samtools.SAMFileHeader) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) StringUtil(htsjdk.samtools.util.StringUtil) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) SamReader(htsjdk.samtools.SamReader) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Percentile(com.github.lindenb.jvarkit.math.stats.Percentile) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) BufferedReader(java.io.BufferedReader) ReferenceGenome(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenome) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) 
SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) IntUnaryOperator(java.util.function.IntUnaryOperator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) TreeSet(java.util.TreeSet) PrintWriter(java.io.PrintWriter) ReferenceGenome(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenome) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) OptionalInt(java.util.OptionalInt) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 12 with IntUnaryOperator

use of java.util.function.IntUnaryOperator in project jqa-core-framework by buschmais.

the class PluginIdGenerator method apply.

/**
 * Assigns a generated id to a plugin that does not declare one.
 *
 * <p>The id is derived from the plugin name: the name is lower-cased, every
 * whitespace character is replaced by {@code UNDERSCORE}, and consecutive
 * underscores are collapsed into a single one. Plugins that already have a
 * non-blank id are returned untouched.
 *
 * @param plugin the plugin to inspect; its id is set in place when blank
 * @return the same {@code plugin} instance
 */
@Override
public JqassistantPlugin apply(JqassistantPlugin plugin) {
    if (StringUtils.isBlank(plugin.getId())) {
        // Locale.ROOT keeps the generated id independent of the JVM default locale
        String name = plugin.getName().toLowerCase(java.util.Locale.ROOT);
        IntUnaryOperator replacer = i -> (Character.isWhitespace(i)) ? UNDERSCORE : i;
        StringBuilder generate = new StringBuilder();
        for (int index = 0; index < name.length(); index++) {
            int updated = replacer.applyAsInt(name.charAt(index));
            int lastChar = getLastChar(generate);
            // skip an underscore that would directly follow another underscore
            if (!(updated == UNDERSCORE && lastChar == UNDERSCORE)) {
                generate.appendCodePoint(updated);
            }
        }
        plugin.setId(generate.toString());
        // argument order follows the placeholders: id first, then name
        LOGGER.debug("Assigned generated plugin id '{}' to plugin named '{}'", plugin.getId(), plugin.getName());
    }
    return plugin;
}
Also used : JqassistantPlugin(org.jqassistant.schema.plugin.v1.JqassistantPlugin) Logger(org.slf4j.Logger) IntUnaryOperator(java.util.function.IntUnaryOperator) LoggerFactory(org.slf4j.LoggerFactory) Function(java.util.function.Function) StringUtils(org.apache.commons.lang3.StringUtils) IntUnaryOperator(java.util.function.IntUnaryOperator)

Example 13 with IntUnaryOperator

use of java.util.function.IntUnaryOperator in project cyclops by aol.

the class ShakespearePlaysScrabbleWithStreams method measureThroughput.

/**
 * JMH benchmark: scores every word produced by {@code buildShakerspeareWordsStream()}
 * that is also contained in {@code scrabbleWords} and playable with at most two
 * blanks, groups the words by score in descending score order and returns the
 * first three groups.
 *
 * @return the three first (score, words) entries of the descending score histogram
 */
@SuppressWarnings("unused")
@Benchmark
@BenchmarkMode(Mode.SampleTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@Fork(1)
public List<Entry<Integer, List<String>>> measureThroughput() {
    // value of a single letter, looked up by its offset from 'a'
    final IntUnaryOperator letterValue = letter -> letterScores[letter - 'a'];

    // points of one histogram entry: letter value times the number of copies,
    // capped by the number of tiles available for that letter
    final ToIntFunction<Entry<Integer, Long>> entryPoints = e -> {
        final int occurrences = e.getValue().intValue();
        return letterScores[e.getKey() - 'a'] * Integer.min(occurrences, scrabbleAvailableLetters[e.getKey() - 'a']);
    };

    // letter -> number of occurrences of that letter in the word
    final Function<String, Map<Integer, Long>> letterHistogram = w -> w.chars()
            .boxed()
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

    // blanks needed for one letter: occurrences exceeding the available tiles, never negative
    final ToLongFunction<Entry<Integer, Long>> blanksForLetter = e ->
            Long.max(0L, e.getValue() - scrabbleAvailableLetters[e.getKey() - 'a']);

    // blanks needed for the whole word
    final Function<String, Long> blanksForWord = w -> letterHistogram.apply(w)
            .entrySet()
            .stream()
            .mapToLong(blanksForLetter)
            .sum();

    // a word is playable when it needs at most 2 blanks
    final Predicate<String> playable = w -> blanksForWord.apply(w) <= 2;

    // score of the word, taking the available tiles into account
    final Function<String, Integer> tileScore = w -> letterHistogram.apply(w)
            .entrySet()
            .stream()
            .mapToInt(entryPoints)
            .sum();

    // letters at the head and at the tail of the word, as placed on the board
    final Function<String, IntStream> head = w -> w.chars().limit(3);
    final Function<String, IntStream> tail = w -> w.chars().skip(Integer.max(0, w.length() - 4));

    // head letters followed by tail letters: candidates for the double-letter bonus
    final Function<String, IntStream> bonusCandidates = w ->
            Stream.of(head.apply(w), tail.apply(w)).flatMapToInt(Function.identity());

    // double-letter bonus: the best letter value among the candidates, 0 when there is none
    final ToIntFunction<String> doubleLetterBonus = w ->
            bonusCandidates.apply(w).map(letterValue).max().orElse(0);

    // final score: (tile score + bonus) counted twice, plus 50 for a 7-letter word;
    // both halves are evaluated independently, exactly like the benchmarked workload
    final Function<String, Integer> boardScore = w -> {
        final int firstHalf = tileScore.apply(w) + doubleLetterBonus.applyAsInt(w);
        final int secondHalf = tileScore.apply(w) + doubleLetterBonus.applyAsInt(w);
        final int fullRackBonus = (w.length() == 7 ? 50 : 0);
        return firstHalf + secondHalf + fullRackBonus;
    };

    // one-element stream holding the histogram score -> words, highest score first
    final Function<Function<String, Integer>, Stream<Map<Integer, List<String>>>> histogramByScore = scorer ->
            Stream.of(
                    buildShakerspeareWordsStream()
                            .filter(scrabbleWords::contains)
                            // drop the words that need more than 2 blanks
                            .filter(playable)
                            .collect(Collectors.groupingBy(
                                    scorer,
                                    () -> new TreeMap<Integer, List<String>>(Comparator.reverseOrder()),
                                    Collectors.toList())));

    // keep the three best key / value pairs
    return histogramByScore.apply(boardScore)
            .map(histogram -> histogram.entrySet().stream().limit(3).collect(Collectors.toList()))
            .findAny()
            .get();
}
Also used : IntStream(java.util.stream.IntStream) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Measurement(org.openjdk.jmh.annotations.Measurement) IntUnaryOperator(java.util.function.IntUnaryOperator) Mode(org.openjdk.jmh.annotations.Mode) Predicate(java.util.function.Predicate) ToIntFunction(java.util.function.ToIntFunction) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) Warmup(org.openjdk.jmh.annotations.Warmup) Benchmark(org.openjdk.jmh.annotations.Benchmark) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Stream(java.util.stream.Stream) TreeMap(java.util.TreeMap) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) Map(java.util.Map) Entry(java.util.Map.Entry) Fork(org.openjdk.jmh.annotations.Fork) Comparator(java.util.Comparator) ToLongFunction(java.util.function.ToLongFunction) IntUnaryOperator(java.util.function.IntUnaryOperator) ToIntFunction(java.util.function.ToIntFunction) Function(java.util.function.Function) ToLongFunction(java.util.function.ToLongFunction) Entry(java.util.Map.Entry) IntStream(java.util.stream.IntStream) Stream(java.util.stream.Stream) List(java.util.List) TreeMap(java.util.TreeMap) Map(java.util.Map) IntStream(java.util.stream.IntStream) Measurement(org.openjdk.jmh.annotations.Measurement) Warmup(org.openjdk.jmh.annotations.Warmup) Fork(org.openjdk.jmh.annotations.Fork) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Aggregations

IntUnaryOperator (java.util.function.IntUnaryOperator)13 List (java.util.List)6 Map (java.util.Map)4 Collectors (java.util.stream.Collectors)4 IntStream (java.util.stream.IntStream)4 TestCase (junit.framework.TestCase)4 ArrayList (java.util.ArrayList)3 Comparator (java.util.Comparator)3 TimeUnit (java.util.concurrent.TimeUnit)3 Function (java.util.function.Function)3 Stream (java.util.stream.Stream)3 File (java.io.File)2 Arrays (java.util.Arrays)2 HashMap (java.util.HashMap)2 Set (java.util.Set)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Logger (org.slf4j.Logger)2 LoggerFactory (org.slf4j.LoggerFactory)2 Parameter (com.beust.jcommander.Parameter)1 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)1