use of java.util.function.IntUnaryOperator in project jvarkit by lindenb.
the class BamStats04 method doWork.
/**
 * Computes coverage statistics for every interval of the BED file, for every
 * partition (derived from the read groups) found in the input BAM files, and
 * writes one tab-delimited row per interval and partition.
 *
 * <p>Output columns: chrom, 0-based start, end, length, partition name, an
 * optional GC percent (only when a reference was opened), min and max depth,
 * then for each configured minimum coverage: mean depth, median depth, number
 * of bases considered uncovered and percentage of covered bases.
 *
 * @param args the BAM files to scan (expanded through {@code IOUtils.unrollFiles});
 *             each must be indexed and all must share the same sequence dictionary
 * @return {@code RETURN_OK} on success, {@code -1} on any error
 */
@Override
public int doWork(final List<String> args) {
    if (this.bedFile == null || !this.bedFile.exists()) {
        LOG.error("undefined option -B (bed file)");
        return -1;
    }
    if (args.isEmpty()) {
        LOG.error("Bam files missing");
        return -1;
    }
    if (this.minCoverages.isEmpty()) {
        this.minCoverages.add(0);
    }
    final String NO_PARTITION = "N/A";
    BufferedReader bedIn = null;
    final List<SamReader> samReaders = new ArrayList<>(args.size());
    PrintWriter pw = null;
    ReferenceGenome referenceGenome = null;
    ReferenceContig referenceContig = null;
    try {
        final BedLineCodec codec = new BedLineCodec();
        final Set<String> all_partitions = new TreeSet<>();
        bedIn = IOUtils.openFileForBufferedReading(this.bedFile);
        SAMSequenceDictionary dict = null;
        for (final String filename : IOUtils.unrollFiles(args)) {
            LOG.info(filename);
            final SamReader samReader = super.openSamReader(filename);
            // Register the reader right away so that the 'finally' block closes it
            // on every exit path, including the early error returns below.
            samReaders.add(samReader);
            if (!samReader.hasIndex()) {
                LOG.error(filename + " is not indexed");
                return -1;
            }
            final SAMFileHeader samFileheader = samReader.getFileHeader();
            if (samFileheader == null) {
                LOG.error("SAM file is missing a header " + filename);
                return -1;
            }
            final List<SAMReadGroupRecord> readGroups = samFileheader.getReadGroups();
            if (readGroups == null || readGroups.isEmpty()) {
                LOG.warn("No Read group (RG) in the header of " + filename);
                all_partitions.add(NO_PARTITION);
            } else {
                for (final SAMReadGroupRecord rg : readGroups) {
                    all_partitions.add(this.partition.apply(rg, NO_PARTITION));
                }
            }
            final SAMSequenceDictionary d = samFileheader.getSequenceDictionary();
            if (d == null) {
                LOG.error(JvarkitException.BamDictionaryMissing.getMessage(filename));
                return -1;
            }
            if (dict == null) {
                // the first BAM defines the reference dictionary
                dict = d;
            } else if (!SequenceUtil.areSequenceDictionariesEqual(d, dict)) {
                // every following BAM must use the very same dictionary
                LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(d, dict));
                return -1;
            }
        }
        if (samReaders.isEmpty()) {
            LOG.error("No Bam defined");
            return -1;
        }
        if (!StringUtil.isBlank(this.faidxUri)) {
            referenceGenome = new ReferenceGenomeFactory().open(this.faidxUri);
        }
        pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        // header line
        pw.print("#chrom\tstart\tend\tlength\t" + this.partition.name() + (referenceGenome == null ? "" : "\tgc_percent"));
        pw.print("\tmincov\tmaxcov");
        for (final int MIN_COVERAGE : this.minCoverages) {
            pw.print("\tmeancov_" + MIN_COVERAGE + "\tmediancov_" + MIN_COVERAGE + "\tnocoveragebp_" + MIN_COVERAGE + "\tpercentcovered_" + MIN_COVERAGE);
        }
        pw.println();
        String line = null;
        while ((line = bedIn.readLine()) != null) {
            if (line.isEmpty() || line.startsWith("#"))
                continue;
            final BedLine bedLine = codec.decode(line);
            if (bedLine == null)
                continue;
            if (dict.getSequence(bedLine.getContig()) == null) {
                LOG.error("Unknown contig in " + line);
                return -1;
            }
            if (bedLine.getStart() > bedLine.getEnd()) {
                LOG.info("ignoring " + bedLine);
                continue;
            }
            // only fetch a new reference contig when the BED line moves to another contig
            if (referenceGenome != null && (referenceContig == null || !referenceContig.hasName(bedLine.getContig()))) {
                referenceContig = referenceGenome.getContig(bedLine.getContig());
            }
            // one accumulator per known partition, so partitions without reads are still reported
            final Map<String, IntervalStat> sample2stats = new HashMap<>(all_partitions.size());
            for (final String rgId : all_partitions) {
                sample2stats.put(rgId, new IntervalStat(bedLine));
            }
            for (final SamReader samReader : samReaders) {
                /*
                 * start - 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
                 * end - 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
                 */
                final SAMRecordIterator r = samReader.queryOverlapping(bedLine.getContig(), bedLine.getStart(), bedLine.getEnd());
                while (r.hasNext()) {
                    final SAMRecord rec = r.next();
                    if (rec.getReadUnmappedFlag())
                        continue;
                    if (this.filter.filterOut(rec))
                        continue;
                    if (!rec.getReferenceName().equals(bedLine.getContig()))
                        continue;
                    final String partition;
                    final SAMReadGroupRecord group = rec.getReadGroup();
                    if (group == null) {
                        partition = NO_PARTITION;
                    } else {
                        final String name = this.partition.apply(group);
                        partition = (StringUtil.isBlank(name) ? NO_PARTITION : name);
                    }
                    IntervalStat stat = sample2stats.get(partition);
                    if (stat == null) {
                        // partition that was not announced by any header
                        stat = new IntervalStat(bedLine);
                        sample2stats.put(partition, stat);
                    }
                    stat.visit(rec);
                }
                r.close();
            }
            // end of loop over sam Readers
            final OptionalInt gcPercentInt = (referenceContig == null ? OptionalInt.empty() : referenceContig.getGCPercent(bedLine.getStart() - 1, bedLine.getEnd()).getGCPercentAsInteger());
            for (final String partitionName : sample2stats.keySet()) {
                final IntervalStat stat = sample2stats.get(partitionName);
                // sorted depths: first element is the minimum, last one the maximum
                Arrays.sort(stat.counts);
                pw.print(bedLine.getContig() + "\t" + (bedLine.getStart() - 1) + "\t" + (bedLine.getEnd()) + "\t" + stat.counts.length + "\t" + partitionName);
                if (referenceGenome != null) {
                    // the column is always emitted, but left empty when no GC percent is available
                    pw.print("\t");
                    if (gcPercentInt.isPresent())
                        pw.print(gcPercentInt.getAsInt());
                }
                pw.print("\t" + stat.counts[0] + "\t" + stat.counts[stat.counts.length - 1]);
                for (final int MIN_COVERAGE : this.minCoverages) {
                    /* map depth to 0 if depth <= MIN_COVERAGE */
                    final IntUnaryOperator depthAdjuster = (D) -> (D <= MIN_COVERAGE ? 0 : D);
                    final int count_no_coverage = (int) Arrays.stream(stat.counts).filter(D -> depthAdjuster.applyAsInt(D) <= 0).count();
                    final double mean = Percentile.average().evaluate(Arrays.stream(stat.counts).map(depthAdjuster));
                    final double median_depth = Percentile.median().evaluate(Arrays.stream(stat.counts).map(depthAdjuster));
                    pw.print("\t" + mean + "\t" + median_depth + "\t" + count_no_coverage + "\t" + (int) (((stat.counts.length - count_no_coverage) / (double) stat.counts.length) * 100.0));
                }
                pw.println();
            }
        }
        pw.flush();
        pw.close();
        pw = null;
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(referenceGenome);
        CloserUtil.close(pw);
        CloserUtil.close(bedIn);
        CloserUtil.close(samReaders);
    }
}
use of java.util.function.IntUnaryOperator in project jqa-core-framework by buschmais.
the class PluginIdGenerator method apply.
/**
 * Assigns a generated id to the given plugin if its current id is blank.
 *
 * <p>The id is derived from the plugin name: the name is lower-cased
 * (locale-independently, so the result does not depend on the platform
 * locale), every whitespace character is replaced by {@code UNDERSCORE} and
 * an underscore is not appended when {@code getLastChar} reports that the id
 * built so far already ends with one.
 *
 * @param plugin the plugin to inspect; it is modified in place when its id is blank
 * @return the same plugin instance
 */
@Override
public JqassistantPlugin apply(JqassistantPlugin plugin) {
    if (StringUtils.isBlank(plugin.getId())) {
        String name = plugin.getName().toLowerCase(java.util.Locale.ROOT);
        IntUnaryOperator replacer = i -> (Character.isWhitespace(i)) ? UNDERSCORE : i;
        StringBuilder generate = new StringBuilder();
        for (int index = 0; index < name.length(); index++) {
            int updated = replacer.applyAsInt(name.charAt(index));
            int lastChar = getLastChar(generate);
            // skip an underscore that would directly follow another underscore
            if (!(updated == UNDERSCORE && lastChar == UNDERSCORE)) {
                generate.appendCodePoint(updated);
            }
        }
        plugin.setId(generate.toString());
        // argument order follows the placeholders: generated id first, plugin name second
        LOGGER.debug("Assigned generated plugin id '{}' to plugin named '{}'", plugin.getId(), plugin.getName());
    }
    return plugin;
}
use of java.util.function.IntUnaryOperator in project cyclops by aol.
the class ShakespearePlaysScrabbleWithStreams method measureThroughput.
/**
 * Benchmark body: scores the words produced by
 * {@code buildShakerspeareWordsStream()} that are contained in
 * {@code scrabbleWords} and need at most two blanks, groups them by score in
 * descending score order and returns the first three groups.
 *
 * @return the three highest score entries, each mapping a score to its words
 */
@SuppressWarnings("unused")
@Benchmark
@BenchmarkMode(Mode.SampleTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@Fork(1)
public List<Entry<Integer, List<String>>> measureThroughput() {
    // points awarded for one single letter
    IntUnaryOperator singleLetterScore = letter -> letterScores[letter - 'a'];

    // points for all occurrences of one letter, capped by the number of tiles available for it
    ToIntFunction<Entry<Integer, Long>> cappedLetterScore =
            entry -> letterScores[entry.getKey() - 'a']
                    * Integer.min(entry.getValue().intValue(), scrabbleAvailableLetters[entry.getKey() - 'a']);

    // letter -> number of occurrences in the word
    Function<String, Map<Integer, Long>> letterHistogram =
            word -> word.chars()
                    .boxed()
                    .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

    // blanks required for one letter: occurrences exceeding the available tiles
    ToLongFunction<Entry<Integer, Long>> blanksForLetter =
            entry -> Long.max(0L, entry.getValue() - scrabbleAvailableLetters[entry.getKey() - 'a']);

    // blanks required for the whole word
    Function<String, Long> blanksForWord =
            word -> letterHistogram.apply(word)
                    .entrySet()
                    .stream()
                    .mapToLong(blanksForLetter)
                    .sum();

    // keeps the words that need no more than two blanks
    Predicate<String> needsAtMostTwoBlanks = word -> blanksForWord.apply(word) <= 2;

    // word score once blanks are taken into account
    Function<String, Integer> baseScore =
            word -> letterHistogram.apply(word)
                    .entrySet()
                    .stream()
                    .mapToInt(cappedLetterScore)
                    .sum();

    // Board placement: letters at the beginning of the word (at most three) ...
    Function<String, IntStream> leadingLetters = word -> word.chars().limit(3);
    // ... and at its end (everything after skipping max(0, length - 4) characters)
    Function<String, IntStream> trailingLetters =
            word -> word.chars().skip(Integer.max(0, word.length() - 4));

    // both ends concatenated: the candidates for the double-letter bonus
    Function<String, IntStream> bonusCandidates =
            word -> Stream.of(leadingLetters.apply(word), trailingLetters.apply(word))
                    .flatMapToInt(Function.identity());

    // best single-letter score among the candidates, 0 if there is none
    ToIntFunction<String> doubleLetterBonus =
            word -> bonusCandidates.apply(word)
                    .map(singleLetterScore)
                    .max()
                    .orElse(0);

    // final score of the word on the board: (base + bonus) counted twice, plus 50 for 7-letter words
    Function<String, Integer> boardScore =
            word -> (baseScore.apply(word) + doubleLetterBonus.applyAsInt(word))
                    + (baseScore.apply(word) + doubleLetterBonus.applyAsInt(word))
                    + (word.length() == 7 ? 50 : 0);

    // groups the playable words by the given score function, highest score first
    Function<Function<String, Integer>, Stream<Map<Integer, List<String>>>> groupByScore =
            score -> Stream.of(
                    buildShakerspeareWordsStream()
                            .filter(scrabbleWords::contains)
                            // drop the words that need more than 2 blanks
                            .filter(needsAtMostTwoBlanks)
                            .collect(Collectors.groupingBy(
                                    score,
                                    () -> new TreeMap<Integer, List<String>>(Comparator.reverseOrder()),
                                    Collectors.toList())));

    // the three best score / words pairs
    return groupByScore.apply(boardScore)
            .map(e -> e.entrySet().stream().limit(3).collect(Collectors.toList()))
            .findAny()
            .get();
}
Aggregations