Use of java.util.stream.IntStream in project pyramid by cheng-li.
The class RegTreeTrainer, method splitNode.
/**
 * xgboost monotonicity: split a splittable node.
 *
 * @param tree the tree being grown
 * @param leafToSplit the leaf node to split
 * @param regTreeConfig the tree-training configuration
 * @param dataSet the training data
 * @param labels the regression targets
 * @param monotonicity per-feature monotonicity constraints
 * @param leafOutputCalculator calculator for the children's leaf outputs
 */
private static void splitNode(RegressionTree tree, Node leafToSplit, RegTreeConfig regTreeConfig, DataSet dataSet, double[] labels, int[] monotonicity, LeafOutputCalculator leafOutputCalculator) {
    int numDataPoints = dataSet.getNumDataPoints();
    // split this leaf node
    int featureIndex = leafToSplit.getFeatureIndex();
    double threshold = leafToSplit.getThreshold();
    Vector inputVector = dataSet.getColumn(featureIndex);
    Vector columnVector;
    if (inputVector.isDense()) {
        columnVector = inputVector;
    } else {
        columnVector = new DenseVector(inputVector);
    }
    // create children
    Node leftChild = new Node();
    leftChild.setId(tree.numNodes);
    tree.numNodes += 1;
    Node rightChild = new Node();
    rightChild.setId(tree.numNodes);
    tree.numNodes += 1;
    double[] parentProbs = leafToSplit.getProbs();
    double[] leftProbs = new double[numDataPoints];
    double[] rightProbs = new double[numDataPoints];
    IntStream intStream = IntStream.range(0, numDataPoints);
    if (regTreeConfig.isParallel()) {
        intStream = intStream.parallel();
    }
    intStream.forEach(i -> {
        double featureValue = columnVector.get(i);
        if (Double.isNaN(featureValue)) {
            // missing value: send the data point down both branches probabilistically
            leftProbs[i] = parentProbs[i] * leafToSplit.getLeftProb();
            rightProbs[i] = parentProbs[i] * leafToSplit.getRightProb();
        } else {
            // <= threshold goes left, > threshold goes right
            if (featureValue <= threshold) {
                leftProbs[i] = parentProbs[i];
                rightProbs[i] = 0;
            } else {
                leftProbs[i] = 0;
                rightProbs[i] = parentProbs[i];
            }
        }
    });
    leftChild.setProbs(leftProbs);
    rightChild.setProbs(rightProbs);
    // the last two leaves need not be fully updated,
    // since they will never be split
    int maxNumLeaves = regTreeConfig.getMaxNumLeaves();
    if (tree.leaves.size() != maxNumLeaves - 1) {
        updateNode(leftChild, regTreeConfig, dataSet, labels, monotonicity);
        updateNode(rightChild, regTreeConfig, dataSet, labels, monotonicity);
    }
    // link left and right child to the parent
    leafToSplit.setLeftChild(leftChild);
    leafToSplit.setRightChild(rightChild);
    // update leaves: remove the parent and add the children
    leafToSplit.setLeaf(false);
    leafToSplit.clearProbs();
    tree.leaves.remove(leafToSplit);
    leftChild.setLeaf(true);
    rightChild.setLeaf(true);
    tree.leaves.add(leftChild);
    tree.leaves.add(rightChild);
    tree.allNodes.add(leftChild);
    tree.allNodes.add(rightChild);
    int mono = monotonicity[featureIndex];
    leafOutputCalculator.setParallel(regTreeConfig.isParallel());
    setLeafOutput(leftChild, leafOutputCalculator, labels);
    setLeafOutput(rightChild, leafOutputCalculator, labels);
    setBoundForChildren(leafToSplit, mono);
}
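The idiom worth noting is the conditional switch to a parallel range: each element writes only its own index of the pre-allocated leftProbs/rightProbs arrays, so the forEach has no shared mutable state and is safe to parallelize. A minimal standalone sketch of the same pattern (the flag and array names are illustrative, not from pyramid):

import java.util.Arrays;
import java.util.stream.IntStream;

public class ConditionalParallelDemo {
    public static void main(String[] args) {
        boolean parallel = true; // stand-in for regTreeConfig.isParallel()
        double[] input = {1.0, 2.0, 3.0, 4.0};
        double[] output = new double[input.length];
        IntStream indices = IntStream.range(0, input.length);
        if (parallel) {
            indices = indices.parallel();
        }
        // race-free: element i touches only output[i]
        indices.forEach(i -> output[i] = input[i] * 2);
        System.out.println(Arrays.toString(output)); // [2.0, 4.0, 6.0, 8.0]
    }
}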
Use of java.util.stream.IntStream in project druid by druid-io.
The class MaxSizeSplitHintSpecTest, method testSplitSkippingEmptyInputs.
@Test
public void testSplitSkippingEmptyInputs() {
    final int nonEmptyInputSize = 3;
    final MaxSizeSplitHintSpec splitHintSpec = new MaxSizeSplitHintSpec(new HumanReadableBytes(10L), null);
    final Function<Integer, InputFileAttribute> inputAttributeExtractor = InputFileAttribute::new;
    // 10 empty inputs, then 10 inputs of size 3, then 10 more empty inputs
    final IntStream dataStream = IntStream.concat(
        IntStream.concat(
            IntStream.generate(() -> 0).limit(10),
            IntStream.generate(() -> nonEmptyInputSize).limit(10)),
        IntStream.generate(() -> 0).limit(10));
    final List<List<Integer>> splits = Lists.newArrayList(
        splitHintSpec.split(dataStream.iterator(), inputAttributeExtractor));
    // the 20 empty inputs are skipped; the 10 size-3 inputs pack into splits of at most 10 bytes each
    Assert.assertEquals(4, splits.size());
    Assert.assertEquals(3, splits.get(0).size());
    Assert.assertEquals(3, splits.get(1).size());
    Assert.assertEquals(3, splits.get(2).size());
    Assert.assertEquals(1, splits.get(3).size());
}
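IntStream.generate with limit is a compact way to build repetitive test fixtures, and concat stitches the segments together in order. A minimal sketch of the same construction without the Druid types:

import java.util.Arrays;
import java.util.stream.IntStream;

public class GenerateConcatDemo {
    public static void main(String[] args) {
        // three zeros, then three threes, then three zeros
        IntStream data = IntStream.concat(
            IntStream.concat(
                IntStream.generate(() -> 0).limit(3),
                IntStream.generate(() -> 3).limit(3)),
            IntStream.generate(() -> 0).limit(3));
        System.out.println(Arrays.toString(data.toArray()));
        // prints [0, 0, 0, 3, 3, 3, 0, 0, 0]
    }
}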
Use of java.util.stream.IntStream in project neo4j by neo4j.
The class GraphDatabaseSettingsTest, method transactionSamplingCanBePercentageValues.
@Test
void transactionSamplingCanBePercentageValues() {
    // every percentage from 1 to 100 inclusive is accepted
    IntStream range = IntStream.range(1, 101);
    range.forEach(percentage -> {
        Config config = Config.defaults(transaction_sampling_percentage, percentage);
        int configuredSampling = config.get(transaction_sampling_percentage);
        assertEquals(percentage, configuredSampling);
    });
    // values outside [1, 100] are rejected
    assertThrows(IllegalArgumentException.class, () -> Config.defaults(transaction_sampling_percentage, 0));
    assertThrows(IllegalArgumentException.class, () -> Config.defaults(transaction_sampling_percentage, 101));
    assertThrows(IllegalArgumentException.class, () -> Config.defaults(transaction_sampling_percentage, 10101));
}
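IntStream.range is half-open, so range(1, 101) produces exactly 1 through 100, the valid percentage domain, while 0 and 101 must be rejected separately. A minimal sketch of exhaustively checking a validator this way (validatePercentage is a hypothetical stand-in for the constraint behind the config setting):

import java.util.stream.IntStream;

public class RangeValidationDemo {
    // hypothetical stand-in for the percentage constraint behind the setting
    static int validatePercentage(int p) {
        if (p < 1 || p > 100) {
            throw new IllegalArgumentException("percentage out of range: " + p);
        }
        return p;
    }

    public static void main(String[] args) {
        // half-open: covers 1..100, never touches 101
        IntStream.range(1, 101).forEach(p -> {
            if (validatePercentage(p) != p) {
                throw new AssertionError("round-trip failed for " + p);
            }
        });
        System.out.println("all 100 percentage values accepted");
    }
}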
Use of java.util.stream.IntStream in project cyclops by aol.
The class ShakespearePlaysScrabbleWithStreams, method measureThroughput.
@SuppressWarnings("unused")
@Benchmark
@BenchmarkMode(Mode.SampleTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@Fork(1)
public List<Entry<Integer, List<String>>> measureThroughput() {
    // function to compute the score of a given letter
    IntUnaryOperator scoreOfALetter = letter -> letterScores[letter - 'a'];
    // score of the same letters in a word, capped by the available tiles
    ToIntFunction<Entry<Integer, Long>> letterScore = entry ->
        letterScores[entry.getKey() - 'a']
            * Integer.min(entry.getValue().intValue(), scrabbleAvailableLetters[entry.getKey() - 'a']);
    // histogram of the letters in a given word
    Function<String, Map<Integer, Long>> histoOfLetters = word ->
        word.chars().boxed().collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    // number of blanks needed for a given letter
    ToLongFunction<Entry<Integer, Long>> blank = entry ->
        Long.max(0L, entry.getValue() - scrabbleAvailableLetters[entry.getKey() - 'a']);
    // number of blanks needed for a given word
    Function<String, Long> nBlanks = word ->
        histoOfLetters.apply(word).entrySet().stream().mapToLong(blank).sum();
    // can a word be written with at most 2 blanks?
    Predicate<String> checkBlanks = word -> nBlanks.apply(word) <= 2;
    // score taking blanks into account
    Function<String, Integer> score2 = word ->
        histoOfLetters.apply(word).entrySet().stream().mapToInt(letterScore).sum();
    // Placing the word on the board:
    // streams of the first three letters and, since skip(length - 4) is used, the last four
    Function<String, IntStream> first3 = word -> word.chars().limit(3);
    Function<String, IntStream> last3 = word -> word.chars().skip(Integer.max(0, word.length() - 4));
    // stream to be maxed
    Function<String, IntStream> toBeMaxed = word ->
        Stream.of(first3.apply(word), last3.apply(word)).flatMapToInt(Function.identity());
    // bonus for a double-letter square
    ToIntFunction<String> bonusForDoubleLetter = word ->
        toBeMaxed.apply(word).map(scoreOfALetter).max().orElse(0);
    // score of the word put on the board: base plus bonus, counted twice,
    // plus the 50-point bonus for using all 7 tiles
    Function<String, Integer> score3 = word ->
        (score2.apply(word) + bonusForDoubleLetter.applyAsInt(word))
            + (score2.apply(word) + bonusForDoubleLetter.applyAsInt(word))
            + (word.length() == 7 ? 50 : 0);
    Function<Function<String, Integer>, Stream<Map<Integer, List<String>>>> buildHistoOnScore = score ->
        Stream.of(buildShakerspeareWordsStream()
            .filter(scrabbleWords::contains)
            // filter out the words that need more than 2 blanks
            .filter(checkBlanks)
            .collect(Collectors.groupingBy(score,
                () -> new TreeMap<Integer, List<String>>(Comparator.reverseOrder()),
                Collectors.toList())));
    // best key / value pairs
    List<Entry<Integer, List<String>>> finalList = buildHistoOnScore.apply(score3)
        .map(e -> e.entrySet().stream().limit(3).collect(Collectors.toList()))
        .findAny()
        .get();
    return finalList;
}
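The recurring building block above is String.chars(), which exposes a word as an IntStream of character code points; boxing and grouping turns it into a letter histogram. A minimal standalone sketch of that idiom:

import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class LetterHistogramDemo {
    public static void main(String[] args) {
        String word = "scrabble";
        // count occurrences of each code point in the word
        Map<Integer, Long> histogram = word.chars()
            .boxed()
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
        // 'b' appears twice, every other letter once
        histogram.forEach((cp, count) ->
            System.out.println((char) cp.intValue() + " -> " + count));
    }
}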
Use of java.util.stream.IntStream in project mzmine2 by mzmine.
The class PeakFinderTask, method run.
public void run() {
    setStatus(TaskStatus.PROCESSING);
    logger.info("Running gap filler on " + peakList);
    // Calculate total number of scans in all files
    for (RawDataFile dataFile : peakList.getRawDataFiles()) {
        totalScans += dataFile.getNumOfScans(1);
    }
    processedScans = new AtomicInteger();
    // Create new feature list
    processedPeakList = new SimplePeakList(peakList + " " + suffix, peakList.getRawDataFiles());
    // Fill new feature list with empty rows
    for (int row = 0; row < peakList.getNumberOfRows(); row++) {
        PeakListRow sourceRow = peakList.getRow(row);
        PeakListRow newRow = new SimplePeakListRow(sourceRow.getID());
        newRow.setComment(sourceRow.getComment());
        for (PeakIdentity ident : sourceRow.getPeakIdentities()) {
            newRow.addPeakIdentity(ident, false);
        }
        if (sourceRow.getPreferredPeakIdentity() != null) {
            newRow.setPreferredPeakIdentity(sourceRow.getPreferredPeakIdentity());
        }
        processedPeakList.addRow(newRow);
    }
    if (rtCorrection) {
        totalScans *= 2;
        // Fill the gaps of a random sample using all the other samples,
        // and take it as the master list to fill the gaps of the other samples
        masterSample = (int) Math.floor(Math.random() * peakList.getNumberOfRawDataFiles());
        fillList(MASTERLIST);
        // Process all raw data files
        fillList(!MASTERLIST);
    } else {
        // Process all raw data files
        IntStream rawStream = IntStream.range(0, peakList.getNumberOfRawDataFiles());
        if (useParallelStream)
            rawStream = rawStream.parallel();
        rawStream.forEach(i -> {
            // Canceled? (inside the stream, returning only skips this element)
            if (isCanceled()) {
                return;
            }
            RawDataFile dataFile = peakList.getRawDataFile(i);
            // Collect the gaps that need to be filled in this data file
            List<Gap> gaps = new ArrayList<Gap>();
            for (int row = 0; row < peakList.getNumberOfRows(); row++) {
                // Canceled? (inside the stream, returning only skips this element)
                if (isCanceled()) {
                    return;
                }
                PeakListRow sourceRow = peakList.getRow(row);
                PeakListRow newRow = processedPeakList.getRow(row);
                Feature sourcePeak = sourceRow.getPeak(dataFile);
                if (sourcePeak == null) {
                    // Create a new gap
                    Range<Double> mzRange = mzTolerance.getToleranceRange(sourceRow.getAverageMZ());
                    Range<Double> rtRange = rtTolerance.getToleranceRange(sourceRow.getAverageRT());
                    Gap newGap = new Gap(newRow, dataFile, mzRange, rtRange, intTolerance);
                    gaps.add(newGap);
                } else {
                    newRow.addPeak(dataFile, sourcePeak);
                }
            }
            // Stop processing this file if there are no gaps
            if (gaps.isEmpty()) {
                processedScans.addAndGet(dataFile.getNumOfScans());
                return;
            }
            // Get all scans of this data file
            int[] scanNumbers = dataFile.getScanNumbers(1);
            // Process each scan
            for (int scanNumber : scanNumbers) {
                // Canceled? (inside the stream, returning only skips this element)
                if (isCanceled()) {
                    return;
                }
                // Get the scan
                Scan scan = dataFile.getScan(scanNumber);
                // Feed this scan to all gaps
                for (Gap gap : gaps) {
                    gap.offerNextScan(scan);
                }
                processedScans.incrementAndGet();
            }
            // Finalize gaps
            for (Gap gap : gaps) {
                gap.noMoreOffers();
            }
        });
    }
    // Terminate here if canceled: inside the stream, cancellation only skips elements
    if (isCanceled())
        return;
    // Append processed feature list to the project
    project.addPeakList(processedPeakList);
    // Add quality parameters to peaks
    QualityParameters.calculateQualityParameters(processedPeakList);
    // Add task description to peakList
    processedPeakList.addDescriptionOfAppliedTask(new SimplePeakListAppliedMethod("Gap filling ", parameters));
    // Remove the original peaklist if requested
    if (removeOriginal)
        project.removePeakList(peakList);
    logger.info("Finished gap-filling on " + peakList);
    setStatus(TaskStatus.FINISHED);
}
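The repeated "only skips this element" comments flag a real IntStream pitfall: returning from a forEach lambda is a per-element continue, not a break, so the stream always runs to completion and the cancellation flag has to be rechecked afterwards, exactly as run() does. A minimal sketch of the pattern (the flag and the fake work are illustrative):

import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.IntStream;

public class StreamCancellationDemo {
    public static void main(String[] args) {
        AtomicBoolean canceled = new AtomicBoolean(false);
        IntStream.range(0, 100).parallel().forEach(i -> {
            if (canceled.get()) {
                return; // skips only this element; others may still run
            }
            if (i == 5) {
                canceled.set(true); // request cancellation partway through
            }
        });
        // the stream itself cannot break early, so check the flag afterwards
        if (canceled.get()) {
            System.out.println("task was canceled; skipping final bookkeeping");
            return;
        }
        System.out.println("task finished normally");
    }
}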