Use of uk.gov.gchq.gaffer.spark.operation.javardd.SplitStoreFromJavaRDDOfElements in project Gaffer by gchq.
The generateSplitPoints method of the class SplitStoreFromJavaRDDOfElementsHandler.
/**
 * Derives split points for the store from a random sample of the rows
 * produced by the operation's input RDD.
 *
 * <p>The input is passed through an {@code ElementIteratorToPairIteratorFunction}
 * built from the store's key converter class name and compact schema JSON. Each
 * resulting pair contributes its first entry and, when it is non-null, its
 * second entry; the row of each of those keys is then extracted. The rows are
 * counted so the requested sample fraction can be adjusted by the superclass,
 * sampled without replacement, collected to the driver as strings and handed
 * to the superclass to create the split points.
 *
 * @param operation the operation supplying the input RDD, the fraction to sample
 *                  and the maximum sample size
 * @param context   the context forwarded to the superclass when creating split points
 * @param store     the store whose schema and key converter are used, and which
 *                  is forwarded to the superclass when creating split points
 * @throws OperationException if the operation has no input, or if declared by the
 *                            superclass helpers this method delegates to
 */
private void generateSplitPoints(final SplitStoreFromJavaRDDOfElements operation, final Context context, final AccumuloStore store) throws OperationException {
    // Fail with the declared exception type rather than an opaque NullPointerException.
    if (null == operation.getInput()) {
        throw new OperationException("Input JavaRDD of elements is required to generate split points");
    }

    final byte[] schemaAsJson = store.getSchema().toCompactJson();
    final String keyConverterClassName = store.getKeyPackage().getKeyConverter().getClass().getName();

    final JavaRDD<Text> rows = operation.getInput()
            .mapPartitions(new ElementIteratorToPairIteratorFunction(keyConverterClassName, schemaAsJson))
            .flatMap(pair -> {
                // The second entry of a pair may be absent; emit only what is present.
                if (null == pair.getSecond()) {
                    return asList(pair.getFirst()).iterator();
                } else {
                    return asList(pair.getFirst(), pair.getSecond()).iterator();
                }
            })
            .map(key -> key.getRow());

    // NOTE(review): rows is consumed by two Spark actions (count() here, then
    // sample(...).collect() below) and is not persisted in this method, so the
    // lineage is presumably evaluated twice - confirm whether the input is
    // already cached upstream or whether persisting is worthwhile.
    final double fractionToSample = super.adjustFractionToSampleForSize(operation.getFractionToSample(), operation.getMaxSampleSize(), rows.count());

    // The no-argument Random constructor picks a seed that is very likely distinct
    // per invocation; seeding from the wall clock would give identical samples to
    // calls that start within the same millisecond.
    final long seed = new Random().nextLong();
    final List<String> sample = rows.sample(WITHOUT_REPLACEMENT, fractionToSample, seed)
            .map(Text::toString)
            .collect();

    super.createSplitPoints(store, context, sample);
}
Aggregations