Search in sources :

Example 16 with Range

use of org.apache.commons.lang3.Range in project narchy by automenta.

the class DigitizedScalarTest method testRewardConceptsFuzzification3.

// @Test
// public void testRewardConceptsFuzzification1() {
// NAR d = new Default();
// MutableFloat m = new MutableFloat(0f);
// testSteadyFreqCondition(m,
// new FuzzyScalarConcepts(
// new FloatNormalized(() -> m.floatValue()).updateRange(-1).updateRange(1),
// d, FuzzyScalarConcepts.FuzzyTriangle, $.p("x")),
// (f) -> Util.equals(f, 0.5f + 0.5f * m.floatValue(), tolerance)
// );
// }
// Builds a three-term DigitizedScalar (terms low / mid / hih) over a polar-normalized view of a
// MutableFloat and hands it to testSteadyFreqCondition together with a predicate that accepts
// values equal to 1 within tolerance.
// NOTE(review): this snippet was damaged by extraction - the right-hand side of the NAR
// declaration and the closing brace of the method are missing; restore both from the upstream
// project before compiling.
// NOTE(review): the third term name hih looks like a typo for high - confirm before changing,
// since it is a runtime string.
public void testRewardConceptsFuzzification3() {
    NAR n =;
    // mutable input; the normalizer reads it lazily through the lambda below
    MutableFloat m = new MutableFloat(0f);
    FloatNormalized range = new FloatPolarNormalized(() -> m.floatValue(), 1f);
    DigitizedScalar f = new DigitizedScalar(range, DigitizedScalar.FuzzyNeedle, n, $.p("low"), $.p("mid"), $.p("hih"));
    // {
    // f.clear();
    // m.setValue(0);;
    // System.out.println(Texts.n4(m.floatValue()) + "\t" + f.toString());
    // assertEquals("(I-->[sad]) %0.25;.90%\t(I-->[neutral]) %1.0;.90%\t(I-->[happy]) %0.0;.90%", f.toString());
    // }
    // {
    // f.clear();
    // m.setValue(-1);;
    // System.out.println(Texts.n4(m.floatValue()) + "\t" + f.toString());
    // assertEquals("(I-->[sad]) %1.0;.90%\t(I-->[neutral]) %0.0;.90%\t(I-->[happy]) %0.0;.90%", f.toString());
    // }
    // {
    // f.clear();
    // m.setValue(+1);;
    // System.out.println(Texts.n4(m.floatValue()) + "\t" + f.toString());
    // assertEquals("(I-->[sad]) %0.0;.90%\t(I-->[neutral]) %0.0;.90%\t(I-->[happy]) %1.0;.90%", f.toString());
    // }
    // the predicate prints each value it receives and passes only when it equals 1 within tolerance
    testSteadyFreqCondition(m, f, (freqSum) -> {
        System.out.println(freqSum + " " + tolerance);
        return Util.equals(freqSum, 1f, tolerance);
    }, n);
Also used : FloatNormalized(jcog.math.FloatNormalized) MutableFloat(org.apache.commons.lang3.mutable.MutableFloat) DigitizedScalar(nars.concept.scalar.DigitizedScalar) FloatPolarNormalized(jcog.math.FloatPolarNormalized) NAR(nars.NAR) Test(org.junit.jupiter.api.Test) Disabled(org.junit.jupiter.api.Disabled)

Example 17 with Range

use of org.apache.commons.lang3.Range in project drill by apache.

the class BlockMapBuilder method buildBlockMap.

/** Builds a mapping of block locations to file byte range. */
// Fetches the block locations covering the whole file (offset 0 to status.getLen()), keys each
// block by the half-open byte range [offset, offset + length), stores the resulting map in
// blockMapMap under the file path and returns it.
// NOTE(review): this snippet was damaged by extraction - the closing braces of the for loop and
// of the method are missing and the right-hand side of the blockMap assignment was stripped
// (presumably a build() call on the builder - confirm against the upstream source).
// NOTE(review): the timer context opened on the first line is never stopped in the visible code;
// verify that the original stops it.
private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(FileStatus status) throws IOException {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    BlockLocation[] blocks;
    ImmutableRangeMap<Long, BlockLocation> blockMap;
    blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        // closed-open: the end offset itself belongs to the next block
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    blockMap =;
    // remember the map for this path
    blockMapMap.put(status.getPath(), blockMap);
    return blockMap;
Also used : Timer(com.codahale.metrics.Timer) ToStringBuilder(org.apache.commons.lang3.builder.ToStringBuilder) ImmutableRangeMap( BlockLocation(org.apache.hadoop.fs.BlockLocation)

Example 18 with Range

use of org.apache.commons.lang3.Range in project drill by apache.

the class LocalFunctionRegistry method registerOperatorsWithInference.

// Registers every entry of registeredFunctions with operatorTable. For each upper-cased function
// name, the non-aggregating holders are grouped by argument-count range and fed to a
// DrillSqlOperator builder, and the aggregating holders are grouped by parameter count and fed to
// a DrillSqlAggOperator builder; the built operators are then added via addOperatorWithInference.
// NOTE(review): this snippet was damaged by extraction - most closing braces are missing, so the
// nesting described here follows the indentation; confirm against the upstream source.
private void registerOperatorsWithInference(DrillOperatorTable operatorTable, Map<String, Collection<DrillFuncHolder>> registeredFunctions) {
    // one builder per function name, shared by all overloads of that name
    final Map<String, DrillSqlOperator.DrillSqlOperatorBuilder> map = new HashMap<>();
    final Map<String, DrillSqlAggOperator.DrillSqlAggOperatorBuilder> mapAgg = new HashMap<>();
    for (Entry<String, Collection<DrillFuncHolder>> function : registeredFunctions.entrySet()) {
        final ArrayListMultimap<Pair<Integer, Integer>, DrillFuncHolder> functions = ArrayListMultimap.create();
        final ArrayListMultimap<Integer, DrillFuncHolder> aggregateFunctions = ArrayListMultimap.create();
        final String name = function.getKey().toUpperCase();
        // flags accumulated over all holders registered under this name
        boolean isDeterministic = true;
        boolean isNiladic = false;
        boolean isVarArg = false;
        for (DrillFuncHolder func : function.getValue()) {
            final int paramCount = func.getParamCount();
            if (func.isAggregating()) {
                aggregateFunctions.put(paramCount, func);
            } else {
                // use the range recorded in registeredFuncNameToArgRange when there is one,
                // otherwise the exact parameter count as both bounds
                final Pair<Integer, Integer> argNumberRange;
                if (registeredFuncNameToArgRange.containsKey(name)) {
                    argNumberRange = registeredFuncNameToArgRange.get(name);
                } else {
                    argNumberRange = Pair.of(func.getParamCount(), func.getParamCount());
                functions.put(argNumberRange, func);
            // a single non-deterministic or complex-writer holder clears the flag for the name
            if (!func.isDeterministic() || func.isComplexWriterFuncHolder()) {
                isDeterministic = false;
            if (func.isNiladic()) {
                isNiladic = true;
            if (func.isVarArg()) {
                isVarArg = true;
        for (Entry<Pair<Integer, Integer>, Collection<DrillFuncHolder>> entry : functions.asMap().entrySet()) {
            final Pair<Integer, Integer> range = entry.getKey();
            final int max = range.getRight();
            final int min = range.getLeft();
            // create the builder lazily on the first range seen for this name
            if (!map.containsKey(name)) {
                map.put(name, new DrillSqlOperator.DrillSqlOperatorBuilder().setName(name));
            final DrillSqlOperator.DrillSqlOperatorBuilder drillSqlOperatorBuilder = map.get(name);
            drillSqlOperatorBuilder.addFunctions(entry.getValue()).setVarArg(isVarArg).setArgumentCount(min, max).setDeterministic(isDeterministic).setNiladic(isNiladic);
        for (Entry<Integer, Collection<DrillFuncHolder>> entry : aggregateFunctions.asMap().entrySet()) {
            if (!mapAgg.containsKey(name)) {
                mapAgg.put(name, new DrillSqlAggOperator.DrillSqlAggOperatorBuilder().setName(name));
            final DrillSqlAggOperator.DrillSqlAggOperatorBuilder drillSqlAggOperatorBuilder = mapAgg.get(name);
            // aggregates use the parameter count as both the minimum and maximum argument count
            drillSqlAggOperatorBuilder.addFunctions(entry.getValue()).setArgumentCount(entry.getKey(), entry.getKey());
    // build and register the scalar operators, then the aggregate operators
    for (final Entry<String, DrillSqlOperator.DrillSqlOperatorBuilder> entry : map.entrySet()) {
        operatorTable.addOperatorWithInference(entry.getKey(), entry.getValue().build());
    for (final Entry<String, DrillSqlAggOperator.DrillSqlAggOperatorBuilder> entry : mapAgg.entrySet()) {
        operatorTable.addOperatorWithInference(entry.getKey(), entry.getValue().build());
Also used : DrillFuncHolder(org.apache.drill.exec.expr.fn.DrillFuncHolder) HashMap(java.util.HashMap) DrillSqlAggOperator(org.apache.drill.exec.planner.sql.DrillSqlAggOperator) DrillSqlOperator(org.apache.drill.exec.planner.sql.DrillSqlOperator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Collection(java.util.Collection) Pair(org.apache.commons.lang3.tuple.Pair)

Example 19 with Range

use of org.apache.commons.lang3.Range in project samza by apache.

the class TransactionalStateTaskRestoreManager method getStoreActions.

/**
 * Marks each persistent but non-logged store for deletion.
 * For each logged store, based on the current, checkpointed and local changelog offsets,
 * 1. decides which directories (current and checkpoints) to delete for persistent stores.
 * 2. decides which directories (checkpoints) to retain for persistent stores.
 * 3. decides which stores (persistent or not) need to be restored, and the beginning and end offsets for the restore.
 * When this method returns, in StoreActions,
 * 1. all persistent store current directories will be present in storeDirsToDelete
 * 2. each persistent store checkpoint directory will be present in either storeDirToRetain or storeDirsToDelete.
 * 3. there will be at most one storeDirToRetain per persistent store, which will be a checkpoint directory.
 * 4. any stores (persistent or not) that need to be restored from changelogs will be present in
 *    storesToRestore with appropriate offsets.
 */
// Walks every entry of storeEngines and fills three collections - the checkpoint directory to
// retain per store, the directories to delete per store, and the restore offsets per store - then
// wraps them in a StoreActions.
// NOTE(review): this snippet was damaged by extraction. Logger call prefixes were stripped, leaving
// bare string-literal argument lists on several lines; many closing braces are missing; and the
// early exits that the do-nothing, non-logged and clean-on-start branches presumably ended with
// are not visible. Restore these from the upstream source before compiling.
static StoreActions getStoreActions(TaskModel taskModel, Map<String, StorageEngine> storeEngines, Map<String, SystemStream> storeChangelogs, Map<String, KafkaStateCheckpointMarker> kafkaStateCheckpointMarkers, CheckpointId checkpointId, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets, SystemAdmins systemAdmins, StorageManagerUtil storageManagerUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory, Config config, Clock clock) {
    TaskName taskName = taskModel.getTaskName();
    TaskMode taskMode = taskModel.getTaskMode();
    Map<String, File> storeDirToRetain = new HashMap<>();
    ListMultimap<String, File> storeDirsToDelete = ArrayListMultimap.create();
    Map<String, RestoreOffsets> storesToRestore = new HashMap<>();
    storeEngines.forEach((storeName, storageEngine) -> {
        // do nothing if store is non persistent and not logged (e.g. in memory cache only)
        if (!storageEngine.getStoreProperties().isPersistedToDisk() && !storageEngine.getStoreProperties().isLoggedStore()) {
        // persistent but non-logged stores are always deleted
        if (storageEngine.getStoreProperties().isPersistedToDisk() && !storageEngine.getStoreProperties().isLoggedStore()) {
            File currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
  "Marking current directory: {} for store: {} in task: {} for deletion since it is not a logged store.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            // persistent but non-logged stores should not have checkpoint dirs
        // get the oldest and newest current changelog SSP offsets as well as the checkpointed changelog SSP offset
        SystemStream changelog = storeChangelogs.get(storeName);
        SystemStreamPartition changelogSSP = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelogSSP.getSystem());
        SystemStreamPartitionMetadata changelogSSPMetadata = currentChangelogOffsets.get(changelogSSP);
        String oldestOffset = changelogSSPMetadata.getOldestOffset();
        String newestOffset = changelogSSPMetadata.getNewestOffset();
        // can be null if no message, or message has null offset
        String checkpointedOffset = null;
        if (kafkaStateCheckpointMarkers.containsKey(storeName) && StringUtils.isNotBlank(kafkaStateCheckpointMarkers.get(storeName).getChangelogOffset())) {
            checkpointedOffset = kafkaStateCheckpointMarkers.get(storeName).getChangelogOffset();
        // a missing checkpoint id is treated as an infinitely old checkpoint
        long timeSinceLastCheckpointInMs = checkpointId == null ? Long.MAX_VALUE : System.currentTimeMillis() - checkpointId.getMillis();
        // if the config is set, delete current and checkpoint dirs, restore from oldest offset to checkpointed
        if (storageEngine.getStoreProperties().isPersistedToDisk() && new StorageConfig(config).cleanLoggedStoreDirsOnStart(storeName)) {
            File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
  "Marking current directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode).forEach(checkpointDir -> {
      "Marking checkpoint directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", checkpointDir, storeName, taskName);
                storeDirsToDelete.put(storeName, checkpointDir);
  "Marking restore offsets for store: {} in task: {} to {}, {} ", storeName, taskName, oldestOffset, checkpointedOffset);
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
        // directories only exist for stores that are persisted to disk
        Optional<File> currentDirOptional;
        Optional<List<File>> checkpointDirsOptional;
        if (!storageEngine.getStoreProperties().isPersistedToDisk()) {
            currentDirOptional = Optional.empty();
            checkpointDirsOptional = Optional.empty();
        } else {
            currentDirOptional = Optional.of(storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode));
            checkpointDirsOptional = Optional.of(storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode));
        }"For store: {} in task: {} got current dir: {}, checkpoint dirs: {}, checkpointed changelog offset: {}", storeName, taskName, currentDirOptional, checkpointDirsOptional, checkpointedOffset);
        // the current directory of a logged store is always marked for deletion
        currentDirOptional.ifPresent(currentDir -> {
  "Marking current directory: {} for store: {} in task: {} for deletion.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
        if (checkpointedOffset == null && oldestOffset != null) {
            // this can mean that either this is the initial migration for this feature and there are no previously
            // checkpointed changelog offsets, or that this is a new store or changelog topic after the initial migration.
            // if this is the first time migration, it might be desirable to retain existing data.
            // if this is new store or topic, it is possible that the container previously died after writing some data to
            // the changelog but before a commit, so it is desirable to delete the store, not restore anything and
            // trim the changelog
            // since we can't tell the difference b/w the two scenarios by just looking at the store and changelogs,
            // we'll request users to indicate whether to retain existing data using a config flag. this flag should only
            // be set during migrations, and turned off after the first successful commit of the new container (i.e. next
            // deploy). for simplicity, we'll always delete the local store, and restore from changelog if necessary.
            // the former scenario should not be common. the recommended way to opt-in to the transactional state feature
            // is to first upgrade to the latest samza version but keep the transactional state restore config off.
            // this will create the store checkpoint directories and write the changelog offset to the checkpoint, but
            // will not use them during restore. once this is done (i.e. at least one commit after upgrade), the
            // transactional state restore feature can be turned on on subsequent deploys. this code path exists as a
            // fail-safe against clearing changelogs in case users do not follow upgrade instructions and enable the
            // feature directly.
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> {
      "Marking checkpoint directory: {} for store: {} in task: {} for deletion since checkpointed " + "offset is null and oldest offset: {} is not.", checkpointDir, storeName, taskName, oldestOffset);
                storeDirsToDelete.put(storeName, checkpointDir);
            if (new TaskConfig(config).getTransactionalStateRetainExistingState()) {
                // mark for restore from (oldest, newest) to recreate local state.
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is true, " + "local state will be fully restored from current changelog contents. " + "There is no transactional local state guarantee.", storeName, taskName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
            } else {
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is false, " + "any local state and changelog topic contents will be deleted.", storeName, taskName);
                // mark for restore from (oldest, null) to trim entire changelog.
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, null));
        } else if (// check if the checkpointed offset is out of range of current oldest and newest offsets
        admin.offsetComparator(oldestOffset, checkpointedOffset) > 0 || admin.offsetComparator(checkpointedOffset, newestOffset) > 0) {
            // checkpointed offset is out of range. this could mean that this is a TTL topic and the checkpointed
            // offset was TTLd, or that the changelog topic was manually deleted and then recreated.
            // we cannot guarantee transactional state for TTL stores, so delete everything and do a full restore
            // for local store. if the topic was deleted and recreated, this will have the side effect of
            // clearing the store as well.
            LOG.warn("Checkpointed offset: {} for store: {} in task: {} is out of range of oldest: {} or newest: {} offset." + "Deleting existing store and fully restoring from changelog topic from oldest to newest offset. If the topic " + "has time-based retention, there is no transactional local state guarantees. If the topic was changed," + "local state will be cleaned up and fully restored to match the new topic contents.", checkpointedOffset, storeName, taskName, oldestOffset, newestOffset);
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> storeDirsToDelete.put(storeName, checkpointDir)));
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
        } else {
            // happy path. checkpointed offset is in range of current oldest and newest offsets
            if (!checkpointDirsOptional.isPresent()) {
                // non-persistent logged store
      "Did not find any checkpoint directories for logged (maybe non-persistent) store: {}. Local state " + "will be fully restored from current changelog contents.", storeName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            } else {
                // persistent logged store
                String targetOffset;
                // check the time since the last checkpoint against the changelog min compaction lag. if older,
                // restore from checkpointed offset to newest with no trim. be conservative. allow 10% safety
                // margin to avoid deletions when the downtime is close to the min compaction lag.
                long minCompactionLagMs = new StorageConfig(config).getChangelogMinCompactionLagMs(storeName);
                if (timeSinceLastCheckpointInMs > .9 * minCompactionLagMs) {
                    LOG.warn("Checkpointed offset for store: {} in task: {} is: {}. It is in range of oldest: {} and " + "newest: {} changelog offset. However, time since last checkpoint is: {}, which is greater than " + "0.9 * {} for the changelog topic. Since there is a chance that" + "the changelog topic has been compacted, restoring store to the end of the current changelog contents." + "There is no transactional local state guarantee.", storeName, taskName, checkpointedOffset, oldestOffset, newestOffset, timeSinceLastCheckpointInMs, minCompactionLagMs);
                    targetOffset = newestOffset;
                } else {
                    targetOffset = checkpointedOffset;
                // if there exists a valid store checkpoint directory with oldest offset <= local offset <= target offset,
                // retain it and restore the delta. delete all other checkpoint directories for the store. if more than one such
                // checkpoint directory exists, retain the one with the highest local offset and delete the rest.
                boolean hasValidCheckpointDir = false;
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storageManagerUtil.isLoggedStoreValid(storeName, checkpointDir, config, storeChangelogs, taskModel, clock, storeEngines)) {
                        String localOffset = storageManagerUtil.readOffsetFile(checkpointDir, Collections.singleton(changelogSSP), false).get(changelogSSP);
              "Read local offset: {} for store: {} checkpoint dir: {} in task: {}", localOffset, storeName, checkpointDir, taskName);
                        // the last clause keeps only a directory whose local offset beats the best candidate so far
                        if (admin.offsetComparator(localOffset, oldestOffset) >= 0 && admin.offsetComparator(localOffset, targetOffset) <= 0 && (storesToRestore.get(storeName) == null || admin.offsetComparator(localOffset, storesToRestore.get(storeName).startingOffset) > 0)) {
                            hasValidCheckpointDir = true;
                  "Temporarily marking checkpoint dir: {} for store: {} in task: {} for retention. " + "May be overridden later.", checkpointDir, storeName, taskName);
                            storeDirToRetain.put(storeName, checkpointDir);
                            // mark for restore even if local == checkpointed, so that the changelog gets trimmed.
                  "Temporarily marking store: {} in task: {} for restore from beginning offset: {} to " + "ending offset: {}. May be overridden later", storeName, taskName, localOffset, targetOffset);
                            storesToRestore.put(storeName, new RestoreOffsets(localOffset, targetOffset));
                // delete all non-retained checkpoint directories
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storeDirToRetain.get(storeName) == null || !storeDirToRetain.get(storeName).equals(checkpointDir)) {
              "Marking checkpoint directory: {} for store: {} in task: {} for deletion since it is not " + "marked for retention.", checkpointDir, storeName, taskName);
                        storeDirsToDelete.put(storeName, checkpointDir);
                // if the store had no valid checkpoint dirs to retain, restore from changelog
                if (!hasValidCheckpointDir) {
                    storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, targetOffset));
    });"Store directories to be retained in Task: {} are: {}", taskName, storeDirToRetain);"Store directories to be deleted in Task: {} are: {}", taskName, storeDirsToDelete);"Stores to be restored in Task: {} are: {}", taskName, storesToRestore);
    return new StoreActions(storeDirToRetain, storeDirsToDelete, storesToRestore);
Also used : ArrayListMultimap( ListMultimap( SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) FileUtil(org.apache.samza.util.FileUtil) SystemConsumer(org.apache.samza.system.SystemConsumer) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) ExecutorService(java.util.concurrent.ExecutorService) StorageConfig(org.apache.samza.config.StorageConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ImmutableMap( TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) Partition(org.apache.samza.Partition) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Clock(org.apache.samza.util.Clock) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) File( SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) Optional(java.util.Optional) Preconditions( KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) VisibleForTesting( Config(org.apache.samza.config.Config) 
Collections(java.util.Collections) SystemAdmins(org.apache.samza.system.SystemAdmins) HashMap(java.util.HashMap) StorageConfig(org.apache.samza.config.StorageConfig) SystemStream(org.apache.samza.system.SystemStream) TaskConfig(org.apache.samza.config.TaskConfig) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) List(java.util.List) SystemAdmin(org.apache.samza.system.SystemAdmin) File( SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) VisibleForTesting(

Example 20 with Range

use of org.apache.commons.lang3.Range in project presto by prestodb.

the class ColumnCardinalityCache method getCardinalities.

/**
 * Gets the cardinality for each {@link AccumuloColumnConstraint}.
 * Given constraints are expected to be indexed! Who knows what would happen if they weren't!
 * @param schema Schema name
 * @param table Table name
 * @param auths Scan authorizations
 * @param idxConstraintRangePairs Mapping of all ranges for a given constraint
 * @param earlyReturnThreshold Smallest acceptable cardinality to return early while other tasks complete
 * @param pollingDuration Duration for polling the cardinality completion service
 * @return An immutable multimap of cardinality to column constraint, sorted by cardinality from smallest to largest
 * @throws TableNotFoundException If the metrics table does not exist
 * @throws ExecutionException If another error occurs; I really don't even know anymore.
 */
// Submits one cardinality lookup per constraint to a completion service, collects finished results
// into a multimap keyed by cardinality, stops early once the smallest collected cardinality is at
// or below earlyReturnThreshold, and returns an immutable copy of what was collected.
// NOTE(review): this snippet was damaged by extraction - the close of the submitted lambda, the
// sleep that the polling comment refers to, the body of the InterruptedException check, a logger
// call prefix and several closing braces are missing. pollingDuration is unused in the visible
// code, presumably because the sleep was stripped. Restore these from the upstream source.
public Multimap<Long, AccumuloColumnConstraint> getCardinalities(String schema, String table, Authorizations auths, Multimap<AccumuloColumnConstraint, Range> idxConstraintRangePairs, long earlyReturnThreshold, Duration pollingDuration) {
    // Submit tasks to the executor to fetch column cardinality, adding it to the Guava cache if necessary
    CompletionService<Pair<Long, AccumuloColumnConstraint>> executor = new ExecutorCompletionService<>(executorService);
    idxConstraintRangePairs.asMap().forEach((key, value) -> executor.submit(() -> {
        long cardinality = getColumnCardinality(schema, table, auths, key.getFamily(), key.getQualifier(), value);
        LOG.debug("Cardinality for column %s is %s", key.getName(), cardinality);
        return Pair.of(cardinality, key);
    // Create a multi map sorted by cardinality
    ListMultimap<Long, AccumuloColumnConstraint> cardinalityToConstraints = MultimapBuilder.treeKeys().arrayListValues().build();
    try {
        boolean earlyReturn = false;
        // one task was submitted per distinct constraint
        int numTasks = idxConstraintRangePairs.asMap().entrySet().size();
        do {
            // Sleep for the polling duration to allow concurrent tasks to run for this time
            // Poll each task, retrieving the result if it is done
            for (int i = 0; i < numTasks; ++i) {
                // poll() does not block; it yields null when no task has completed yet
                Future<Pair<Long, AccumuloColumnConstraint>> futureCardinality = executor.poll();
                if (futureCardinality != null && futureCardinality.isDone()) {
                    Pair<Long, AccumuloColumnConstraint> columnCardinality = futureCardinality.get();
                    cardinalityToConstraints.put(columnCardinality.getLeft(), columnCardinality.getRight());
            // If the smallest cardinality is present and below the threshold, set the earlyReturn flag
            Optional<Entry<Long, AccumuloColumnConstraint>> smallestCardinality = cardinalityToConstraints.entries().stream().findFirst();
            if (smallestCardinality.isPresent()) {
                if (smallestCardinality.get().getKey() <= earlyReturnThreshold) {
          "Cardinality %s, is below threshold. Returning early while other tasks finish", smallestCardinality);
                    earlyReturn = true;
        // keep polling until an early return is flagged or every task has reported
        } while (!earlyReturn && cardinalityToConstraints.entries().size() < numTasks);
    } catch (ExecutionException | InterruptedException e) {
        if (e instanceof InterruptedException) {
        throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting cardinality", e);
    // Create a copy of the cardinalities
    return ImmutableMultimap.copyOf(cardinalityToConstraints);
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) PrestoException(com.facebook.presto.spi.PrestoException) AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) Entry(java.util.Map.Entry) Long.parseLong(java.lang.Long.parseLong) ExecutionException(java.util.concurrent.ExecutionException) Pair(org.apache.commons.lang3.tuple.Pair)


List (java.util.List)30 HashMap (java.util.HashMap)24 Map (java.util.Map)24 ArrayList (java.util.ArrayList)23 Collectors ( StringUtils (org.apache.commons.lang3.StringUtils)20 LoggerFactory (org.slf4j.LoggerFactory)17 Pair (org.apache.commons.lang3.tuple.Pair)16 Logger (org.slf4j.Logger)16 Set (java.util.Set)15 IOException ( Optional (java.util.Optional)12 Range (org.apache.commons.lang3.Range)11 Test (org.junit.jupiter.api.Test)11 java.util (java.util)10 Date (java.util.Date)10 Lists ( HashSet (java.util.HashSet)9 ExecutorService (java.util.concurrent.ExecutorService)9 Collection (java.util.Collection)8