
Example 16 with Range

use of org.apache.commons.lang3.Range in project narchy by automenta.

the class DigitizedScalarTest method testRewardConceptsFuzzification3.

// @Test
// public void testRewardConceptsFuzzification1() {
// NAR d = new Default();
// MutableFloat m = new MutableFloat(0f);
// 
// testSteadyFreqCondition(m,
// new FuzzyScalarConcepts(
// new FloatNormalized(() -> m.floatValue()).updateRange(-1).updateRange(1),
// d, FuzzyScalarConcepts.FuzzyTriangle, $.p("x")),
// (f) -> Util.equals(f, 0.5f + 0.5f * m.floatValue(), tolerance)
// );
// }
@Disabled
@Test
public void testRewardConceptsFuzzification3() {
    NAR n = NARS.shell();
    MutableFloat m = new MutableFloat(0f);
    FloatNormalized range = new FloatPolarNormalized(() -> m.floatValue(), 1f);
    DigitizedScalar f = new DigitizedScalar(range, DigitizedScalar.FuzzyNeedle, n, $.p("low"), $.p("mid"), $.p("hih"));
    // {
    // f.clear();
    // m.setValue(0); d.next();
    // System.out.println(Texts.n4(m.floatValue()) + "\t" + f.toString());
    // assertEquals("(I-->[sad]) %0.25;.90%\t(I-->[neutral]) %1.0;.90%\t(I-->[happy]) %0.0;.90%", f.toString());
    // }
    // 
    // {
    // f.clear();
    // m.setValue(-1); d.next();
    // System.out.println(Texts.n4(m.floatValue()) + "\t" + f.toString());
    // assertEquals("(I-->[sad]) %1.0;.90%\t(I-->[neutral]) %0.0;.90%\t(I-->[happy]) %0.0;.90%", f.toString());
    // }
    // 
    // {
    // f.clear();
    // m.setValue(+1); d.next();
    // System.out.println(Texts.n4(m.floatValue()) + "\t" + f.toString());
    // assertEquals("(I-->[sad]) %0.0;.90%\t(I-->[neutral]) %0.0;.90%\t(I-->[happy]) %1.0;.90%", f.toString());
    // }
    testSteadyFreqCondition(m, f, (freqSum) -> {
        System.out.println(freqSum + " " + tolerance);
        return Util.equals(freqSum, 1f, tolerance);
    }, n);
}
Also used : FloatNormalized(jcog.math.FloatNormalized) MutableFloat(org.apache.commons.lang3.mutable.MutableFloat) DigitizedScalar(nars.concept.scalar.DigitizedScalar) FloatPolarNormalized(jcog.math.FloatPolarNormalized) NAR(nars.NAR) Test(org.junit.jupiter.api.Test) Disabled(org.junit.jupiter.api.Disabled)
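
For orientation: FloatPolarNormalized maps the signed input m from [-1, +1] onto the unit interval, which is why the three fuzzy concept frequencies are expected to sum to ~1 in the predicate above. A minimal standalone sketch of that mapping (an illustrative helper under assumed semantics, not the actual jcog implementation):

// Illustrative polar normalization: -radius -> 0.0, 0 -> 0.5, +radius -> 1.0 (assumed linear).
static float polarNormalize(float x, float radius) {
    return (x / radius) * 0.5f + 0.5f;
}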

Example 17 with Range

use of org.apache.commons.lang3.Range in project drill by apache.

the class BlockMapBuilder method buildBlockMap.

/**
 * Builds a mapping of file byte ranges to block locations
 */
private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(FileStatus status) throws IOException {
    final Timer.Context context = metrics.timer(BLOCK_MAP_BUILDER_TIMER).time();
    BlockLocation[] blocks;
    ImmutableRangeMap<Long, BlockLocation> blockMap;
    blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    ImmutableRangeMap.Builder<Long, BlockLocation> blockMapBuilder = new ImmutableRangeMap.Builder<Long, BlockLocation>();
    for (BlockLocation block : blocks) {
        long start = block.getOffset();
        long end = start + block.getLength();
        Range<Long> range = Range.closedOpen(start, end);
        blockMapBuilder = blockMapBuilder.put(range, block);
    }
    blockMap = blockMapBuilder.build();
    blockMapMap.put(status.getPath(), blockMap);
    context.stop();
    return blockMap;
}
Also used : Timer(com.codahale.metrics.Timer) ToStringBuilder(org.apache.commons.lang3.builder.ToStringBuilder) ImmutableRangeMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableRangeMap) BlockLocation(org.apache.hadoop.fs.BlockLocation)
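
Since the ranges are closed-open and non-overlapping, any byte offset resolves to at most one block. A hedged usage sketch of querying the result (hypothetical offset value; RangeMap.get is standard Guava API):

// Sketch: find the HDFS block covering a given byte offset; returns null if no range matches.
ImmutableRangeMap<Long, BlockLocation> blockMap = buildBlockMap(status);
BlockLocation block = blockMap.get(4096L); // hypothetical offset within the file
if (block != null) {
    System.out.println("Block starts at " + block.getOffset() + ", length " + block.getLength());
}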

Example 18 with Range

use of org.apache.commons.lang3.Range in project drill by apache.

the class LocalFunctionRegistry method registerOperatorsWithInference.

private void registerOperatorsWithInference(DrillOperatorTable operatorTable, Map<String, Collection<DrillFuncHolder>> registeredFunctions) {
    final Map<String, DrillSqlOperator.DrillSqlOperatorBuilder> map = new HashMap<>();
    final Map<String, DrillSqlAggOperator.DrillSqlAggOperatorBuilder> mapAgg = new HashMap<>();
    for (Entry<String, Collection<DrillFuncHolder>> function : registeredFunctions.entrySet()) {
        final ArrayListMultimap<Pair<Integer, Integer>, DrillFuncHolder> functions = ArrayListMultimap.create();
        final ArrayListMultimap<Integer, DrillFuncHolder> aggregateFunctions = ArrayListMultimap.create();
        final String name = function.getKey().toUpperCase();
        boolean isDeterministic = true;
        boolean isNiladic = false;
        boolean isVarArg = false;
        for (DrillFuncHolder func : function.getValue()) {
            final int paramCount = func.getParamCount();
            if (func.isAggregating()) {
                aggregateFunctions.put(paramCount, func);
            } else {
                final Pair<Integer, Integer> argNumberRange;
                if (registeredFuncNameToArgRange.containsKey(name)) {
                    argNumberRange = registeredFuncNameToArgRange.get(name);
                } else {
                    argNumberRange = Pair.of(func.getParamCount(), func.getParamCount());
                }
                functions.put(argNumberRange, func);
            }
            if (!func.isDeterministic() || func.isComplexWriterFuncHolder()) {
                isDeterministic = false;
            }
            if (func.isNiladic()) {
                isNiladic = true;
            }
            if (func.isVarArg()) {
                isVarArg = true;
            }
        }
        for (Entry<Pair<Integer, Integer>, Collection<DrillFuncHolder>> entry : functions.asMap().entrySet()) {
            final Pair<Integer, Integer> range = entry.getKey();
            final int max = range.getRight();
            final int min = range.getLeft();
            if (!map.containsKey(name)) {
                map.put(name, new DrillSqlOperator.DrillSqlOperatorBuilder().setName(name));
            }
            final DrillSqlOperator.DrillSqlOperatorBuilder drillSqlOperatorBuilder = map.get(name);
            drillSqlOperatorBuilder.addFunctions(entry.getValue()).setVarArg(isVarArg).setArgumentCount(min, max).setDeterministic(isDeterministic).setNiladic(isNiladic);
        }
        for (Entry<Integer, Collection<DrillFuncHolder>> entry : aggregateFunctions.asMap().entrySet()) {
            if (!mapAgg.containsKey(name)) {
                mapAgg.put(name, new DrillSqlAggOperator.DrillSqlAggOperatorBuilder().setName(name));
            }
            final DrillSqlAggOperator.DrillSqlAggOperatorBuilder drillSqlAggOperatorBuilder = mapAgg.get(name);
            drillSqlAggOperatorBuilder.addFunctions(entry.getValue()).setArgumentCount(entry.getKey(), entry.getKey());
        }
    }
    for (final Entry<String, DrillSqlOperator.DrillSqlOperatorBuilder> entry : map.entrySet()) {
        operatorTable.addOperatorWithInference(entry.getKey(), entry.getValue().build());
    }
    for (final Entry<String, DrillSqlAggOperator.DrillSqlAggOperatorBuilder> entry : mapAgg.entrySet()) {
        operatorTable.addOperatorWithInference(entry.getKey(), entry.getValue().build());
    }
}
Also used : DrillFuncHolder(org.apache.drill.exec.expr.fn.DrillFuncHolder) HashMap(java.util.HashMap) DrillSqlAggOperator(org.apache.drill.exec.planner.sql.DrillSqlAggOperator) DrillSqlOperator(org.apache.drill.exec.planner.sql.DrillSqlOperator) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Collection(java.util.Collection) Pair(org.apache.commons.lang3.tuple.Pair)
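
The Pair<Integer, Integer> above acts as an inclusive [min, max] argument-count range. A sketch of the two shapes it takes (hypothetical values, not Drill's actual registry contents):

// Fixed-arity function: the range collapses to a single point.
Pair<Integer, Integer> fixedArity = Pair.of(2, 2);
// Hypothetical var-arg entry from registeredFuncNameToArgRange: minimum required args, unbounded maximum.
Pair<Integer, Integer> varArg = Pair.of(1, Integer.MAX_VALUE);
// Either way, the builder receives setArgumentCount(range.getLeft(), range.getRight()).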

Example 19 with Range

use of org.apache.commons.lang3.Range in project samza by apache.

the class TransactionalStateTaskRestoreManager method getStoreActions.

/**
 * Marks each persistent but non-logged store for deletion.
 *
 * For each logged store, based on the current, checkpointed and local changelog offsets,
 * 1. decides which directories (current and checkpoints) to delete for persistent stores.
 * 2. decides which directories (checkpoints) to retain for persistent stores.
 * 3. decides which stores (persistent or not) need to be restored, and the beginning and end offsets for the restore.
 *
 * When this method returns, in StoreActions,
 * 1. all persistent store current directories will be present in storeDirsToDelete
 * 2. each persistent store checkpoint directory will be present in either storeDirToRetain or storeDirsToDelete.
 * 3. there will be at most one storeDirToRetain per persistent store, which will be a checkpoint directory.
 * 4. any stores (persistent or not) that need to be restored from changelogs will be present in
 *    storesToRestore with appropriate offsets.
 */
@VisibleForTesting
static StoreActions getStoreActions(TaskModel taskModel, Map<String, StorageEngine> storeEngines, Map<String, SystemStream> storeChangelogs, Map<String, KafkaStateCheckpointMarker> kafkaStateCheckpointMarkers, CheckpointId checkpointId, Map<SystemStreamPartition, SystemStreamPartitionMetadata> currentChangelogOffsets, SystemAdmins systemAdmins, StorageManagerUtil storageManagerUtil, File loggedStoreBaseDirectory, File nonLoggedStoreBaseDirectory, Config config, Clock clock) {
    TaskName taskName = taskModel.getTaskName();
    TaskMode taskMode = taskModel.getTaskMode();
    Map<String, File> storeDirToRetain = new HashMap<>();
    ListMultimap<String, File> storeDirsToDelete = ArrayListMultimap.create();
    Map<String, RestoreOffsets> storesToRestore = new HashMap<>();
    storeEngines.forEach((storeName, storageEngine) -> {
        // do nothing if the store is non-persistent and not logged (e.g. in-memory cache only)
        if (!storageEngine.getStoreProperties().isPersistedToDisk() && !storageEngine.getStoreProperties().isLoggedStore()) {
            return;
        }
        // persistent but non-logged stores are always deleted
        if (storageEngine.getStoreProperties().isPersistedToDisk() && !storageEngine.getStoreProperties().isLoggedStore()) {
            File currentDir = storageManagerUtil.getTaskStoreDir(nonLoggedStoreBaseDirectory, storeName, taskName, taskMode);
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion since it is not a logged store.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            // persistent but non-logged stores should not have checkpoint dirs
            return;
        }
        // get the oldest and newest current changelog SSP offsets as well as the checkpointed changelog SSP offset
        SystemStream changelog = storeChangelogs.get(storeName);
        SystemStreamPartition changelogSSP = new SystemStreamPartition(changelog, taskModel.getChangelogPartition());
        SystemAdmin admin = systemAdmins.getSystemAdmin(changelogSSP.getSystem());
        SystemStreamPartitionMetadata changelogSSPMetadata = currentChangelogOffsets.get(changelogSSP);
        String oldestOffset = changelogSSPMetadata.getOldestOffset();
        String newestOffset = changelogSSPMetadata.getNewestOffset();
        // can be null if no message, or message has null offset
        String checkpointedOffset = null;
        if (kafkaStateCheckpointMarkers.containsKey(storeName) && StringUtils.isNotBlank(kafkaStateCheckpointMarkers.get(storeName).getChangelogOffset())) {
            checkpointedOffset = kafkaStateCheckpointMarkers.get(storeName).getChangelogOffset();
        }
        long timeSinceLastCheckpointInMs = checkpointId == null ? Long.MAX_VALUE : System.currentTimeMillis() - checkpointId.getMillis();
        // if the clean.on.container.start config is set, delete current and checkpoint dirs, restore from oldest offset to checkpointed
        if (storageEngine.getStoreProperties().isPersistedToDisk() && new StorageConfig(config).cleanLoggedStoreDirsOnStart(storeName)) {
            File currentDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode);
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
            storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode).forEach(checkpointDir -> {
                LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion due to clean.on.container.start config.", checkpointDir, storeName, taskName);
                storeDirsToDelete.put(storeName, checkpointDir);
            });
            LOG.info("Marking restore offsets for store: {} in task: {} to {}, {} ", storeName, taskName, oldestOffset, checkpointedOffset);
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            return;
        }
        Optional<File> currentDirOptional;
        Optional<List<File>> checkpointDirsOptional;
        if (!storageEngine.getStoreProperties().isPersistedToDisk()) {
            currentDirOptional = Optional.empty();
            checkpointDirsOptional = Optional.empty();
        } else {
            currentDirOptional = Optional.of(storageManagerUtil.getTaskStoreDir(loggedStoreBaseDirectory, storeName, taskName, taskMode));
            checkpointDirsOptional = Optional.of(storageManagerUtil.getTaskStoreCheckpointDirs(loggedStoreBaseDirectory, storeName, taskName, taskMode));
        }
        LOG.info("For store: {} in task: {} got current dir: {}, checkpoint dirs: {}, checkpointed changelog offset: {}", storeName, taskName, currentDirOptional, checkpointDirsOptional, checkpointedOffset);
        currentDirOptional.ifPresent(currentDir -> {
            LOG.info("Marking current directory: {} for store: {} in task: {} for deletion.", currentDir, storeName, taskName);
            storeDirsToDelete.put(storeName, currentDir);
        });
        if (checkpointedOffset == null && oldestOffset != null) {
            // this can mean that either this is the initial migration for this feature and there are no previously
            // checkpointed changelog offsets, or that this is a new store or changelog topic after the initial migration.
            // if this is the first time migration, it might be desirable to retain existing data.
            // if this is new store or topic, it is possible that the container previously died after writing some data to
            // the changelog but before a commit, so it is desirable to delete the store, not restore anything and
            // trim the changelog
            // since we can't tell the difference b/w the two scenarios by just looking at the store and changelogs,
            // we'll request users to indicate whether to retain existing data using a config flag. this flag should only
            // be set during migrations, and turned off after the first successful commit of the new container (i.e. next
            // deploy). for simplicity, we'll always delete the local store, and restore from changelog if necessary.
            // the former scenario should not be common. the recommended way to opt-in to the transactional state feature
            // is to first upgrade to the latest samza version but keep the transactional state restore config off.
            // this will create the store checkpoint directories and write the changelog offset to the checkpoint, but
            // will not use them during restore. once this is done (i.e. at least one commit after upgrade), the
            // transactional state restore feature can be turned on on subsequent deploys. this code path exists as a
            // fail-safe against clearing changelogs in case users do not follow upgrade instructions and enable the
            // feature directly.
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> {
                LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion since checkpointed " + "offset is null and oldest offset: {} is not.", checkpointDir, storeName, taskName, oldestOffset);
                storeDirsToDelete.put(storeName, checkpointDir);
            }));
            if (new TaskConfig(config).getTransactionalStateRetainExistingState()) {
                // mark for restore from (oldest, newest) to recreate local state.
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is true, " + "local state will be fully restored from current changelog contents. " + "There is no transactional local state guarantee.", storeName, taskName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
            } else {
                LOG.warn("Checkpointed offset for store: {} in task: {} is null. Since retain existing state is false, " + "any local state and changelog topic contents will be deleted.", storeName, taskName);
                // mark for restore from (oldest, null) to trim entire changelog.
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, null));
            }
        } else if (admin.offsetComparator(oldestOffset, checkpointedOffset) > 0 || admin.offsetComparator(checkpointedOffset, newestOffset) > 0) {
            // checkpointed offset is out of range. this could mean that this is a TTL topic and the checkpointed
            // offset was TTLd, or that the changelog topic was manually deleted and then recreated.
            // we cannot guarantee transactional state for TTL stores, so delete everything and do a full restore
            // for local store. if the topic was deleted and recreated, this will have the side effect of
            // clearing the store as well.
            LOG.warn("Checkpointed offset: {} for store: {} in task: {} is out of range of oldest: {} or newest: {} offset." + "Deleting existing store and fully restoring from changelog topic from oldest to newest offset. If the topic " + "has time-based retention, there is no transactional local state guarantees. If the topic was changed," + "local state will be cleaned up and fully restored to match the new topic contents.", checkpointedOffset, storeName, taskName, oldestOffset, newestOffset);
            checkpointDirsOptional.ifPresent(checkpointDirs -> checkpointDirs.forEach(checkpointDir -> storeDirsToDelete.put(storeName, checkpointDir)));
            storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, newestOffset));
        } else {
            // happy path. checkpointed offset is in range of current oldest and newest offsets
            if (!checkpointDirsOptional.isPresent()) {
                // non-persistent logged store
                LOG.info("Did not find any checkpoint directories for logged (maybe non-persistent) store: {}. Local state " + "will be fully restored from current changelog contents.", storeName);
                storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, checkpointedOffset));
            } else {
                // persistent logged store
                String targetOffset;
                // check checkpoint time against min.compaction.lag.ms. if older, restore from checkpointed offset to newest
                // with no trim. be conservative. allow 10% safety margin to avoid deletions when the downtime is close
                // to min.compaction.lag.ms
                long minCompactionLagMs = new StorageConfig(config).getChangelogMinCompactionLagMs(storeName);
                if (timeSinceLastCheckpointInMs > .9 * minCompactionLagMs) {
                    LOG.warn("Checkpointed offset for store: {} in task: {} is: {}. It is in range of oldest: {} and " + "newest: {} changelog offset. However, time since last checkpoint is: {}, which is greater than " + "0.9 * min.compaction.lag.ms: {} for the changelog topic. Since there is a chance that" + "the changelog topic has been compacted, restoring store to the end of the current changelog contents." + "There is no transactional local state guarantee.", storeName, taskName, checkpointedOffset, oldestOffset, newestOffset, timeSinceLastCheckpointInMs, minCompactionLagMs);
                    targetOffset = newestOffset;
                } else {
                    targetOffset = checkpointedOffset;
                }
                // if there exists a valid store checkpoint directory with oldest offset <= local offset <= target offset,
                // retain it and restore the delta. delete all other checkpoint directories for the store. if more than one such
                // checkpoint directory exists, retain the one with the highest local offset and delete the rest.
                boolean hasValidCheckpointDir = false;
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storageManagerUtil.isLoggedStoreValid(storeName, checkpointDir, config, storeChangelogs, taskModel, clock, storeEngines)) {
                        String localOffset = storageManagerUtil.readOffsetFile(checkpointDir, Collections.singleton(changelogSSP), false).get(changelogSSP);
                        LOG.info("Read local offset: {} for store: {} checkpoint dir: {} in task: {}", localOffset, storeName, checkpointDir, taskName);
                        if (admin.offsetComparator(localOffset, oldestOffset) >= 0 && admin.offsetComparator(localOffset, targetOffset) <= 0 && (storesToRestore.get(storeName) == null || admin.offsetComparator(localOffset, storesToRestore.get(storeName).startingOffset) > 0)) {
                            hasValidCheckpointDir = true;
                            LOG.info("Temporarily marking checkpoint dir: {} for store: {} in task: {} for retention. " + "May be overridden later.", checkpointDir, storeName, taskName);
                            storeDirToRetain.put(storeName, checkpointDir);
                            // mark for restore even if local == checkpointed, so that the changelog gets trimmed.
                            LOG.info("Temporarily marking store: {} in task: {} for restore from beginning offset: {} to " + "ending offset: {}. May be overridden later", storeName, taskName, localOffset, targetOffset);
                            storesToRestore.put(storeName, new RestoreOffsets(localOffset, targetOffset));
                        }
                    }
                }
                // delete all non-retained checkpoint directories
                for (File checkpointDir : checkpointDirsOptional.get()) {
                    if (storeDirToRetain.get(storeName) == null || !storeDirToRetain.get(storeName).equals(checkpointDir)) {
                        LOG.info("Marking checkpoint directory: {} for store: {} in task: {} for deletion since it is not " + "marked for retention.", checkpointDir, storeName, taskName);
                        storeDirsToDelete.put(storeName, checkpointDir);
                    }
                }
                // if the store has no valid checkpoint dirs to retain, restore from the changelog
                if (!hasValidCheckpointDir) {
                    storesToRestore.put(storeName, new RestoreOffsets(oldestOffset, targetOffset));
                }
            }
        }
    });
    LOG.info("Store directories to be retained in Task: {} are: {}", taskName, storeDirToRetain);
    LOG.info("Store directories to be deleted in Task: {} are: {}", taskName, storeDirsToDelete);
    LOG.info("Stores to be restored in Task: {} are: {}", taskName, storesToRestore);
    return new StoreActions(storeDirToRetain, storeDirsToDelete, storesToRestore);
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) ListMultimap(com.google.common.collect.ListMultimap) SSPMetadataCache(org.apache.samza.system.SSPMetadataCache) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) StringUtils(org.apache.commons.lang3.StringUtils) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) FileUtil(org.apache.samza.util.FileUtil) SystemConsumer(org.apache.samza.system.SystemConsumer) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) ExecutorService(java.util.concurrent.ExecutorService) StorageConfig(org.apache.samza.config.StorageConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) ImmutableMap(com.google.common.collect.ImmutableMap) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) Partition(org.apache.samza.Partition) ContainerContext(org.apache.samza.context.ContainerContext) Set(java.util.Set) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Clock(org.apache.samza.util.Clock) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) ChangelogSSPIterator(org.apache.samza.system.ChangelogSSPIterator) SystemAdmin(org.apache.samza.system.SystemAdmin) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) KafkaStateCheckpointMarker(org.apache.samza.checkpoint.kafka.KafkaStateCheckpointMarker) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Config(org.apache.samza.config.Config) Collections(java.util.Collections) SystemAdmins(org.apache.samza.system.SystemAdmins)
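
The 10% safety margin on min.compaction.lag.ms is easiest to see with concrete numbers; a worked sketch (hypothetical values):

// Hypothetical: changelog min.compaction.lag.ms of 1 day, last checkpoint ~22.5 hours ago.
long minCompactionLagMs = 24L * 60 * 60 * 1000; // 86,400,000 ms
long timeSinceLastCheckpointInMs = 81_000_000L; // 22.5 hours
// 0.9 * 86,400,000 = 77,760,000, and 81,000,000 > 77,760,000, so compaction may already have run:
boolean maybeCompacted = timeSinceLastCheckpointInMs > .9 * minCompactionLagMs; // true -> restore to newestOffset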

Example 20 with Range

use of org.apache.commons.lang3.Range in project presto by prestodb.

the class ColumnCardinalityCache method getCardinalities.

/**
 * Gets the cardinality for each {@link AccumuloColumnConstraint}.
 * Given constraints are expected to be indexed; behavior for non-indexed constraints is undefined.
 *
 * @param schema Schema name
 * @param table Table name
 * @param auths Scan authorizations
 * @param idxConstraintRangePairs Mapping of all ranges for a given constraint
 * @param earlyReturnThreshold Smallest acceptable cardinality to return early while other tasks complete
 * @param pollingDuration Duration for polling the cardinality completion service
 * @return An immutable multimap of cardinality to column constraint, sorted by cardinality from smallest to largest
 * @throws TableNotFoundException If the metrics table does not exist
 * @throws ExecutionException If an unexpected error occurs while fetching a column cardinality
 */
public Multimap<Long, AccumuloColumnConstraint> getCardinalities(String schema, String table, Authorizations auths, Multimap<AccumuloColumnConstraint, Range> idxConstraintRangePairs, long earlyReturnThreshold, Duration pollingDuration) {
    // Submit tasks to the executor to fetch column cardinality, adding it to the Guava cache if necessary
    CompletionService<Pair<Long, AccumuloColumnConstraint>> executor = new ExecutorCompletionService<>(executorService);
    idxConstraintRangePairs.asMap().forEach((key, value) -> executor.submit(() -> {
        long cardinality = getColumnCardinality(schema, table, auths, key.getFamily(), key.getQualifier(), value);
        LOG.debug("Cardinality for column %s is %s", key.getName(), cardinality);
        return Pair.of(cardinality, key);
    }));
    // Create a multi map sorted by cardinality
    ListMultimap<Long, AccumuloColumnConstraint> cardinalityToConstraints = MultimapBuilder.treeKeys().arrayListValues().build();
    try {
        boolean earlyReturn = false;
        int numTasks = idxConstraintRangePairs.asMap().entrySet().size();
        do {
            // Sleep for the polling duration to allow concurrent tasks to run for this time
            Thread.sleep(pollingDuration.toMillis());
            // Poll each task, retrieving the result if it is done
            for (int i = 0; i < numTasks; ++i) {
                Future<Pair<Long, AccumuloColumnConstraint>> futureCardinality = executor.poll();
                if (futureCardinality != null && futureCardinality.isDone()) {
                    Pair<Long, AccumuloColumnConstraint> columnCardinality = futureCardinality.get();
                    cardinalityToConstraints.put(columnCardinality.getLeft(), columnCardinality.getRight());
                }
            }
            // If the smallest cardinality is present and below the threshold, set the earlyReturn flag
            Optional<Entry<Long, AccumuloColumnConstraint>> smallestCardinality = cardinalityToConstraints.entries().stream().findFirst();
            if (smallestCardinality.isPresent()) {
                if (smallestCardinality.get().getKey() <= earlyReturnThreshold) {
                    LOG.info("Cardinality %s, is below threshold. Returning early while other tasks finish", smallestCardinality);
                    earlyReturn = true;
                }
            }
        } while (!earlyReturn && cardinalityToConstraints.entries().size() < numTasks);
    } catch (ExecutionException | InterruptedException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting cardinality", e);
    }
    // Create a copy of the cardinalities
    return ImmutableMultimap.copyOf(cardinalityToConstraints);
}
Also used : AccumuloColumnConstraint(com.facebook.presto.accumulo.model.AccumuloColumnConstraint) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) PrestoException(com.facebook.presto.spi.PrestoException) Entry(java.util.Map.Entry) Long.parseLong(java.lang.Long.parseLong) ExecutionException(java.util.concurrent.ExecutionException) Pair(org.apache.commons.lang3.tuple.Pair)
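
The polling loop above is an early-return pattern over a CompletionService. The same pattern in isolation, stripped of the Accumulo types (a self-contained sketch with fabricated task results, not Presto code):

import java.util.concurrent.*;

public class EarlyReturnSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        CompletionService<Long> tasks = new ExecutorCompletionService<>(pool);
        long[] fakeCardinalities = { 900L, 40L, 700L, 300L }; // hypothetical per-column results
        for (long c : fakeCardinalities) {
            tasks.submit(() -> c); // stand-in for getColumnCardinality(...)
        }
        long earlyReturnThreshold = 100L;
        Long smallest = null;
        for (int done = 0; done < fakeCardinalities.length; done++) {
            Long cardinality = tasks.take().get(); // next completed task, in completion order
            if (smallest == null || cardinality < smallest) {
                smallest = cardinality;
            }
            if (smallest <= earlyReturnThreshold) {
                break; // good enough: return early while remaining tasks finish in the background
            }
        }
        System.out.println("Smallest cardinality seen: " + smallest);
        pool.shutdownNow();
    }
}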

Aggregations

List (java.util.List): 30
HashMap (java.util.HashMap): 24
Map (java.util.Map): 24
ArrayList (java.util.ArrayList): 23
Collectors (java.util.stream.Collectors): 21
StringUtils (org.apache.commons.lang3.StringUtils): 20
LoggerFactory (org.slf4j.LoggerFactory): 17
Pair (org.apache.commons.lang3.tuple.Pair): 16
Logger (org.slf4j.Logger): 16
Set (java.util.Set): 15
IOException (java.io.IOException): 14
Optional (java.util.Optional): 12
Range (org.apache.commons.lang3.Range): 11
Test (org.junit.jupiter.api.Test): 11
java.util (java.util): 10
Date (java.util.Date): 10
Lists (com.google.common.collect.Lists): 9
HashSet (java.util.HashSet): 9
ExecutorService (java.util.concurrent.ExecutorService): 9
Collection (java.util.Collection): 8
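
The indexed class itself, org.apache.commons.lang3.Range, is a small immutable value type. For reference, its core calls (standard commons-lang3 API, values hypothetical):

import org.apache.commons.lang3.Range;

Range<Integer> argCounts = Range.between(1, 10); // inclusive on both ends
boolean in = argCounts.contains(5); // true
boolean overlaps = argCounts.isOverlappedBy(Range.between(8, 20)); // true
int min = argCounts.getMinimum(); // 1
int max = argCounts.getMaximum(); // 10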