
Example 46 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class DimensionPredicateFilterBenchmark method setup.

@Setup
public void setup() throws IOException {
    final BitmapFactory bitmapFactory = new RoaringBitmapFactory();
    final BitmapSerdeFactory serdeFactory = new RoaringBitmapSerdeFactory(null);
    final List<Integer> ints = generateInts();
    final GenericIndexed<String> dictionary = GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, String>() {

        @Override
        public String apply(Integer i) {
            return i.toString();
        }
    }), GenericIndexed.STRING_STRATEGY);
    final BitmapIndex bitmapIndex = new BitmapIndexColumnPartSupplier(bitmapFactory, GenericIndexed.fromIterable(FluentIterable.from(ints).transform(new Function<Integer, ImmutableBitmap>() {

        @Override
        public ImmutableBitmap apply(Integer i) {
            final MutableBitmap mutableBitmap = bitmapFactory.makeEmptyMutableBitmap();
            mutableBitmap.add(i - START_INT);
            return bitmapFactory.makeImmutableBitmap(mutableBitmap);
        }
    }), serdeFactory.getObjectStrategy()), dictionary).get();
    selector = new BitmapIndexSelector() {

        @Override
        public Indexed<String> getDimensionValues(String dimension) {
            return dictionary;
        }

        @Override
        public int getNumRows() {
            throw new UnsupportedOperationException();
        }

        @Override
        public BitmapFactory getBitmapFactory() {
            return bitmapFactory;
        }

        @Override
        public ImmutableBitmap getBitmapIndex(String dimension, String value) {
            return bitmapIndex.getBitmap(bitmapIndex.getIndex(value));
        }

        @Override
        public BitmapIndex getBitmapIndex(String dimension) {
            return bitmapIndex;
        }

        @Override
        public ImmutableRTree getSpatialIndex(String dimension) {
            throw new UnsupportedOperationException();
        }
    };
}
Also used: ImmutableBitmap(io.druid.collections.bitmap.ImmutableBitmap), MutableBitmap(io.druid.collections.bitmap.MutableBitmap), BitmapIndex(io.druid.segment.column.BitmapIndex), Function(com.google.common.base.Function), ImmutableRTree(io.druid.collections.spatial.ImmutableRTree), RoaringBitmapSerdeFactory(io.druid.segment.data.RoaringBitmapSerdeFactory), BitmapIndexColumnPartSupplier(io.druid.segment.serde.BitmapIndexColumnPartSupplier), BitmapIndexSelector(io.druid.query.filter.BitmapIndexSelector), RoaringBitmapFactory(io.druid.collections.bitmap.RoaringBitmapFactory), BitmapFactory(io.druid.collections.bitmap.BitmapFactory), Indexed(io.druid.segment.data.Indexed), GenericIndexed(io.druid.segment.data.GenericIndexed), BitmapSerdeFactory(io.druid.segment.data.BitmapSerdeFactory), Setup(org.openjdk.jmh.annotations.Setup)
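
Aside: the Druid-specific types (GenericIndexed, the bitmap classes) dominate this snippet, but the Function usage itself is Guava's lazy FluentIterable.transform. Below is a minimal standalone sketch of that pattern, assuming only Guava on the classpath; the class name and values are illustrative and not part of the benchmark.

import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;

import java.util.List;

public class FluentTransformSketch {

    public static void main(String[] args) {
        final List<Integer> ints = ImmutableList.of(1, 2, 3);
        // transform() is lazy: apply() runs as the result is iterated, which is why the
        // benchmark can hand the transformed iterable straight to GenericIndexed.fromIterable.
        final Iterable<String> strings = FluentIterable.from(ints).transform(new Function<Integer, String>() {

            @Override
            public String apply(Integer i) {
                return i.toString();
            }
        });
        for (String s : strings) {
            System.out.println(s);
        }
    }
}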

Example 47 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class KafkaSupervisor method discoverTasks.

private void discoverTasks() throws ExecutionException, InterruptedException, TimeoutException {
    int taskCount = 0;
    List<String> futureTaskIds = Lists.newArrayList();
    List<ListenableFuture<Boolean>> futures = Lists.newArrayList();
    List<Task> tasks = taskStorage.getActiveTasks();
    for (Task task : tasks) {
        if (!(task instanceof KafkaIndexTask) || !dataSource.equals(task.getDataSource())) {
            continue;
        }
        taskCount++;
        final KafkaIndexTask kafkaTask = (KafkaIndexTask) task;
        final String taskId = task.getId();
        // Determine which task group this task belongs to based on one of the partitions handled by this task. If we
        // later determine that this task is actively reading, we will make sure that it matches our current partition
        // allocation (getTaskGroupIdForPartition(partition) should return the same value for every partition being read
        // by this task) and kill it if it is not compatible. If the task is instead found to be in the publishing
        // state, we will permit it to complete even if it doesn't match our current partition allocation to support
        // seamless schema migration.
        Iterator<Integer> it = kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet().iterator();
        final Integer taskGroupId = (it.hasNext() ? getTaskGroupIdForPartition(it.next()) : null);
        if (taskGroupId != null) {
            // check to see if we already know about this task, either in [taskGroups] or in [pendingCompletionTaskGroups]
            // and if not add it to taskGroups or pendingCompletionTaskGroups (if status = PUBLISHING)
            TaskGroup taskGroup = taskGroups.get(taskGroupId);
            if (!isTaskInPendingCompletionGroups(taskId) && (taskGroup == null || !taskGroup.tasks.containsKey(taskId))) {
                futureTaskIds.add(taskId);
                futures.add(Futures.transform(taskClient.getStatusAsync(taskId), new Function<KafkaIndexTask.Status, Boolean>() {

                    @Override
                    public Boolean apply(KafkaIndexTask.Status status) {
                        if (status == KafkaIndexTask.Status.PUBLISHING) {
                            addDiscoveredTaskToPendingCompletionTaskGroups(taskGroupId, taskId, kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap());
                            // update partitionGroups with the publishing task's offsets (if they are greater than what is
                            // existing) so that the next tasks will start reading from where this task left off
                            Map<Integer, Long> publishingTaskCurrentOffsets = taskClient.getCurrentOffsets(taskId, true);
                            for (Map.Entry<Integer, Long> entry : publishingTaskCurrentOffsets.entrySet()) {
                                Integer partition = entry.getKey();
                                Long offset = entry.getValue();
                                ConcurrentHashMap<Integer, Long> partitionOffsets = partitionGroups.get(getTaskGroupIdForPartition(partition));
                                boolean succeeded;
                                do {
                                    succeeded = true;
                                    Long previousOffset = partitionOffsets.putIfAbsent(partition, offset);
                                    if (previousOffset != null && previousOffset < offset) {
                                        succeeded = partitionOffsets.replace(partition, previousOffset, offset);
                                    }
                                } while (!succeeded);
                            }
                        } else {
                            for (Integer partition : kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap().keySet()) {
                                if (!taskGroupId.equals(getTaskGroupIdForPartition(partition))) {
                                    log.warn("Stopping task [%s] which does not match the expected partition allocation", taskId);
                                    try {
                                        stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                    } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                        log.warn(e, "Exception while stopping task");
                                    }
                                    return false;
                                }
                            }
                            if (taskGroups.putIfAbsent(taskGroupId, new TaskGroup(ImmutableMap.copyOf(kafkaTask.getIOConfig().getStartPartitions().getPartitionOffsetMap()), kafkaTask.getIOConfig().getMinimumMessageTime())) == null) {
                                log.debug("Created new task group [%d]", taskGroupId);
                            }
                            if (!isTaskCurrent(taskGroupId, taskId)) {
                                log.info("Stopping task [%s] which does not match the expected parameters and ingestion spec", taskId);
                                try {
                                    stopTask(taskId, false).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
                                } catch (InterruptedException | ExecutionException | TimeoutException e) {
                                    log.warn(e, "Exception while stopping task");
                                }
                                return false;
                            } else {
                                taskGroups.get(taskGroupId).tasks.putIfAbsent(taskId, new TaskData());
                            }
                        }
                        return true;
                    }
                }, workerExec));
            }
        }
    }
    List<Boolean> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
    for (int i = 0; i < results.size(); i++) {
        if (results.get(i) == null) {
            String taskId = futureTaskIds.get(i);
            log.warn("Task [%s] failed to return status, killing task", taskId);
            killTask(taskId);
        }
    }
    log.debug("Found [%d] Kafka indexing tasks for dataSource [%s]", taskCount, dataSource);
}
Also used: Task(io.druid.indexing.common.task.Task), KafkaIndexTask(io.druid.indexing.kafka.KafkaIndexTask), Function(com.google.common.base.Function), ExecutionException(java.util.concurrent.ExecutionException), TimeoutException(java.util.concurrent.TimeoutException), TaskStatus(io.druid.indexing.common.TaskStatus), ListenableFuture(com.google.common.util.concurrent.ListenableFuture), Map(java.util.Map), ImmutableMap(com.google.common.collect.ImmutableMap), ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap), HashMap(java.util.HashMap)
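
The Function here is the callback passed to Futures.transform, mapping each task's asynchronous status check to a Boolean on workerExec. Below is a minimal sketch of that shape, assuming a Guava version whose Futures.transform accepts com.google.common.base.Function (as the version this Druid code builds against does); the class, executor, and status values are illustrative stand-ins, not the supervisor's API.

import com.google.common.base.Function;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import java.util.concurrent.Callable;
import java.util.concurrent.Executors;

public class FuturesTransformSketch {

    public static void main(String[] args) throws Exception {
        final ListeningExecutorService exec = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
        // Stand-in for an asynchronous status call such as taskClient.getStatusAsync(taskId).
        final ListenableFuture<String> statusFuture = exec.submit(new Callable<String>() {

            @Override
            public String call() {
                return "PUBLISHING";
            }
        });
        // Map the eventual status to a Boolean on the supplied executor, the same shape the
        // supervisor uses to fold per-task status checks into a list of futures.
        final ListenableFuture<Boolean> handled = Futures.transform(statusFuture, new Function<String, Boolean>() {

            @Override
            public Boolean apply(String status) {
                return "PUBLISHING".equals(status);
            }
        }, exec);
        System.out.println(handled.get());
        exec.shutdown();
    }
}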

Example 48 with Function

use of com.google.common.base.Function in project sharding-jdbc by dangdangdotcom.

the class ShardingRuleMockBuilder method build.

public ShardingRule build() {
    final DataSourceRule dataSourceRule = new DataSourceRule(ImmutableMap.of("db0", Mockito.mock(DataSource.class), "db1", Mockito.mock(DataSource.class)));
    Collection<TableRule> tableRules = Lists.newArrayList(Iterators.transform(autoIncrementColumnMap.keySet().iterator(), new Function<String, TableRule>() {

        @Override
        public TableRule apply(final String input) {
            TableRule.TableRuleBuilder builder = TableRule.builder(input).actualTables(Collections.singletonList(input)).dataSourceRule(dataSourceRule);
            for (String each : autoIncrementColumnMap.get(input)) {
                builder.autoIncrementColumns(each);
            }
            return builder.build();
        }
    }));
    if (tableRules.isEmpty()) {
        tableRules.add(new TableRule.TableRuleBuilder("mock").actualTables(Collections.singletonList("mock")).dataSourceRule(dataSourceRule).build());
    }
    return new ShardingRule.ShardingRuleBuilder().dataSourceRule(dataSourceRule).idGenerator(IncrementIdGenerator.class).tableRules(tableRules).databaseShardingStrategy(new DatabaseShardingStrategy(shardingColumns, new NoneDatabaseShardingAlgorithm())).build();
}
Also used: TableRule(com.dangdang.ddframe.rdb.sharding.api.rule.TableRule), Function(com.google.common.base.Function), DatabaseShardingStrategy(com.dangdang.ddframe.rdb.sharding.api.strategy.database.DatabaseShardingStrategy), DataSourceRule(com.dangdang.ddframe.rdb.sharding.api.rule.DataSourceRule), ShardingRule(com.dangdang.ddframe.rdb.sharding.api.rule.ShardingRule), NoneDatabaseShardingAlgorithm(com.dangdang.ddframe.rdb.sharding.api.strategy.database.NoneDatabaseShardingAlgorithm), IncrementIdGenerator(com.dangdang.ddframe.rdb.sharding.id.generator.fixture.IncrementIdGenerator)
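
The Function here adapts each configured table name into a TableRule while Iterators.transform walks the key set and Lists.newArrayList materializes the result. Below is a minimal sketch of the same Iterators.transform pattern with plain strings, assuming only Guava; the class name and the "rule for ..." output are illustrative, not sharding-jdbc API.

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;

import java.util.List;

public class IteratorsTransformSketch {

    public static void main(String[] args) {
        final List<String> tableNames = ImmutableList.of("t_order", "t_order_item");
        // Iterators.transform wraps the source iterator; Lists.newArrayList drains it,
        // which is how the mock builder turns configured table names into TableRules.
        final List<String> rules = Lists.newArrayList(Iterators.transform(tableNames.iterator(), new Function<String, String>() {

            @Override
            public String apply(final String input) {
                return "rule for " + input;
            }
        }));
        System.out.println(rules);
    }
}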

Example 49 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class ScanQueryEngine method process.

public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final Map<String, Object> responseContext) {
    if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) != null) {
        int count = (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
        if (count >= query.getLimit()) {
            return Sequences.empty();
        }
    }
    final Long timeoutAt = (long) responseContext.get(ScanQueryRunnerFactory.CTX_TIMEOUT_AT);
    final long start = System.currentTimeMillis();
    final StorageAdapter adapter = segment.asStorageAdapter();
    if (adapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    List<String> allDims = Lists.newLinkedList(adapter.getAvailableDimensions());
    List<String> allMetrics = Lists.newLinkedList(adapter.getAvailableMetrics());
    final List<String> allColumns = Lists.newLinkedList();
    if (query.getColumns() != null && !query.getColumns().isEmpty()) {
        if (!query.getColumns().contains(ScanResultValue.timestampKey)) {
            allColumns.add(ScanResultValue.timestampKey);
        }
        allColumns.addAll(query.getColumns());
        allDims.retainAll(query.getColumns());
        allMetrics.retainAll(query.getColumns());
    } else {
        if (!allDims.contains(ScanResultValue.timestampKey)) {
            allColumns.add(ScanResultValue.timestampKey);
        }
        allColumns.addAll(allDims);
        allColumns.addAll(allMetrics);
    }
    final List<DimensionSpec> dims = DefaultDimensionSpec.toSpec(allDims);
    final List<String> metrics = allMetrics;
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
    final String segmentId = segment.getIdentifier();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
    if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) == null) {
        responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, 0);
    }
    final int limit = query.getLimit() - (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
    return Sequences.concat(Sequences.map(adapter.makeCursors(filter, intervals.get(0), VirtualColumns.EMPTY, Granularities.ALL, query.isDescending()), new Function<Cursor, Sequence<ScanResultValue>>() {

        @Override
        public Sequence<ScanResultValue> apply(final Cursor cursor) {
            return new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

                @Override
                public Iterator<ScanResultValue> make() {
                    final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME);
                    final List<ColumnSelectorPlus<SelectQueryEngine.SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, Lists.newArrayList(dims), cursor));
                    final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
                    for (String metric : metrics) {
                        final ObjectColumnSelector metricSelector = cursor.makeObjectColumnSelector(metric);
                        metSelectors.put(metric, metricSelector);
                    }
                    final int batchSize = query.getBatchSize();
                    return new Iterator<ScanResultValue>() {

                        private int offset = 0;

                        @Override
                        public boolean hasNext() {
                            return !cursor.isDone() && offset < limit;
                        }

                        @Override
                        public ScanResultValue next() {
                            if (System.currentTimeMillis() >= timeoutAt) {
                                throw new QueryInterruptedException(new TimeoutException());
                            }
                            int lastOffset = offset;
                            Object events = null;
                            String resultFormat = query.getResultFormat();
                            if (ScanQuery.RESULT_FORMAT_VALUE_VECTOR.equals(resultFormat)) {
                                throw new UnsupportedOperationException("valueVector is not supported now");
                            } else if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                                events = rowsToCompactedList();
                            } else {
                                events = rowsToList();
                            }
                            responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) + (offset - lastOffset));
                            responseContext.put(ScanQueryRunnerFactory.CTX_TIMEOUT_AT, timeoutAt - (System.currentTimeMillis() - start));
                            return new ScanResultValue(segmentId, allColumns, events);
                        }

                        @Override
                        public void remove() {
                            throw new UnsupportedOperationException();
                        }

                        private Object rowsToCompactedList() {
                            return Lists.transform((List<Map<String, Object>>) rowsToList(), new Function<Map<String, Object>, Object>() {

                                @Override
                                public Object apply(Map<String, Object> input) {
                                    List eventValues = Lists.newArrayListWithExpectedSize(allColumns.size());
                                    for (String expectedColumn : allColumns) {
                                        eventValues.add(input.get(expectedColumn));
                                    }
                                    return eventValues;
                                }
                            });
                        }

                        private Object rowsToList() {
                            List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                            for (int i = 0; !cursor.isDone() && i < batchSize && offset < limit; cursor.advance(), i++, offset++) {
                                final Map<String, Object> theEvent = SelectQueryEngine.singleEvent(ScanResultValue.timestampKey, timestampColumnSelector, selectorPlusList, metSelectors);
                                events.add(theEvent);
                            }
                            return events;
                        }

                        private Object rowsToValueVector() {
                            // only lists are supported for now; ValueVector or Arrow support could be added in the future
                            return rowsToList();
                        }
                    };
                }

                @Override
                public void cleanup(Iterator<ScanResultValue> iterFromMake) {
                }
            });
        }
    }));
}
Also used: DimensionSpec(io.druid.query.dimension.DimensionSpec), DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec), StorageAdapter(io.druid.segment.StorageAdapter), Cursor(io.druid.segment.Cursor), Function(com.google.common.base.Function), Iterator(java.util.Iterator), ISE(io.druid.java.util.common.ISE), LongColumnSelector(io.druid.segment.LongColumnSelector), List(java.util.List), ObjectColumnSelector(io.druid.segment.ObjectColumnSelector), QueryInterruptedException(io.druid.query.QueryInterruptedException), TimeoutException(java.util.concurrent.TimeoutException), BaseSequence(io.druid.java.util.common.guava.BaseSequence), Filter(io.druid.query.filter.Filter), Map(java.util.Map), Interval(org.joda.time.Interval)
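
Two Functions appear in this example: the outer one maps each Cursor to a Sequence of results via Druid's Sequences.map, and the inner one, inside rowsToCompactedList(), projects each row map onto the column order via Lists.transform. The sketch below isolates the inner, Guava-only pattern, assuming only Guava on the classpath; the column names and row values are made up for illustration.

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;

import java.util.List;
import java.util.Map;

public class CompactedListSketch {

    public static void main(String[] args) {
        final List<String> columns = ImmutableList.of("timestamp", "page", "count");
        final List<Map<String, Object>> rows = ImmutableList.<Map<String, Object>>of(
                ImmutableMap.<String, Object>of("timestamp", 1L, "page", "a", "count", 10),
                ImmutableMap.<String, Object>of("timestamp", 2L, "page", "b", "count", 20));
        // Lists.transform returns a lazy view: each row map is projected onto the declared
        // column order on access, mirroring rowsToCompactedList() above.
        final List<List<Object>> compacted = Lists.transform(rows, new Function<Map<String, Object>, List<Object>>() {

            @Override
            public List<Object> apply(Map<String, Object> row) {
                final List<Object> values = Lists.newArrayListWithExpectedSize(columns.size());
                for (String column : columns) {
                    values.add(row.get(column));
                }
                return values;
            }
        });
        System.out.println(compacted);
    }
}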

Example 50 with Function

use of com.google.common.base.Function in project druid by druid-io.

the class IncrementalIndexStorageAdapterTest method testCursoringAndIndexUpdationInterleaving.

@Test
public void testCursoringAndIndexUpdationInterleaving() throws Exception {
    final IncrementalIndex index = indexCreator.createIndex();
    final long timestamp = System.currentTimeMillis();
    for (int i = 0; i < 2; i++) {
        index.add(new MapBasedInputRow(timestamp, Lists.newArrayList("billy"), ImmutableMap.<String, Object>of("billy", "v1" + i)));
    }
    final StorageAdapter sa = new IncrementalIndexStorageAdapter(index);
    Sequence<Cursor> cursors = sa.makeCursors(null, new Interval(timestamp - 60_000, timestamp + 60_000), VirtualColumns.EMPTY, Granularities.ALL, false);
    Sequences.toList(Sequences.map(cursors, new Function<Cursor, Object>() {

        @Nullable
        @Override
        public Object apply(Cursor cursor) {
            DimensionSelector dimSelector = cursor.makeDimensionSelector(new DefaultDimensionSpec("billy", "billy"));
            int cardinality = dimSelector.getValueCardinality();
            // index gets more rows at this point, while the other thread is iterating over the cursor
            try {
                for (int i = 0; i < 1; i++) {
                    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList("billy"), ImmutableMap.<String, Object>of("billy", "v2" + i)));
                }
            } catch (Exception ex) {
                throw new RuntimeException(ex);
            }
            // and then, cursoring continues in the other thread
            while (!cursor.isDone()) {
                IndexedInts row = dimSelector.getRow();
                for (int i : row) {
                    Assert.assertTrue(i < cardinality);
                }
                cursor.advance();
            }
            return null;
        }
    }), new ArrayList<>());
}
Also used: DimensionSelector(io.druid.segment.DimensionSelector), StorageAdapter(io.druid.segment.StorageAdapter), Cursor(io.druid.segment.Cursor), DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec), IOException(java.io.IOException), Function(com.google.common.base.Function), IndexedInts(io.druid.segment.data.IndexedInts), MapBasedInputRow(io.druid.data.input.MapBasedInputRow), Interval(org.joda.time.Interval), Test(org.junit.Test)
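
In this test the Function returns null and is used only for its side effects; Sequences.toList forces the mapped Sequence so apply() actually runs against each cursor. Below is a minimal Guava-only sketch of that "lazy transform, then force it" shape, assuming Guava plus the JSR-305 @Nullable annotation it typically pulls in; the class name and printed output are illustrative, not part of the Druid test.

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import java.util.List;

import javax.annotation.Nullable;

public class ForcedTransformSketch {

    public static void main(String[] args) {
        final List<String> dims = ImmutableList.of("billy", "sally");
        // Iterables.transform is lazy; copying into an ArrayList forces apply() to run for
        // every element, the way Sequences.toList(Sequences.map(...)) drives the side-effecting
        // Function in the test above.
        final List<Object> results = Lists.newArrayList(Iterables.transform(dims, new Function<String, Object>() {

            @Nullable
            @Override
            public Object apply(String dim) {
                System.out.println("visiting dimension: " + dim);
                return null;
            }
        }));
        System.out.println(results);
    }
}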

Aggregations

Class (fully qualified name): usage count
Function (com.google.common.base.Function): 616
List (java.util.List): 138
ArrayList (java.util.ArrayList): 114
Nullable (javax.annotation.Nullable): 103
Map (java.util.Map): 97
IOException (java.io.IOException): 89
HashMap (java.util.HashMap): 78
Test (org.junit.Test): 73
ImmutableList (com.google.common.collect.ImmutableList): 49
File (java.io.File): 46
Set (java.util.Set): 46
Collection (java.util.Collection): 35
ImmutableMap (com.google.common.collect.ImmutableMap): 34
DateTime (org.joda.time.DateTime): 33
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 29
HashSet (java.util.HashSet): 27
Iterator (java.util.Iterator): 27
LinkedList (java.util.LinkedList): 26
ExecutionException (java.util.concurrent.ExecutionException): 24
Collectors (java.util.stream.Collectors): 15