Search in sources :

Example 1 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class HadoopIngestionSpec method updateSegmentListIfDatasourcePathSpecIsUsed.

public static HadoopIngestionSpec updateSegmentListIfDatasourcePathSpecIsUsed(HadoopIngestionSpec spec, ObjectMapper jsonMapper, UsedSegmentLister segmentLister) throws IOException {
    String dataSource = "dataSource";
    String type = "type";
    String multi = "multi";
    String children = "children";
    String segments = "segments";
    String ingestionSpec = "ingestionSpec";
    Map<String, Object> pathSpec = spec.getIOConfig().getPathSpec();
    Map<String, Object> datasourcePathSpec = null;
    if (pathSpec.get(type).equals(dataSource)) {
        datasourcePathSpec = pathSpec;
    } else if (pathSpec.get(type).equals(multi)) {
        List<Map<String, Object>> childPathSpecs = (List<Map<String, Object>>) pathSpec.get(children);
        for (Map<String, Object> childPathSpec : childPathSpecs) {
            if (childPathSpec.get(type).equals(dataSource)) {
                datasourcePathSpec = childPathSpec;
                break;
            }
        }
    }
    if (datasourcePathSpec != null) {
        Map<String, Object> ingestionSpecMap = (Map<String, Object>) datasourcePathSpec.get(ingestionSpec);
        DatasourceIngestionSpec ingestionSpecObj = jsonMapper.convertValue(ingestionSpecMap, DatasourceIngestionSpec.class);
        List<DataSegment> segmentsList = segmentLister.getUsedSegmentsForIntervals(ingestionSpecObj.getDataSource(), ingestionSpecObj.getIntervals());
        if (ingestionSpecObj.getSegments() != null) {
            //ensure that user supplied segment list matches with the segmentsList obtained from db
            //this safety check lets users do test-n-set kind of batch delta ingestion where the delta
            //ingestion task would only run if current state of the system is same as when they submitted
            //the task.
            List<DataSegment> userSuppliedSegmentsList = ingestionSpecObj.getSegments();
            if (segmentsList.size() == userSuppliedSegmentsList.size()) {
                Set<DataSegment> segmentsSet = new HashSet<>(segmentsList);
                for (DataSegment userSegment : userSuppliedSegmentsList) {
                    if (!segmentsSet.contains(userSegment)) {
                        throw new IOException("user supplied segments list did not match with segments list obtained from db");
                    }
                }
            } else {
                throw new IOException("user supplied segments list did not match with segments list obtained from db");
            }
        }
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        for (DataSegment segment : segmentsList) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<WindowedDataSegment> windowedSegments = Lists.newArrayList();
        for (Interval interval : ingestionSpecObj.getIntervals()) {
            final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
            for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
                for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                    windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
                }
            }
            datasourcePathSpec.put(segments, windowedSegments);
        }
    }
    return spec;
}
Also used : DatasourceIngestionSpec(io.druid.indexer.hadoop.DatasourceIngestionSpec) IOException(java.io.IOException) DataSegment(io.druid.timeline.DataSegment) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) List(java.util.List) Map(java.util.Map) HashSet(java.util.HashSet) Interval(org.joda.time.Interval)

Example 2 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class IngestSegmentFirehoseFactory method connect.

@Override
public Firehose connect(InputRowParser inputRowParser) throws IOException, ParseException {
    log.info("Connecting firehose: dataSource[%s], interval[%s]", dataSource, interval);
    if (taskToolbox == null) {
        // Noop Task is just used to create the toolbox and list segments.
        taskToolbox = injector.getInstance(TaskToolboxFactory.class).build(new NoopTask("reingest", 0, 0, null, null, null));
    }
    try {
        final List<DataSegment> usedSegments = taskToolbox.getTaskActionClient().submit(new SegmentListUsedAction(dataSource, interval, null));
        final Map<DataSegment, File> segmentFileMap = taskToolbox.fetchSegments(usedSegments);
        VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : usedSegments) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline.lookup(interval);
        final List<String> dims;
        if (dimensions != null) {
            dims = dimensions;
        } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> timelineObjectHolder) {
                    return Iterables.concat(Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getDimensions();
                        }
                    }));
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, inputRowParser.getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        final List<String> metricsList;
        if (metrics != null) {
            metricsList = metrics;
        } else {
            Set<String> metricsSet = Sets.newHashSet(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<String>>() {

                @Override
                public Iterable<String> apply(TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.concat(Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, Iterable<String>>() {

                        @Override
                        public Iterable<String> apply(PartitionChunk<DataSegment> input) {
                            return input.getObject().getMetrics();
                        }
                    }));
                }
            })));
            metricsList = Lists.newArrayList(metricsSet);
        }
        final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

            @Override
            public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                    @Override
                    public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                        final DataSegment segment = input.getObject();
                        try {
                            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getIdentifier()))), holder.getInterval());
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return new IngestSegmentFirehose(adapters, dims, metricsList, dimFilter, Granularities.NONE);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    } catch (SegmentLoadingException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Set(java.util.Set) NoopTask(io.druid.indexing.common.task.NoopTask) DataSegment(io.druid.timeline.DataSegment) Function(com.google.common.base.Function) PartitionChunk(io.druid.timeline.partition.PartitionChunk) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) SegmentLoadingException(io.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) SegmentListUsedAction(io.druid.indexing.common.actions.SegmentListUsedAction) File(java.io.File)

Example 3 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class SQLMetadataSegmentManager method enableDatasource.

@Override
public boolean enableDatasource(final String ds) {
    try {
        final IDBI dbi = connector.getDBI();
        VersionedIntervalTimeline<String, DataSegment> segmentTimeline = connector.inReadOnlyTransaction(new TransactionCallback<VersionedIntervalTimeline<String, DataSegment>>() {

            @Override
            public VersionedIntervalTimeline<String, DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
                return handle.createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource", getSegmentsTable())).setFetchSize(connector.getStreamingFetchSize()).bind("dataSource", ds).map(ByteArrayMapper.FIRST).fold(new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()), new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>() {

                    @Override
                    public VersionedIntervalTimeline<String, DataSegment> fold(VersionedIntervalTimeline<String, DataSegment> timeline, byte[] payload, FoldController foldController, StatementContext statementContext) throws SQLException {
                        try {
                            final DataSegment segment = DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
                            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                            return timeline;
                        } catch (Exception e) {
                            throw new SQLException(e.toString());
                        }
                    }
                });
            }
        });
        final List<DataSegment> segments = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
                segments.add(partitionChunk.getObject());
            }
        }
        if (segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return false;
        }
        dbi.withHandle(new HandleCallback<Void>() {

            @Override
            public Void withHandle(Handle handle) throws Exception {
                Batch batch = handle.createBatch();
                for (DataSegment segment : segments) {
                    batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier()));
                }
                batch.execute();
                return null;
            }
        });
    } catch (Exception e) {
        log.error(e, "Exception enabling datasource %s", ds);
        return false;
    }
    return true;
}
Also used : IDBI(org.skife.jdbi.v2.IDBI) SQLException(java.sql.SQLException) TransactionStatus(org.skife.jdbi.v2.TransactionStatus) DataSegment(io.druid.timeline.DataSegment) SQLException(java.sql.SQLException) IOException(java.io.IOException) Handle(org.skife.jdbi.v2.Handle) StatementContext(org.skife.jdbi.v2.StatementContext) FoldController(org.skife.jdbi.v2.FoldController) Batch(org.skife.jdbi.v2.Batch) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) Folder3(org.skife.jdbi.v2.Folder3) Interval(org.joda.time.Interval)

Example 4 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class DruidCoordinatorRuleRunner method run.

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    replicatorThrottler.updateParams(coordinator.getDynamicConfigs().getReplicationThrottleLimit(), coordinator.getDynamicConfigs().getReplicantLifetime());
    CoordinatorStats stats = new CoordinatorStats();
    DruidCluster cluster = params.getDruidCluster();
    if (cluster.isEmpty()) {
        log.warn("Uh... I have no servers. Not assigning anything...");
        return params;
    }
    // find available segments which are not overshadowed by other segments in DB
    // only those would need to be loaded/dropped
    // anything overshadowed by served segments is dropped automatically by DruidCoordinatorCleanupOvershadowed
    Map<String, VersionedIntervalTimeline<String, DataSegment>> timelines = new HashMap<>();
    for (DataSegment segment : params.getAvailableSegments()) {
        VersionedIntervalTimeline<String, DataSegment> timeline = timelines.get(segment.getDataSource());
        if (timeline == null) {
            timeline = new VersionedIntervalTimeline<>(Comparators.comparable());
            timelines.put(segment.getDataSource(), timeline);
        }
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    Set<DataSegment> overshadowed = new HashSet<>();
    for (VersionedIntervalTimeline<String, DataSegment> timeline : timelines.values()) {
        for (TimelineObjectHolder<String, DataSegment> holder : timeline.findOvershadowed()) {
            for (DataSegment dataSegment : holder.getObject().payloads()) {
                overshadowed.add(dataSegment);
            }
        }
    }
    Set<DataSegment> nonOvershadowed = new HashSet<>();
    for (DataSegment dataSegment : params.getAvailableSegments()) {
        if (!overshadowed.contains(dataSegment)) {
            nonOvershadowed.add(dataSegment);
        }
    }
    for (String tier : cluster.getTierNames()) {
        replicatorThrottler.updateReplicationState(tier);
    }
    DruidCoordinatorRuntimeParams paramsWithReplicationManager = params.buildFromExistingWithoutAvailableSegments().withReplicationManager(replicatorThrottler).withAvailableSegments(nonOvershadowed).build();
    // Run through all matched rules for available segments
    DateTime now = new DateTime();
    MetadataRuleManager databaseRuleManager = paramsWithReplicationManager.getDatabaseRuleManager();
    final List<String> segmentsWithMissingRules = Lists.newArrayListWithCapacity(MAX_MISSING_RULES);
    int missingRules = 0;
    for (DataSegment segment : paramsWithReplicationManager.getAvailableSegments()) {
        List<Rule> rules = databaseRuleManager.getRulesWithDefault(segment.getDataSource());
        boolean foundMatchingRule = false;
        for (Rule rule : rules) {
            if (rule.appliesTo(segment, now)) {
                stats.accumulate(rule.run(coordinator, paramsWithReplicationManager, segment));
                foundMatchingRule = true;
                break;
            }
        }
        if (!foundMatchingRule) {
            if (segmentsWithMissingRules.size() < MAX_MISSING_RULES) {
                segmentsWithMissingRules.add(segment.getIdentifier());
            }
            missingRules++;
        }
    }
    if (!segmentsWithMissingRules.isEmpty()) {
        log.makeAlert("Unable to find matching rules!").addData("segmentsWithMissingRulesCount", missingRules).addData("segmentsWithMissingRules", segmentsWithMissingRules).emit();
    }
    return paramsWithReplicationManager.buildFromExistingWithoutAvailableSegments().withCoordinatorStats(stats).withAvailableSegments(params.getAvailableSegments()).build();
}
Also used : DruidCoordinatorRuntimeParams(io.druid.server.coordinator.DruidCoordinatorRuntimeParams) CoordinatorStats(io.druid.server.coordinator.CoordinatorStats) MetadataRuleManager(io.druid.metadata.MetadataRuleManager) HashMap(java.util.HashMap) DruidCluster(io.druid.server.coordinator.DruidCluster) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) Rule(io.druid.server.coordinator.rules.Rule) HashSet(java.util.HashSet)

Example 5 with VersionedIntervalTimeline

use of io.druid.timeline.VersionedIntervalTimeline in project druid by druid-io.

the class SchemalessIndexTest method makeAppendedMMappedIndex.

private static QueryableIndex makeAppendedMMappedIndex(Iterable<Pair<String, AggregatorFactory[]>> files, final List<Interval> intervals) {
    try {
        File tmpFile = File.createTempFile("yay", "boo");
        tmpFile.delete();
        File mergedFile = new File(tmpFile, "merged");
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();
        List<File> filesToMap = makeFilesToMap(tmpFile, files);
        VersionedIntervalTimeline<Integer, File> timeline = new VersionedIntervalTimeline<Integer, File>(Ordering.natural().nullsFirst());
        ShardSpec noneShardSpec = NoneShardSpec.instance();
        for (int i = 0; i < intervals.size(); i++) {
            timeline.add(intervals.get(i), i, noneShardSpec.createChunk(filesToMap.get(i)));
        }
        final List<IndexableAdapter> adapters = Lists.newArrayList(Iterables.concat(// TimelineObjectHolder is actually an iterable of iterable of indexable adapters
        Iterables.transform(timeline.lookup(new Interval("1000-01-01/3000-01-01")), new Function<TimelineObjectHolder<Integer, File>, Iterable<IndexableAdapter>>() {

            @Override
            public Iterable<IndexableAdapter> apply(final TimelineObjectHolder<Integer, File> timelineObjectHolder) {
                return Iterables.transform(timelineObjectHolder.getObject(), // Each chunk can be used to build the actual IndexableAdapter
                new Function<PartitionChunk<File>, IndexableAdapter>() {

                    @Override
                    public IndexableAdapter apply(PartitionChunk<File> chunk) {
                        try {
                            return new RowboatFilteringIndexAdapter(new QueryableIndexIndexableAdapter(INDEX_IO.loadIndex(chunk.getObject())), new Predicate<Rowboat>() {

                                @Override
                                public boolean apply(Rowboat input) {
                                    return timelineObjectHolder.getInterval().contains(input.getTimestamp());
                                }
                            });
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return INDEX_IO.loadIndex(INDEX_MERGER.append(adapters, null, mergedFile, indexSpec));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Also used : IOException(java.io.IOException) ShardSpec(io.druid.timeline.partition.ShardSpec) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) Function(com.google.common.base.Function) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) PartitionChunk(io.druid.timeline.partition.PartitionChunk) File(java.io.File) Interval(org.joda.time.Interval)

Aggregations

VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline)13 DataSegment (io.druid.timeline.DataSegment)10 Interval (org.joda.time.Interval)9 TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder)6 IOException (java.io.IOException)4 Function (com.google.common.base.Function)3 CoordinatorStats (io.druid.server.coordinator.CoordinatorStats)3 PartitionChunk (io.druid.timeline.partition.PartitionChunk)3 HashSet (java.util.HashSet)3 Map (java.util.Map)3 CharSource (com.google.common.io.CharSource)2 IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment)2 Segment (io.druid.segment.Segment)2 IncrementalIndex (io.druid.segment.incremental.IncrementalIndex)2 OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex)2 DruidCluster (io.druid.server.coordinator.DruidCluster)2 SingleElementPartitionChunk (io.druid.timeline.partition.SingleElementPartitionChunk)2 File (java.io.File)2 DateTime (org.joda.time.DateTime)2 Predicate (com.google.common.base.Predicate)1