
Example 11 with ShardSpec

use of io.druid.timeline.partition.ShardSpec in project druid by druid-io.

the class IndexGeneratorJobTest method loadShardSpecs.

private Map<Long, List<HadoopyShardSpec>> loadShardSpecs(String partitionType, Object[][][] shardInfoForEachShard) {
    Map<Long, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
    int shardCount = 0;
    int segmentNum = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        List<ShardSpec> specs = constructShardSpecFromShardInfo(partitionType, shardInfoForEachShard[segmentNum++]);
        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (int i = 0; i < specs.size(); ++i) {
            actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
        }
        shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
    }
    return shardSpecs;
}
Also used : List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) SingleDimensionShardSpec(io.druid.timeline.partition.SingleDimensionShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) Interval(org.joda.time.Interval)
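
The helper constructShardSpecFromShardInfo is not shown in this example. A minimal sketch of one plausible shape, assuming the test exercises "hashed" and "single" partition types and that each shard-info row carries either a (partitionNum, totalPartitions) pair or a (start, end) range of a single partition dimension (the dimension name "dim" below is a placeholder):

private List<ShardSpec> constructShardSpecFromShardInfo(String partitionType, Object[][] shardInfo) {
    // Hypothetical sketch, not the original test helper.
    List<ShardSpec> specs = Lists.newArrayList();
    if ("hashed".equals(partitionType)) {
        for (Object[] info : shardInfo) {
            // Assumed layout: [partitionNum, totalPartitions]
            specs.add(new HashBasedNumberedShardSpec((Integer) info[0], (Integer) info[1], null, HadoopDruidIndexerConfig.JSON_MAPPER));
        }
    } else if ("single".equals(partitionType)) {
        int partitionNum = 0;
        for (Object[] info : shardInfo) {
            // Assumed layout: [start, end] of the single partition dimension
            specs.add(new SingleDimensionShardSpec("dim", (String) info[0], (String) info[1], partitionNum++));
        }
    } else {
        throw new IllegalArgumentException("Unknown partition type: " + partitionType);
    }
    return specs;
}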

Example 12 with ShardSpec

use of io.druid.timeline.partition.ShardSpec in project hive by apache.

the class DruidStorageHandlerUtils method publishSegmentsAndCommit.

/**
 * First computes the segment timeline to accommodate the new segments (the INSERT INTO case),
 * then moves the segments to Druid deep storage with updated metadata/version.
 * All of this is done in a single transaction.
 *
 * @param connector DBI connector used to commit the metadata
 * @param metadataStorageTablesConfig Druid metadata table definitions
 * @param dataSource Druid datasource name
 * @param segments list of segments to move and commit to the metadata store
 * @param overwrite whether this is an INSERT OVERWRITE (as opposed to INSERT INTO)
 * @param conf Hadoop configuration
 * @param dataSegmentPusher pusher used to move segments to deep storage
 *
 * @return list of successfully published Druid segments; the returned segments carry the
 * updated versions and metadata after the move and timeline sorting
 *
 * @throws CallbackFailedException if the transactional callback fails
 */
public static List<DataSegment> publishSegmentsAndCommit(final SQLMetadataConnector connector, final MetadataStorageTablesConfig metadataStorageTablesConfig, final String dataSource, final List<DataSegment> segments, boolean overwrite, Configuration conf, DataSegmentPusher dataSegmentPusher) throws CallbackFailedException {
    return connector.getDBI().inTransaction((handle, transactionStatus) -> {
        // We create the timeline for the existing and new segments
        VersionedIntervalTimeline<String, DataSegment> timeline;
        if (overwrite) {
            // If we are overwriting, we disable existing sources
            disableDataSourceWithHandle(handle, metadataStorageTablesConfig, dataSource);
            // When overwriting, we just start with empty timeline,
            // as we are overwriting segments with new versions
            timeline = new VersionedIntervalTimeline<>(Ordering.natural());
        } else {
            // Append Mode
            if (segments.isEmpty()) {
                // If there are no new segments, we can just bail out
                return Collections.EMPTY_LIST;
            }
            // Otherwise, build a timeline of existing segments in metadata storage
            Interval indexedInterval = JodaUtils.umbrellaInterval(Iterables.transform(segments, input -> input.getInterval()));
            LOG.info("Building timeline for umbrella Interval [{}]", indexedInterval);
            timeline = getTimelineForIntervalWithHandle(handle, dataSource, indexedInterval, metadataStorageTablesConfig);
        }
        final List<DataSegment> finalSegmentsToPublish = Lists.newArrayList();
        for (DataSegment segment : segments) {
            List<TimelineObjectHolder<String, DataSegment>> existingChunks = timeline.lookup(segment.getInterval());
            if (existingChunks.size() > 1) {
                // Druid shard specs do not support multiple partitions for the same interval with different granularity.
                throw new IllegalStateException(String.format("Cannot allocate new segment for dataSource[%s], interval[%s], already have [%,d] chunks. Not possible to append new segment.", dataSource, segment.getInterval(), existingChunks.size()));
            }
            // Find out the segment with latest version and maximum partition number
            SegmentIdentifier max = null;
            final ShardSpec newShardSpec;
            final String newVersion;
            if (!existingChunks.isEmpty()) {
                // Some chunks already exist; find the one with the maximum partition number
                TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
                for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
                    if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
                        max = SegmentIdentifier.fromDataSegment(existing.getObject());
                    }
                }
            }
            if (max == null) {
                // No existing shard present in the database, use the current version.
                newShardSpec = segment.getShardSpec();
                newVersion = segment.getVersion();
            } else {
                // use version of existing max segment to generate new shard spec
                newShardSpec = getNextPartitionShardSpec(max.getShardSpec());
                newVersion = max.getVersion();
            }
            DataSegment publishedSegment = publishSegmentWithShardSpec(segment, newShardSpec, newVersion, getPath(segment).getFileSystem(conf), dataSegmentPusher);
            finalSegmentsToPublish.add(publishedSegment);
            timeline.add(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().createChunk(publishedSegment));
        }
        // Publish new segments to metadata storage
        final PreparedBatch batch = handle.prepareBatch(String.format("INSERT INTO %1$s (id, dataSource, created_date, start, \"end\", partitioned, version, used, payload) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :partitioned, :version, :used, :payload)", metadataStorageTablesConfig.getSegmentsTable()));
        for (final DataSegment segment : finalSegmentsToPublish) {
            batch.add(new ImmutableMap.Builder<String, Object>()
                    .put("id", segment.getIdentifier())
                    .put("dataSource", segment.getDataSource())
                    .put("created_date", new DateTime().toString())
                    .put("start", segment.getInterval().getStart().toString())
                    .put("end", segment.getInterval().getEnd().toString())
                    .put("partitioned", (segment.getShardSpec() instanceof NoneShardSpec) ? false : true)
                    .put("version", segment.getVersion())
                    .put("used", true)
                    .put("payload", JSON_MAPPER.writeValueAsBytes(segment))
                    .build());
            LOG.info("Published {}", segment.getIdentifier());
        }
        batch.execute();
        return finalSegmentsToPublish;
    });
}
Also used : SQLMetadataConnector(io.druid.metadata.SQLMetadataConnector) FoldController(org.skife.jdbi.v2.FoldController) Request(com.metamx.http.client.Request) FileSystem(org.apache.hadoop.fs.FileSystem) URL(java.net.URL) HttpMethod(org.jboss.netty.handler.codec.http.HttpMethod) LoggerFactory(org.slf4j.LoggerFactory) RetryPolicies(org.apache.hadoop.io.retry.RetryPolicies) FileStatus(org.apache.hadoop.fs.FileStatus) StatementContext(org.skife.jdbi.v2.StatementContext) InetAddress(java.net.InetAddress) SelectQueryConfig(io.druid.query.select.SelectQueryConfig) InputStreamResponseHandler(com.metamx.http.client.response.InputStreamResponseHandler) IndexIO(io.druid.segment.IndexIO) CharStreams(com.google.common.io.CharStreams) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) NamedType(com.fasterxml.jackson.databind.jsontype.NamedType) Path(org.apache.hadoop.fs.Path) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) DataSegmentPusher(io.druid.segment.loading.DataSegmentPusher) TimestampFloorExprMacro(io.druid.query.expression.TimestampFloorExprMacro) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) DataSegment(io.druid.timeline.DataSegment) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapUtils(com.metamx.common.MapUtils) Collection(java.util.Collection) Set(java.util.Set) Interner(com.google.common.collect.Interner) Reader(java.io.Reader) MetadataStorageTablesConfig(io.druid.metadata.MetadataStorageTablesConfig) FileNotFoundException(java.io.FileNotFoundException) TimestampParseExprMacro(io.druid.query.expression.TimestampParseExprMacro) List(java.util.List) PartitionChunk(io.druid.timeline.partition.PartitionChunk) ISOChronology(org.joda.time.chrono.ISOChronology) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) TrimExprMacro(io.druid.query.expression.TrimExprMacro) HttpClient(com.metamx.http.client.HttpClient) Iterables(com.google.common.collect.Iterables) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) TimestampFormatExprMacro(io.druid.query.expression.TimestampFormatExprMacro) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) TimestampExtractExprMacro(io.druid.query.expression.TimestampExtractExprMacro) HdfsDataSegmentPusher(io.druid.storage.hdfs.HdfsDataSegmentPusher) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) RegexpExtractExprMacro(io.druid.query.expression.RegexpExtractExprMacro) LikeExprMacro(io.druid.query.expression.LikeExprMacro) TimestampCeilExprMacro(io.druid.query.expression.TimestampCeilExprMacro) ShardSpec(io.druid.timeline.partition.ShardSpec) ArrayList(java.util.ArrayList) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) HashSet(java.util.HashSet) IndexMergerV9(io.druid.segment.IndexMergerV9) Interval(org.joda.time.Interval) SQLException(java.sql.SQLException) Lists(com.google.common.collect.Lists) JodaUtils(com.metamx.common.JodaUtils) ImmutableList(com.google.common.collect.ImmutableList) StringUtils(org.apache.hadoop.util.StringUtils) ResultIterator(org.skife.jdbi.v2.ResultIterator) TimestampShiftExprMacro(io.druid.query.expression.TimestampShiftExprMacro) OutputStream(java.io.OutputStream) HttpHeaders(org.jboss.netty.handler.codec.http.HttpHeaders) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) Logger(org.slf4j.Logger) Folder3(org.skife.jdbi.v2.Folder3) 
HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) EmittingLogger(com.metamx.emitter.EmittingLogger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) Throwables(com.google.common.base.Throwables) Interners(com.google.common.collect.Interners) Query(org.skife.jdbi.v2.Query) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) UnknownHostException(java.net.UnknownHostException) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) HdfsDataSegmentPusherConfig(io.druid.storage.hdfs.HdfsDataSegmentPusherConfig) Handle(org.skife.jdbi.v2.Handle) Ordering(com.google.common.collect.Ordering) ExprMacroTable(io.druid.math.expr.ExprMacroTable) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) HiveDruidSerializationModule(org.apache.hadoop.hive.druid.serde.HiveDruidSerializationModule) RetryProxy(org.apache.hadoop.io.retry.RetryProxy) NoopEmitter(com.metamx.emitter.core.NoopEmitter) ServiceEmitter(com.metamx.emitter.service.ServiceEmitter) Collections(java.util.Collections) MySQLConnector(io.druid.metadata.storage.mysql.MySQLConnector) InputStream(java.io.InputStream) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) DataSegment(io.druid.timeline.DataSegment) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) ImmutableMap(com.google.common.collect.ImmutableMap) DateTime(org.joda.time.DateTime) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) Interval(org.joda.time.Interval)
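
The getNextPartitionShardSpec helper referenced above is not reproduced on this page. A hedged sketch of what it might look like, assuming that only LinearShardSpec and NumberedShardSpec (the append-friendly spec types imported by this class) can be extended with an additional partition:

private static ShardSpec getNextPartitionShardSpec(ShardSpec shardSpec) {
    // Sketch only: derive the next partition's spec from the current max spec.
    if (shardSpec instanceof LinearShardSpec) {
        return new LinearShardSpec(shardSpec.getPartitionNum() + 1);
    } else if (shardSpec instanceof NumberedShardSpec) {
        // Keep the declared total partition count, bump the partition number
        return new NumberedShardSpec(shardSpec.getPartitionNum() + 1, ((NumberedShardSpec) shardSpec).getPartitions());
    } else {
        throw new IllegalStateException(String.format("Cannot append to segments with shard spec type [%s]", shardSpec.getClass().getSimpleName()));
    }
}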

Example 13 with ShardSpec

use of io.druid.timeline.partition.ShardSpec in project druid by druid-io.

the class HadoopDruidIndexerConfig method getBucket.

/********************************************
   Granularity/Bucket Helper Methods
   ********************************************/
/**
   * Get the proper bucket for some input row.
   *
   * @param inputRow an InputRow
   *
   * @return the Bucket that this row belongs to
   */
public Optional<Bucket> getBucket(InputRow inputRow) {
    final Optional<Interval> timeBucket = schema.getDataSchema().getGranularitySpec().bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()));
    if (!timeBucket.isPresent()) {
        return Optional.absent();
    }
    final DateTime bucketStart = timeBucket.get().getStart();
    final ShardSpec actualSpec = shardSpecLookups.get(bucketStart.getMillis()).getShardSpec(rollupGran.bucketStart(inputRow.getTimestamp()).getMillis(), inputRow);
    final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(bucketStart.getMillis()).get(actualSpec);
    return Optional.of(new Bucket(hadoopyShardSpec.getShardNum(), bucketStart, actualSpec.getPartitionNum()));
}
Also used : DateTime(org.joda.time.DateTime) ShardSpec(io.druid.timeline.partition.ShardSpec) Interval(org.joda.time.Interval)
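
The shardSpecLookups and hadoopShardSpecLookup maps used by getBucket are populated elsewhere in the config. A minimal initialization sketch, assuming shardSpecLookups is a Map<Long, ShardSpecLookup>, hadoopShardSpecLookup is a Map<Long, Map<ShardSpec, HadoopyShardSpec>>, and the input maps each bucket's start millis to that bucket's HadoopyShardSpecs (the helper name initShardSpecLookups is made up for illustration):

private void initShardSpecLookups(Map<Long, List<HadoopyShardSpec>> shardSpecs) {
    for (Map.Entry<Long, List<HadoopyShardSpec>> entry : shardSpecs.entrySet()) {
        if (entry.getValue() == null || entry.getValue().isEmpty()) {
            continue;
        }
        List<ShardSpec> actualSpecs = Lists.newArrayList();
        Map<ShardSpec, HadoopyShardSpec> byActualSpec = Maps.newHashMap();
        for (HadoopyShardSpec hadoopyShardSpec : entry.getValue()) {
            actualSpecs.add(hadoopyShardSpec.getActualSpec());
            byActualSpec.put(hadoopyShardSpec.getActualSpec(), hadoopyShardSpec);
        }
        // ShardSpec.getLookup builds a ShardSpecLookup that routes a row to one of the bucket's specs
        shardSpecLookups.put(entry.getKey(), actualSpecs.get(0).getLookup(actualSpecs));
        hadoopShardSpecLookup.put(entry.getKey(), byActualSpec);
    }
}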

Example 14 with ShardSpec

use of io.druid.timeline.partition.ShardSpec in project druid by druid-io.

the class OrcIndexGeneratorJobTest method loadShardSpecs.

private Map<Long, List<HadoopyShardSpec>> loadShardSpecs(Integer[][][] shardInfoForEachShard) {
    Map<Long, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
    int shardCount = 0;
    int segmentNum = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
        List<ShardSpec> specs = Lists.newArrayList();
        for (Integer[] shardInfo : shardInfoForEachShard[segmentNum++]) {
            specs.add(new HashBasedNumberedShardSpec(shardInfo[0], shardInfo[1], null, HadoopDruidIndexerConfig.JSON_MAPPER));
        }
        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (ShardSpec spec : specs) {
            actualSpecs.add(new HadoopyShardSpec(spec, shardCount++));
        }
        shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
    }
    return shardSpecs;
}
Also used : HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HadoopyShardSpec(io.druid.indexer.HadoopyShardSpec) HadoopyShardSpec(io.druid.indexer.HadoopyShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) Interval(org.joda.time.Interval)
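
A short usage sketch for the returned map, illustrating that shard numbers are assigned globally across buckets while partition numbers restart per bucket (only getShardNum() and getActualSpec() on HadoopyShardSpec are assumed here; the printout is not part of the original test):

Map<Long, List<HadoopyShardSpec>> loaded = loadShardSpecs(shardInfoForEachShard);
for (Map.Entry<Long, List<HadoopyShardSpec>> entry : loaded.entrySet()) {
    for (HadoopyShardSpec hadoopyShardSpec : entry.getValue()) {
        // shardNum is global across all buckets; partitionNum is local to the bucket's spec list
        System.out.println(String.format("bucket=%d shardNum=%d partitionNum=%d", entry.getKey(), hadoopyShardSpec.getShardNum(), hadoopyShardSpec.getActualSpec().getPartitionNum()));
    }
}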

Example 15 with ShardSpec

use of io.druid.timeline.partition.ShardSpec in project druid by druid-io.

the class DimFilterUtilsTest method shardSpec.

private static ShardSpec shardSpec(String dimension, Range<String> range) {
    ShardSpec shard = EasyMock.createMock(ShardSpec.class);
    EasyMock.expect(shard.getDomain()).andReturn(ImmutableMap.of(dimension, range)).anyTimes();
    return shard;
}
Also used : ShardSpec(io.druid.timeline.partition.ShardSpec)
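
A hedged usage sketch for the mock above: EasyMock.replay must be called before the stubbed getDomain() answers, and a caller such as DimFilterUtils can then test whether a shard's domain overlaps a filter range (the overlap check below is illustrative, not the utility's actual implementation):

ShardSpec shard = shardSpec("dim1", Range.closed("a", "m"));
EasyMock.replay(shard);
// A shard can only match a filter on dim1 if its domain range intersects the filter's range
Range<String> filterRange = Range.closed("k", "z");
boolean mayMatch = shard.getDomain().get("dim1").isConnected(filterRange);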

Aggregations

ShardSpec (io.druid.timeline.partition.ShardSpec)18 Interval (org.joda.time.Interval)10 NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec)8 NoneShardSpec (io.druid.timeline.partition.NoneShardSpec)7 List (java.util.List)6 HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec)5 IOException (java.io.IOException)5 Map (java.util.Map)5 Optional (com.google.common.base.Optional)4 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 DataSegment (io.druid.timeline.DataSegment)4 PartitionChunk (io.druid.timeline.partition.PartitionChunk)4 DateTime (org.joda.time.DateTime)4 Test (org.junit.Test)4 SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier)3 TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder)3 SingleDimensionShardSpec (io.druid.timeline.partition.SingleDimensionShardSpec)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 Function (com.google.common.base.Function)2