
Example 56 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class CompactionTaskRunTest, method testRunRegularIndexTaskWithIngestSegmentFirehose.

/**
 * Run a regular index task that's equivalent to the compaction task in {@link #testRunWithDynamicPartitioning()},
 * using {@link IngestSegmentFirehoseFactory}.
 *
 * This is not strictly CompactionTask-related, but it is conceptually similar and requires
 * much the same setup that this test suite already provides.
 *
 * It could be moved to a separate test class if needed.
 */
@Test
public void testRunRegularIndexTaskWithIngestSegmentFirehose() throws Exception {
    runIndexTask();
    IndexTask indexTask = new IndexTask(
        null,
        null,
        new IndexTask.IndexIngestionSpec(
            new DataSchema(
                "test",
                getObjectMapper().convertValue(new StringInputRowParser(DEFAULT_PARSE_SPEC, null), Map.class),
                new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
                new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
                null,
                getObjectMapper()
            ),
            new IndexTask.IndexIOConfig(
                new IngestSegmentFirehoseFactory(
                    DATA_SOURCE,
                    Intervals.of("2014-01-01/2014-01-02"),
                    null,
                    null,
                    null,
                    null,
                    null,
                    getIndexIO(),
                    coordinatorClient,
                    segmentCacheManagerFactory,
                    RETRY_POLICY_FACTORY
                ),
                false,
                false
            ),
            IndexTaskTest.createTuningConfig(5000000, null, null, Long.MAX_VALUE, null, false, true)
        ),
        null
    );
    // This is a regular index so we need to explicitly add this context to store the CompactionState
    indexTask.addToContext(Tasks.STORE_COMPACTION_STATE_KEY, true);
    final Pair<TaskStatus, List<DataSegment>> resultPair = runTask(indexTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    final List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(3, segments.size());
    for (int i = 0; i < 3; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1), segments.get(i).getInterval());
        Assert.assertEquals(getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of()), segments.get(i).getLastCompactionState());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
        }
    }
}
Also used : LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) DataSchema(org.apache.druid.segment.indexing.DataSchema) IngestSegmentFirehoseFactory(org.apache.druid.indexing.firehose.IngestSegmentFirehoseFactory) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) Map(java.util.Map) HashMap(java.util.HashMap) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)
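
For comparison, here is a minimal standalone sketch of the deprecated parser-map DataSchema constructor used in the example above. It is illustrative only and untested: the timeAndDims parse spec and the plain DefaultObjectMapper stand in for the test's DEFAULT_PARSE_SPEC and injected getObjectMapper(), and the datasource name is hypothetical.

import java.util.Collections;
import java.util.Map;

import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringInputRowParser;
import org.apache.druid.data.input.impl.TimeAndDimsParseSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.UniformGranularitySpec;

import com.fasterxml.jackson.databind.ObjectMapper;

public class LegacyDataSchemaSketch {

    public static DataSchema build() {
        ObjectMapper mapper = new DefaultObjectMapper();
        // Stand-in for the test's DEFAULT_PARSE_SPEC: a "ts" timestamp column and no explicit dimensions.
        StringInputRowParser parser = new StringInputRowParser(
            new TimeAndDimsParseSpec(new TimestampSpec("ts", "auto", null), new DimensionsSpec(Collections.emptyList())),
            null
        );
        // The deprecated constructor keeps the parser as a Map so it can be re-serialized with the same mapper.
        Map<String, Object> parserMap = mapper.convertValue(parser, Map.class);
        return new DataSchema(
            "test", // datasource name (illustrative)
            parserMap,
            new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
            new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
            null, // no TransformSpec
            mapper
        );
    }
}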

Example 57 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class KinesisSupervisorTest, method getDataSchema.

private static DataSchema getDataSchema(String dataSource) {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(dataSource, new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()), null);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)
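
A small hypothetical usage sketch (the datasource name is illustrative) showing how the fields set above can be read back through DataSchema's accessors:

DataSchema schema = getDataSchema("metrics-kinesis");             // hypothetical datasource name
// The accessors mirror the constructor arguments used above.
String dataSource = schema.getDataSource();                       // "metrics-kinesis"
TimestampSpec timestampSpec = schema.getTimestampSpec();          // column "timestamp", ISO format
DimensionsSpec dimensionsSpec = schema.getDimensionsSpec();       // dim1, dim2
AggregatorFactory[] aggregators = schema.getAggregators();        // the "rows" count aggregator
GranularitySpec granularitySpec = schema.getGranularitySpec();    // HOUR segments, NONE query granularity, no fixed intervals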

Example 58 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class KafkaSupervisorTest, method testIsTaskCurrent.

@Test
public void testIsTaskCurrent() {
    DateTime minMessageTime = DateTimes.nowUtc();
    DateTime maxMessageTime = DateTimes.nowUtc().plus(10000);
    KafkaSupervisor supervisor = getSupervisor(
        2,
        1,
        true,
        "PT1H",
        new Period("P1D"),
        new Period("P1D"),
        false,
        kafkaHost,
        dataSchema,
        new KafkaSupervisorTuningConfig(
            null, 1000, null, null, 50000, null, new Period("P1Y"), new File("/test"),
            null, null, null, false, null, false, null, numThreads,
            TEST_CHAT_THREADS, TEST_CHAT_RETRIES, TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT,
            null, null, null, null, null
        )
    );
    supervisor.addTaskGroupToActivelyReadingTaskGroup(42, ImmutableMap.of(0, 0L, 2, 0L), Optional.of(minMessageTime), Optional.of(maxMessageTime), ImmutableSet.of("id1", "id2", "id3", "id4"), ImmutableSet.of());
    DataSchema modifiedDataSchema = getDataSchema("some other datasource");
    KafkaSupervisorTuningConfig modifiedTuningConfig = new KafkaSupervisorTuningConfig(
        null,
        // This is different
        42,
        null, null, 50000, null, new Period("P1Y"), new File("/test"),
        null, null, null, false, null, null, null, numThreads,
        TEST_CHAT_THREADS, TEST_CHAT_RETRIES, TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT,
        null, null, null, null, null
    );
    KafkaIndexTask taskFromStorage = createKafkaIndexTask("id1", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, dataSchema, supervisor.getTuningConfig());
    KafkaIndexTask taskFromStorageMismatchedDataSchema = createKafkaIndexTask("id2", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, modifiedDataSchema, supervisor.getTuningConfig());
    KafkaIndexTask taskFromStorageMismatchedTuningConfig = createKafkaIndexTask("id3", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 0L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, dataSchema, modifiedTuningConfig);
    KafkaIndexTask taskFromStorageMismatchedPartitionsWithTaskGroup = createKafkaIndexTask("id4", 0, new SeekableStreamStartSequenceNumbers<>("topic", ImmutableMap.of(0, 0L, 2, 6L), ImmutableSet.of()), new SeekableStreamEndSequenceNumbers<>("topic", ImmutableMap.of(0, Long.MAX_VALUE, 2, Long.MAX_VALUE)), minMessageTime, maxMessageTime, dataSchema, supervisor.getTuningConfig());
    EasyMock.expect(taskStorage.getTask("id1")).andReturn(Optional.of(taskFromStorage)).once();
    EasyMock.expect(taskStorage.getTask("id2")).andReturn(Optional.of(taskFromStorageMismatchedDataSchema)).once();
    EasyMock.expect(taskStorage.getTask("id3")).andReturn(Optional.of(taskFromStorageMismatchedTuningConfig)).once();
    EasyMock.expect(taskStorage.getTask("id4")).andReturn(Optional.of(taskFromStorageMismatchedPartitionsWithTaskGroup)).once();
    replayAll();
    Assert.assertTrue(supervisor.isTaskCurrent(42, "id1"));
    Assert.assertFalse(supervisor.isTaskCurrent(42, "id2"));
    Assert.assertFalse(supervisor.isTaskCurrent(42, "id3"));
    Assert.assertFalse(supervisor.isTaskCurrent(42, "id4"));
    verifyAll();
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) KafkaIndexTask(org.apache.druid.indexing.kafka.KafkaIndexTask) Period(org.joda.time.Period) File(java.io.File) DateTime(org.joda.time.DateTime) Test(org.junit.Test)

Example 59 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class AppenderatorDriverRealtimeIndexTask, method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) {
    runThread = Thread.currentThread();
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
    setupTimeoutAlert();
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
    this.metrics = fireDepartmentForMetrics.getMetrics();
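    // No commit metadata is tracked for this task, so a no-op committer supplier is used.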
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
    appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
    TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
    StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
    try {
        log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        driver.startJob(segmentId -> {
            try {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
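        // A new sequence name is generated after each publish so every batch of segments is appended under its own sequence.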
        int sequenceNumber = 0;
        String sequenceName = makeSequenceName(getId(), sequenceNumber);
        final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
            if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
            }
            if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
            }
            final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
            return toolbox.getTaskActionClient().submit(action);
        };
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Time to read data!
        while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
            try {
                InputRow inputRow = firehose.nextRow();
                if (inputRow == null) {
                    log.debug("Discarded null row, considering thrownAway.");
                    rowIngestionMeters.incrementThrownAway();
                } else {
                    AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
                    if (addResult.isOk()) {
                        final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                        if (isPushRequired) {
                            publishSegments(driver, publisher, committerSupplier, sequenceName);
                            sequenceNumber++;
                            sequenceName = makeSequenceName(getId(), sequenceNumber);
                        }
                    } else {
                        // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                }
            } catch (ParseException e) {
                handleParseException(e);
            }
        }
        ingestionState = IngestionState.COMPLETED;
        if (!gracefullyStopped) {
            synchronized (this) {
                if (gracefullyStopped) {
                    // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                    log.info("Gracefully stopping.");
                } else {
                    finishingJob = true;
                }
            }
            if (finishingJob) {
                log.info("Finishing job...");
                // Publish any remaining segments
                publishSegments(driver, publisher, committerSupplier, sequenceName);
                waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
            }
        } else if (firehose != null) {
            log.info("Task was gracefully stopped, will persist data before exiting");
            persistAndWait(driver, committerSupplier.get());
        }
    } catch (Throwable e) {
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
        CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
        appenderator.close();
        CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
        toolbox.removeMonitor(metricsMonitor);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }
    }
    log.info("Job done!");
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.success(getId());
}
Also used : StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSchema(org.apache.druid.segment.indexing.DataSchema) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) TaskLock(org.apache.druid.indexing.common.TaskLock) TaskLockType(org.apache.druid.indexing.common.TaskLockType) LockGranularity(org.apache.druid.indexing.common.LockGranularity) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) Firehose(org.apache.druid.data.input.Firehose) InputRow(org.apache.druid.data.input.InputRow) Committer(org.apache.druid.data.input.Committer) Committers(org.apache.druid.segment.realtime.plumber.Committers) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) IngestionState(org.apache.druid.indexer.IngestionState) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ParseException(org.apache.druid.java.util.common.parsers.ParseException) ISE(org.apache.druid.java.util.common.ISE) StringUtils(org.apache.druid.java.util.common.StringUtils) SegmentUtils(org.apache.druid.segment.SegmentUtils) Throwables(com.google.common.base.Throwables) Preconditions(com.google.common.base.Preconditions) IOException(java.io.IOException) CloseableUtils(org.apache.druid.utils.CloseableUtils)

Example 60 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class GranularityPathSpecTest, method testAddInputPath.

@Test
public void testAddInputPath() throws Exception {
    UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));
    HadoopIngestionSpec spec = new HadoopIngestionSpec(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[0],
            new UniformGranularitySpec(
                Granularities.DAY,
                Granularities.MINUTE,
                ImmutableList.of(Intervals.of("2015-11-06T00:00Z/2015-11-07T00:00Z"))
            ),
            null,
            jsonMapper
        ),
        new HadoopIOConfig(null, null, null),
        DEFAULT_TUNING_CONFIG
    );
    granularityPathSpec.setDataGranularity(Granularities.HOUR);
    granularityPathSpec.setFilePattern(".*");
    granularityPathSpec.setInputFormat(TextInputFormat.class);
    Job job = Job.getInstance();
    String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";
    testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=00");
    testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=02");
    testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=05");
    testFolder.newFile("test/y=2015/m=11/d=06/H=00/file1");
    testFolder.newFile("test/y=2015/m=11/d=06/H=02/file2");
    testFolder.newFile("test/y=2015/m=11/d=06/H=05/file3");
    testFolder.newFile("test/y=2015/m=11/d=06/H=05/file4");
    granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
    granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);
    String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
    String expected = Joiner.on(",").join(
        Lists.newArrayList(
            StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=00/file1"),
            StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=02/file2"),
            StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file3"),
            StringUtils.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file4")
        )
    );
    Assert.assertEquals("Did not find expected input paths", expected, actual);
}
Also used : HadoopIngestionSpec(org.apache.druid.indexer.HadoopIngestionSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Job(org.apache.hadoop.mapreduce.Job) HadoopIOConfig(org.apache.druid.indexer.HadoopIOConfig) Test(org.junit.Test)

Aggregations

DataSchema (org.apache.druid.segment.indexing.DataSchema): 80
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 49
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 45
Test (org.junit.Test): 44
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 32
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 25
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 22
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 19
InputSource (org.apache.druid.data.input.InputSource): 17
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 17
File (java.io.File): 16
Map (java.util.Map): 15
InputFormat (org.apache.druid.data.input.InputFormat): 15
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 15
SamplerResponse (org.apache.druid.client.indexing.SamplerResponse): 14
SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow): 13
CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 13
Interval (org.joda.time.Interval): 13
ArrayList (java.util.ArrayList): 12
JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 12