Search in sources :

Example 1 with Firehose

use of org.apache.druid.data.input.Firehose in project druid by druid-io.

the class LocalFirehoseFactoryTest method testConnect.

@Test
public void testConnect() throws IOException {
    try (final Firehose firehose = factory.connect(new StringInputRowParser(new CSVParseSpec(new TimestampSpec("timestamp", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("timestamp", "a"))), ",", Arrays.asList("timestamp", "a"), false, 0), StandardCharsets.UTF_8.name()), null)) {
        final List<Row> rows = new ArrayList<>();
        while (firehose.hasMore()) {
            rows.add(firehose.nextRow());
        }
        Assert.assertEquals(5, rows.size());
        rows.sort(Comparator.comparing(Row::getTimestamp));
        for (int i = 0; i < 5; i++) {
            final List<String> dimVals = rows.get(i).getDimension("a");
            Assert.assertEquals(1, dimVals.size());
            Assert.assertEquals(i + "th test file", dimVals.get(0));
        }
    }
}
Also used : Firehose(org.apache.druid.data.input.Firehose) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Row(org.apache.druid.data.input.Row) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 2 with Firehose

use of org.apache.druid.data.input.Firehose in project druid by druid-io.

the class SqlFirehoseFactoryTest method testWithoutCacheAndFetch.

@Test
public void testWithoutCacheAndFetch() throws Exception {
    derbyConnector = derbyConnectorRule.getConnector();
    SqlTestUtils testUtils = new SqlTestUtils(derbyConnector);
    testUtils.createAndUpdateTable(TABLE_NAME_1, 10);
    final SqlFirehoseFactory factory = new SqlFirehoseFactory(SQLLIST1, 0L, 0L, 0L, 0L, true, testUtils.getDerbyFirehoseConnector(), mapper);
    final List<Row> rows = new ArrayList<>();
    final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCacheAndFetch");
    try (Firehose firehose = factory.connect(parser, firehoseTmpDir)) {
        while (firehose.hasMore()) {
            rows.add(firehose.nextRow());
        }
    }
    assertResult(rows, SQLLIST1);
    assertNumRemainingCacheFiles(firehoseTmpDir, 0);
    testUtils.dropTable(TABLE_NAME_1);
}
Also used : SqlTestUtils(org.apache.druid.metadata.input.SqlTestUtils) Firehose(org.apache.druid.data.input.Firehose) ArrayList(java.util.ArrayList) Row(org.apache.druid.data.input.Row) File(java.io.File) Test(org.junit.Test)

Example 3 with Firehose

use of org.apache.druid.data.input.Firehose in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method testReportParseExceptionsOnBadMetric.

@Test(timeout = 60_000L)
public void testReportParseExceptionsOnBadMetric() throws Exception {
    expectPublishedSegments(0);
    final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, true);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task);
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", 2000000L, "dim1", "foo", "met1", "1"), ImmutableMap.of("t", 3000000L, "dim1", "foo", "met1", "foo"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", "foo"), ImmutableMap.of("t", 4000000L, "dim2", "bar", "met1", 2.0)));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for the task to finish.
    TaskStatus status = statusFuture.get();
    Assert.assertTrue(status.getErrorMsg().contains("org.apache.druid.java.util.common.RE: Max parse exceptions[0] exceeded"));
    IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
    List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
    List<String> expectedMessages = ImmutableList.of("Unable to parse value[foo] for field[met1]");
    List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
        return ((List<String>) r.get("details")).get(0);
    }).collect(Collectors.toList());
    Assert.assertEquals(expectedMessages, actualMessages);
    List<String> expectedInputs = ImmutableList.of("{t=3000000, dim1=foo, met1=foo}");
    List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
        return (String) r.get("input");
    }).collect(Collectors.toList());
    Assert.assertEquals(expectedInputs, actualInputs);
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) DirectQueryProcessingPool(org.apache.druid.query.DirectQueryProcessingPool) Arrays(java.util.Arrays) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) IngestionState(org.apache.druid.indexer.IngestionState) Pair(org.apache.druid.java.util.common.Pair) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TestDataSegmentPusher(org.apache.druid.indexing.test.TestDataSegmentPusher) NoopJoinableFactory(org.apache.druid.segment.join.NoopJoinableFactory) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) Execs(org.apache.druid.java.util.common.concurrent.Execs) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) CacheConfig(org.apache.druid.client.cache.CacheConfig) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) Firehose(org.apache.druid.data.input.Firehose) DimFilter(org.apache.druid.query.filter.DimFilter) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) TransformSpec(org.apache.druid.segment.transform.TransformSpec) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Iterables(com.google.common.collect.Iterables) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) ParseException(org.apache.druid.java.util.common.parsers.ParseException) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) EntryExistsException(org.apache.druid.metadata.EntryExistsException) LinkedHashMap(java.util.LinkedHashMap) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) Nullable(javax.annotation.Nullable) Before(org.junit.Before) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) Files(java.nio.file.Files) Executor(java.util.concurrent.Executor) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) FileUtils(org.apache.commons.io.FileUtils) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Result(org.apache.druid.query.Result) HeapMemoryTaskStorage(org.apache.druid.indexing.overlord.HeapMemoryTaskStorage) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) DruidNode(org.apache.druid.server.DruidNode) Assert(org.junit.Assert) ArrayDeque(java.util.ArrayDeque) DataSchema(org.apache.druid.segment.indexing.DataSchema) QueryPlus(org.apache.druid.query.QueryPlus) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) After(org.junit.After) ServerType(org.apache.druid.server.coordination.ServerType) TypeReference(com.fasterxml.jackson.core.type.TypeReference) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) DateTimes(org.apache.druid.java.util.common.DateTimes) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec) JacksonUtils(org.apache.druid.java.util.common.jackson.JacksonUtils) ImmutableMap(com.google.common.collect.ImmutableMap) SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Collectors(java.util.stream.Collectors) TestUtils(org.apache.druid.indexing.common.TestUtils) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) DataNodeService(org.apache.druid.discovery.DataNodeService) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) MapCache(org.apache.druid.client.cache.MapCache) Logger(org.apache.druid.java.util.common.logger.Logger) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Deque(java.util.Deque) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) ExpectedException(org.junit.rules.ExpectedException) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Period(org.joda.time.Period) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) NullHandling(org.apache.druid.common.config.NullHandling) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) TemporaryFolder(org.junit.rules.TemporaryFolder) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Period(org.joda.time.Period) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) TaskStatus(org.apache.druid.indexer.TaskStatus) LinkedHashMap(java.util.LinkedHashMap) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 4 with Firehose

use of org.apache.druid.data.input.Firehose in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method testRestoreCorruptData.

@Test(timeout = 60_000L)
public void testRestoreCorruptData() throws Exception {
    final AppenderatorDriverRealtimeIndexTask task1 = makeRealtimeTask(null);
    // First run:
    {
        expectPublishedSegments(0);
        final ListenableFuture<TaskStatus> statusFuture = runTask(task1);
        // Wait for firehose to show up, it starts off null.
        while (task1.getFirehose() == null) {
            Thread.sleep(50);
        }
        final TestFirehose firehose = (TestFirehose) task1.getFirehose();
        firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo")));
        // Trigger graceful shutdown.
        task1.stopGracefully(taskToolboxFactory.build(task1).getConfig());
        // Wait for the task to finish. The status doesn't really matter, but we'll check it anyway.
        final TaskStatus taskStatus = statusFuture.get();
        Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
        // Nothing should be published.
        Assert.assertTrue(publishedSegments.isEmpty());
    }
    Optional<File> optional = FileUtils.listFiles(baseDir, null, true).stream().filter(f -> f.getName().equals("00000.smoosh")).findFirst();
    Assert.assertTrue("Could not find smoosh file", optional.isPresent());
    // Corrupt the data:
    final File smooshFile = optional.get();
    Files.write(smooshFile.toPath(), StringUtils.toUtf8("oops!"));
    // Second run:
    {
        expectPublishedSegments(0);
        final AppenderatorDriverRealtimeIndexTask task2 = makeRealtimeTask(task1.getId());
        final ListenableFuture<TaskStatus> statusFuture = runTask(task2);
        // Wait for the task to finish.
        TaskStatus status = statusFuture.get();
        Map<String, Object> expectedMetrics = ImmutableMap.of(RowIngestionMeters.BUILD_SEGMENTS, ImmutableMap.of(RowIngestionMeters.PROCESSED_WITH_ERROR, 0, RowIngestionMeters.PROCESSED, 0, RowIngestionMeters.UNPARSEABLE, 0, RowIngestionMeters.THROWN_AWAY, 0));
        IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
        Assert.assertEquals(expectedMetrics, reportData.getRowStats());
        Pattern errorPattern = Pattern.compile("(?s)java\\.lang\\.IllegalArgumentException.*\n" + "\tat (java\\.base/)?java\\.nio\\.Buffer\\..*");
        Assert.assertTrue(errorPattern.matcher(status.getErrorMsg()).matches());
    }
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) DirectQueryProcessingPool(org.apache.druid.query.DirectQueryProcessingPool) Arrays(java.util.Arrays) LookupNodeService(org.apache.druid.discovery.LookupNodeService) TestDataSegmentAnnouncer(org.apache.druid.indexing.test.TestDataSegmentAnnouncer) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) IngestionState(org.apache.druid.indexer.IngestionState) Pair(org.apache.druid.java.util.common.Pair) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TestDataSegmentPusher(org.apache.druid.indexing.test.TestDataSegmentPusher) NoopJoinableFactory(org.apache.druid.segment.join.NoopJoinableFactory) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) Execs(org.apache.druid.java.util.common.concurrent.Execs) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) CacheConfig(org.apache.druid.client.cache.CacheConfig) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) InputRow(org.apache.druid.data.input.InputRow) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) Firehose(org.apache.druid.data.input.Firehose) DimFilter(org.apache.druid.query.filter.DimFilter) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) TestDerbyConnector(org.apache.druid.metadata.TestDerbyConnector) TaskActionClientFactory(org.apache.druid.indexing.common.actions.TaskActionClientFactory) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) TransformSpec(org.apache.druid.segment.transform.TransformSpec) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Iterables(com.google.common.collect.Iterables) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) ParseException(org.apache.druid.java.util.common.parsers.ParseException) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) EntryExistsException(org.apache.druid.metadata.EntryExistsException) LinkedHashMap(java.util.LinkedHashMap) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) AuthTestUtils(org.apache.druid.server.security.AuthTestUtils) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) Nullable(javax.annotation.Nullable) Before(org.junit.Before) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) Files(java.nio.file.Files) Executor(java.util.concurrent.Executor) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) FileUtils(org.apache.commons.io.FileUtils) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Result(org.apache.druid.query.Result) HeapMemoryTaskStorage(org.apache.druid.indexing.overlord.HeapMemoryTaskStorage) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) DruidNode(org.apache.druid.server.DruidNode) Assert(org.junit.Assert) ArrayDeque(java.util.ArrayDeque) DataSchema(org.apache.druid.segment.indexing.DataSchema) QueryPlus(org.apache.druid.query.QueryPlus) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) Druids(org.apache.druid.query.Druids) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) After(org.junit.After) ServerType(org.apache.druid.server.coordination.ServerType) TypeReference(com.fasterxml.jackson.core.type.TypeReference) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) DateTimes(org.apache.druid.java.util.common.DateTimes) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec) JacksonUtils(org.apache.druid.java.util.common.jackson.JacksonUtils) ImmutableMap(com.google.common.collect.ImmutableMap) SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Collectors(java.util.stream.Collectors) TestUtils(org.apache.druid.indexing.common.TestUtils) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) DataNodeService(org.apache.druid.discovery.DataNodeService) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) MapCache(org.apache.druid.client.cache.MapCache) Logger(org.apache.druid.java.util.common.logger.Logger) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SingleFileTaskReportFileWriter(org.apache.druid.indexing.common.SingleFileTaskReportFileWriter) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Deque(java.util.Deque) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) NoopEmitter(org.apache.druid.java.util.emitter.core.NoopEmitter) ExpectedException(org.junit.rules.ExpectedException) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) Period(org.joda.time.Period) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) TestDataSegmentKiller(org.apache.druid.indexing.test.TestDataSegmentKiller) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) NullHandling(org.apache.druid.common.config.NullHandling) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) TemporaryFolder(org.junit.rules.TemporaryFolder) Pattern(java.util.regex.Pattern) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) TaskStatus(org.apache.druid.indexer.TaskStatus) File(java.io.File) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 5 with Firehose

use of org.apache.druid.data.input.Firehose in project druid by druid-io.

the class IngestSegmentFirehoseFactoryTest method testTransformSpec.

@Test
public void testTransformSpec() throws IOException {
    Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size());
    Integer rowcount = 0;
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null), ImmutableList.of(new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil())));
    int skipped = 0;
    try (final Firehose firehose = factory.connect(transformSpec.decorate(rowParser), TMP_DIR)) {
        while (firehose.hasMore()) {
            InputRow row = firehose.nextRow();
            if (row == null) {
                skipped++;
                continue;
            }
            Assert.assertArrayEquals(new String[] { DIM_NAME }, row.getDimensions().toArray());
            Assert.assertArrayEquals(new String[] { DIM_VALUE }, row.getDimension(DIM_NAME).toArray());
            Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME).longValue());
            Assert.assertEquals(METRIC_FLOAT_VALUE * 10, row.getMetric(METRIC_FLOAT_NAME).floatValue(), METRIC_FLOAT_VALUE * 0.0001);
            ++rowcount;
        }
    }
    Assert.assertEquals(90, skipped);
    Assert.assertEquals((int) MAX_ROWS, (int) rowcount);
}
Also used : SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Firehose(org.apache.druid.data.input.Firehose) InputRow(org.apache.druid.data.input.InputRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Test(org.junit.Test)

Aggregations

Firehose (org.apache.druid.data.input.Firehose)30 Test (org.junit.Test)25 File (java.io.File)19 ArrayList (java.util.ArrayList)16 Row (org.apache.druid.data.input.Row)14 InputRow (org.apache.druid.data.input.InputRow)13 List (java.util.List)7 ImmutableList (com.google.common.collect.ImmutableList)6 ImmutableMap (com.google.common.collect.ImmutableMap)6 IOException (java.io.IOException)6 Map (java.util.Map)6 InputRowParser (org.apache.druid.data.input.impl.InputRowParser)6 MapInputRowParser (org.apache.druid.data.input.impl.MapInputRowParser)6 DataSchema (org.apache.druid.segment.indexing.DataSchema)6 TypeReference (com.fasterxml.jackson.core.type.TypeReference)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)5 Iterables (com.google.common.collect.Iterables)4 ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService)4 MoreExecutors (com.google.common.util.concurrent.MoreExecutors)4