Examples with IndexTuningConfig - org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig

Example 11 with IndexTuningConfig

use of org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig in project druid by druid-io.

the class TaskSerdeTest method testIndexTaskwithResourceSerde.

@Test
public void testIndexTaskwithResourceSerde() throws Exception {
    final IndexTask task = new IndexTask(null, new TaskResource("rofl", 2), new IndexIngestionSpec(new DataSchema("foo", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))), null), new IndexIOConfig(null, new LocalInputSource(new File("lol"), "rofl"), new NoopInputFormat(), true, false), new IndexTuningConfig(null, null, null, 10, null, null, null, null, null, null, new DynamicPartitionsSpec(10000, null), indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)), null);
    for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
        jsonMapper.registerModule(jacksonModule);
    }
    final String json = jsonMapper.writeValueAsString(task);
    // Just want to run the clock a bit to make sure the task id doesn't change
    Thread.sleep(100);
    final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
    Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    Assert.assertTrue(task.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
    Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
}

Also used : IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) FirehoseModule(org.apache.druid.guice.FirehoseModule) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) Module(com.fasterxml.jackson.databind.Module) FirehoseModule(org.apache.druid.guice.FirehoseModule) File(java.io.File) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) Test(org.junit.Test)

Example 12 with IndexTuningConfig

use of org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig in project druid by druid-io.

the class TaskLifecycleTest method testIndexTask.

@Test
public void testIndexTask() throws Exception {
    final Task indexTask = new IndexTask(null, null, new IndexIngestionSpec(new DataSchema("foo", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))), null), new IndexIOConfig(null, new MockInputSource(), new NoopInputFormat(), false, false), new IndexTuningConfig(null, 10000, null, 10, null, null, null, null, null, null, null, indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)), null);
    final Optional<TaskStatus> preRunTaskStatus = tsqa.getStatus(indexTask.getId());
    Assert.assertTrue("pre run task status not present", !preRunTaskStatus.isPresent());
    final TaskStatus mergedStatus = runTask(indexTask);
    final TaskStatus status = taskStorage.getStatus(indexTask.getId()).get();
    final List<DataSegment> publishedSegments = BY_INTERVAL_ORDERING.sortedCopy(mdc.getPublished());
    final List<DataSegment> loggedSegments = BY_INTERVAL_ORDERING.sortedCopy(tsqa.getInsertedSegments(indexTask.getId()));
    Assert.assertEquals("statusCode", TaskState.SUCCESS, status.getStatusCode());
    Assert.assertEquals(taskLocation, status.getLocation());
    Assert.assertEquals("merged statusCode", TaskState.SUCCESS, mergedStatus.getStatusCode());
    Assert.assertEquals("segments logged vs published", loggedSegments, publishedSegments);
    Assert.assertEquals("num segments published", 2, mdc.getPublished().size());
    Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
    Assert.assertEquals("segment1 datasource", "foo", publishedSegments.get(0).getDataSource());
    Assert.assertEquals("segment1 interval", Intervals.of("2010-01-01/P1D"), publishedSegments.get(0).getInterval());
    Assert.assertEquals("segment1 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(0).getDimensions());
    Assert.assertEquals("segment1 metrics", ImmutableList.of("met"), publishedSegments.get(0).getMetrics());
    Assert.assertEquals("segment2 datasource", "foo", publishedSegments.get(1).getDataSource());
    Assert.assertEquals("segment2 interval", Intervals.of("2010-01-02/P1D"), publishedSegments.get(1).getInterval());
    Assert.assertEquals("segment2 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(1).getDimensions());
    Assert.assertEquals("segment2 metrics", ImmutableList.of("met"), publishedSegments.get(1).getMetrics());
}

Also used : IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) IndexTask(org.apache.druid.indexing.common.task.IndexTask) KillUnusedSegmentsTask(org.apache.druid.indexing.common.task.KillUnusedSegmentsTask) Task(org.apache.druid.indexing.common.task.Task) AbstractFixedIntervalTask(org.apache.druid.indexing.common.task.AbstractFixedIntervalTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) IndexTask(org.apache.druid.indexing.common.task.IndexTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) FireDepartmentTest(org.apache.druid.segment.realtime.FireDepartmentTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 13 with IndexTuningConfig

use of org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig in project druid by druid-io.

the class TaskLifecycleTest method testUnifiedAppenderatorsManagerCleanup.

@Test
public void testUnifiedAppenderatorsManagerCleanup() throws Exception {
    final ExecutorService exec = Executors.newFixedThreadPool(8);
    UnifiedIndexerAppenderatorsManager unifiedIndexerAppenderatorsManager = new UnifiedIndexerAppenderatorsManager(new ForwardingQueryProcessingPool(exec), NoopJoinableFactory.INSTANCE, new WorkerConfig(), MapCache.create(2048), new CacheConfig(), new CachePopulatorStats(), MAPPER, new NoopServiceEmitter(), () -> queryRunnerFactoryConglomerate);
    tb = setUpTaskToolboxFactory(dataSegmentPusher, handoffNotifierFactory, mdc, unifiedIndexerAppenderatorsManager);
    taskRunner = setUpThreadPoolTaskRunner(tb);
    taskQueue = setUpTaskQueue(taskStorage, taskRunner);
    final Task indexTask = new IndexTask(null, null, new IndexIngestionSpec(new DataSchema("foo", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))), null), new IndexIOConfig(null, new MockInputSource(), new NoopInputFormat(), false, false), new IndexTuningConfig(null, 10000, null, 10, null, null, null, null, null, null, null, indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)), null);
    final Optional<TaskStatus> preRunTaskStatus = tsqa.getStatus(indexTask.getId());
    Assert.assertTrue("pre run task status not present", !preRunTaskStatus.isPresent());
    final TaskStatus mergedStatus = runTask(indexTask);
    final TaskStatus status = taskStorage.getStatus(indexTask.getId()).get();
    Assert.assertEquals("statusCode", TaskState.SUCCESS, status.getStatusCode());
    Map<String, UnifiedIndexerAppenderatorsManager.DatasourceBundle> bundleMap = unifiedIndexerAppenderatorsManager.getDatasourceBundles();
    Assert.assertEquals(1, bundleMap.size());
    unifiedIndexerAppenderatorsManager.removeAppenderatorsForTask(indexTask.getId(), "foo");
    Assert.assertTrue(bundleMap.isEmpty());
}

Also used : IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) IndexTask(org.apache.druid.indexing.common.task.IndexTask) KillUnusedSegmentsTask(org.apache.druid.indexing.common.task.KillUnusedSegmentsTask) Task(org.apache.druid.indexing.common.task.Task) AbstractFixedIntervalTask(org.apache.druid.indexing.common.task.AbstractFixedIntervalTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) ForwardingQueryProcessingPool(org.apache.druid.query.ForwardingQueryProcessingPool) UnifiedIndexerAppenderatorsManager(org.apache.druid.segment.realtime.appenderator.UnifiedIndexerAppenderatorsManager) IndexTask(org.apache.druid.indexing.common.task.IndexTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) ExecutorService(java.util.concurrent.ExecutorService) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) WorkerConfig(org.apache.druid.indexing.worker.config.WorkerConfig) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) CacheConfig(org.apache.druid.client.cache.CacheConfig) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) FireDepartmentTest(org.apache.druid.segment.realtime.FireDepartmentTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 14 with IndexTuningConfig

use of org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig in project druid by druid-io.

the class IndexTaskTest method testCsvWithHeaderOfEmptyTimestamp.

@Test
public void testCsvWithHeaderOfEmptyTimestamp() throws Exception {
    final File tmpDir = temporaryFolder.newFolder();
    final File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write(",,\n");
        writer.write("2014-01-01T00:00:10Z,a,1\n");
    }
    final List<String> columns = Arrays.asList("ts", "", "");
    // report parse exception
    final IndexTuningConfig tuningConfig = createTuningConfig(2, null, null, null, null, false, true);
    final IndexIngestionSpec ingestionSpec;
    List<String> expectedMessages;
    if (useInputFormatApi) {
        ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, new CsvInputFormat(columns, null, null, true, 0), null, null, tuningConfig, false, false);
        expectedMessages = ImmutableList.of(StringUtils.format("Timestamp[null] is unparseable! Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1} (Path: %s, Record: 1, Line: 2)", tmpFile.toURI()));
    } else {
        ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, columns, true, 0), null, null, tuningConfig, false, false);
        expectedMessages = ImmutableList.of("Timestamp[null] is unparseable! Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1}");
    }
    IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
    TaskStatus status = runTask(indexTask).lhs;
    Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
    checkTaskStatusErrorMsgForParseExceptionsExceeded(status);
    IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData();
    List<LinkedHashMap> parseExceptionReports = (List<LinkedHashMap>) reportData.getUnparseableEvents().get(RowIngestionMeters.BUILD_SEGMENTS);
    List<String> actualMessages = parseExceptionReports.stream().map((r) -> {
        return ((List<String>) r.get("details")).get(0);
    }).collect(Collectors.toList());
    Assert.assertEquals(expectedMessages, actualMessages);
    List<String> expectedInputs = ImmutableList.of("{column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1}");
    List<String> actualInputs = parseExceptionReports.stream().map((r) -> {
        return (String) r.get("input");
    }).collect(Collectors.toList());
    Assert.assertEquals(expectedInputs, actualInputs);
}

Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Arrays(java.util.Arrays) IndexSpec(org.apache.druid.segment.IndexSpec) Pair(org.apache.druid.java.util.common.Pair) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) IAE(org.apache.druid.java.util.common.IAE) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) NoopSegmentHandoffNotifierFactory(org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory) EqualsVerifier(nl.jqno.equalsverifier.EqualsVerifier) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) SegmentLocalCacheManager(org.apache.druid.segment.loading.SegmentLocalCacheManager) SegmentId(org.apache.druid.timeline.SegmentId) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) SegmentLoaderConfig(org.apache.druid.segment.loading.SegmentLoaderConfig) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) BufferedWriter(java.io.BufferedWriter) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) CoreMatchers(org.hamcrest.CoreMatchers) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Event(org.apache.druid.java.util.emitter.core.Event) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Intervals(org.apache.druid.java.util.common.Intervals) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) Files(com.google.common.io.Files) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) ExpectedException(org.junit.rules.ExpectedException) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Cursor(org.apache.druid.segment.Cursor) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) TaskStatus(org.apache.druid.indexer.TaskStatus) BufferedWriter(java.io.BufferedWriter) LinkedHashMap(java.util.LinkedHashMap) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) File(java.io.File) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) Test(org.junit.Test)

Example 15 with IndexTuningConfig

use of org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig in project druid by druid-io.

the class IndexTaskTest method testCsvWithHeaderOfEmptyColumns.

@Test
public void testCsvWithHeaderOfEmptyColumns() throws Exception {
    final File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("ts,,\n");
        writer.write("2014-01-01T00:00:10Z,a,1\n");
    }
    tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("ts,dim,\n");
        writer.write("2014-01-01T00:00:10Z,a,1\n");
    }
    tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("ts,,val\n");
        writer.write("2014-01-01T00:00:10Z,a,1\n");
    }
    // report parse exception
    final IndexTuningConfig tuningConfig = createTuningConfig(2, 1, null, null, null, true, true);
    final IndexIngestionSpec ingestionSpec;
    if (useInputFormatApi) {
        ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, new CsvInputFormat(null, null, null, true, 0), null, null, tuningConfig, false, false);
    } else {
        ingestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, DimensionsSpec.EMPTY, null, null, true, 0), null, null, tuningConfig, false, false);
    }
    IndexTask indexTask = new IndexTask(null, null, ingestionSpec, null);
    final List<DataSegment> segments = runTask(indexTask).rhs;
    // the order of result segments can be changed because hash shardSpec is used.
    // the below loop is to make this test deterministic.
    Assert.assertEquals(2, segments.size());
    Assert.assertNotEquals(segments.get(0), segments.get(1));
    for (DataSegment segment : segments) {
        System.out.println(segment.getDimensions());
    }
    for (int i = 0; i < 2; i++) {
        final DataSegment segment = segments.get(i);
        final Set<String> dimensions = new HashSet<>(segment.getDimensions());
        Assert.assertTrue(StringUtils.format("Actual dimensions: %s", dimensions), dimensions.equals(Sets.newHashSet("column_2")) || dimensions.equals(Sets.newHashSet("dim", "column_2", "column_3")));
        Assert.assertEquals(Collections.singletonList("val"), segment.getMetrics());
        Assert.assertEquals(Intervals.of("2014/P1D"), segment.getInterval());
    }
}

Also used : DataSegment(org.apache.druid.timeline.DataSegment) BufferedWriter(java.io.BufferedWriter) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) File(java.io.File) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

IndexTuningConfig (org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig)25 Test (org.junit.Test)24 IndexIngestionSpec (org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec)16 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)14 IndexSpec (org.apache.druid.segment.IndexSpec)14 File (java.io.File)12 IndexIOConfig (org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig)12 DataSchema (org.apache.druid.segment.indexing.DataSchema)12 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)12 DataSegment (org.apache.druid.timeline.DataSegment)12 BufferedWriter (java.io.BufferedWriter)10 CSVParseSpec (org.apache.druid.data.input.impl.CSVParseSpec)10 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)10 TaskStatus (org.apache.druid.indexer.TaskStatus)10 HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec)8 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)7 TypeReference (com.fasterxml.jackson.core.type.TypeReference)6 Preconditions (com.google.common.base.Preconditions)6 ImmutableList (com.google.common.collect.ImmutableList)6 ImmutableMap (com.google.common.collect.ImmutableMap)6