Search in sources :

Example 66 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class RecordSupplierInputSourceTest method testRead.

@Test
public void testRead() throws IOException {
    final RandomCsvSupplier supplier = new RandomCsvSupplier();
    final InputSource inputSource = new RecordSupplierInputSource<>("topic", supplier, false);
    final List<String> colNames = IntStream.range(0, NUM_COLS).mapToObj(i -> StringUtils.format("col_%d", i)).collect(Collectors.toList());
    final InputFormat inputFormat = new CsvInputFormat(colNames, null, null, false, 0);
    final InputSourceReader reader = inputSource.reader(new InputRowSchema(new TimestampSpec("col_0", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(colNames.subList(1, colNames.size()))), ColumnsFilter.all()), inputFormat, temporaryFolder.newFolder());
    int read = 0;
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        for (; read < NUM_ROWS && iterator.hasNext(); read++) {
            final InputRow inputRow = iterator.next();
            Assert.assertEquals(DateTimes.of(TIMESTAMP_STRING), inputRow.getTimestamp());
            Assert.assertEquals(NUM_COLS - 1, inputRow.getDimensions().size());
        }
    }
    Assert.assertEquals(NUM_ROWS, read);
    Assert.assertTrue(supplier.isClosed());
}
Also used : IntStream(java.util.stream.IntStream) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) Random(java.util.Random) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) ArrayList(java.util.ArrayList) InputSource(org.apache.druid.data.input.InputSource) Map(java.util.Map) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputSourceReader(org.apache.druid.data.input.InputSourceReader) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) ColumnsFilter(org.apache.druid.data.input.ColumnsFilter) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IOException(java.io.IOException) NotNull(javax.validation.constraints.NotNull) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) Rule(org.junit.Rule) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) Assert(org.junit.Assert) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) InputSource(org.apache.druid.data.input.InputSource) InputSourceReader(org.apache.druid.data.input.InputSourceReader) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 67 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class InputSourceSamplerTest method testWithTransformsAutoDimensions.

@Test
public void testWithTransformsAutoDimensions() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final TransformSpec transformSpec = new TransformSpec(null, ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", TestExprMacroTable.INSTANCE)));
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 68 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class InputSourceSamplerTest method testWithNoRollup.

@Test
public void testWithNoRollup() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, false, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(6, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 1L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 2L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 3L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(3));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(4));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(5));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 69 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class DataSegmentTest method testWithLastCompactionState.

@Test
public void testWithLastCompactionState() {
    final CompactionState compactionState = new CompactionState(new DynamicPartitionsSpec(null, null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), Collections.singletonMap("test", "map"), Collections.singletonMap("test2", "map2"));
    final DataSegment segment1 = DataSegment.builder().dataSource("foo").interval(Intervals.of("2012-01-01/2012-01-02")).version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()).shardSpec(getShardSpec(7)).size(0).lastCompactionState(compactionState).build();
    final DataSegment segment2 = DataSegment.builder().dataSource("foo").interval(Intervals.of("2012-01-01/2012-01-02")).version(DateTimes.of("2012-01-01T11:22:33.444Z").toString()).shardSpec(getShardSpec(7)).size(0).build();
    Assert.assertEquals(segment1, segment2.withLastCompactionState(compactionState));
}
Also used : DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test)

Example 70 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class DataSegmentTest method testV1Serialization.

@Test
public void testV1Serialization() throws Exception {
    final Interval interval = Intervals.of("2011-10-01/2011-10-02");
    final ImmutableMap<String, Object> loadSpec = ImmutableMap.of("something", "or_other");
    DataSegment segment = new DataSegment("something", interval, "1", loadSpec, Arrays.asList("dim1", "dim2"), Arrays.asList("met1", "met2"), new NumberedShardSpec(3, 0), new CompactionState(new HashedPartitionsSpec(100000, null, ImmutableList.of("dim1")), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "bar", "foo"))), ImmutableList.of(ImmutableMap.of("type", "count", "name", "count")), ImmutableMap.of("filter", ImmutableMap.of("type", "selector", "dimension", "dim1", "value", "foo")), ImmutableMap.of(), ImmutableMap.of()), TEST_VERSION, 1);
    final Map<String, Object> objectMap = MAPPER.readValue(MAPPER.writeValueAsString(segment), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
    Assert.assertEquals(11, objectMap.size());
    Assert.assertEquals("something", objectMap.get("dataSource"));
    Assert.assertEquals(interval.toString(), objectMap.get("interval"));
    Assert.assertEquals("1", objectMap.get("version"));
    Assert.assertEquals(loadSpec, objectMap.get("loadSpec"));
    Assert.assertEquals("dim1,dim2", objectMap.get("dimensions"));
    Assert.assertEquals("met1,met2", objectMap.get("metrics"));
    Assert.assertEquals(ImmutableMap.of("type", "numbered", "partitionNum", 3, "partitions", 0), objectMap.get("shardSpec"));
    Assert.assertEquals(TEST_VERSION, objectMap.get("binaryVersion"));
    Assert.assertEquals(1, objectMap.get("size"));
    Assert.assertEquals(6, ((Map) objectMap.get("lastCompactionState")).size());
    DataSegment deserializedSegment = MAPPER.readValue(MAPPER.writeValueAsString(segment), DataSegment.class);
    Assert.assertEquals(segment.getDataSource(), deserializedSegment.getDataSource());
    Assert.assertEquals(segment.getInterval(), deserializedSegment.getInterval());
    Assert.assertEquals(segment.getVersion(), deserializedSegment.getVersion());
    Assert.assertEquals(segment.getLoadSpec(), deserializedSegment.getLoadSpec());
    Assert.assertEquals(segment.getDimensions(), deserializedSegment.getDimensions());
    Assert.assertEquals(segment.getMetrics(), deserializedSegment.getMetrics());
    Assert.assertEquals(segment.getShardSpec(), deserializedSegment.getShardSpec());
    Assert.assertEquals(segment.getSize(), deserializedSegment.getSize());
    Assert.assertEquals(segment.getId(), deserializedSegment.getId());
    Assert.assertEquals(segment.getLastCompactionState(), deserializedSegment.getLastCompactionState());
    deserializedSegment = MAPPER.readValue(MAPPER.writeValueAsString(segment), DataSegment.class);
    Assert.assertEquals(0, segment.compareTo(deserializedSegment));
    deserializedSegment = MAPPER.readValue(MAPPER.writeValueAsString(segment), DataSegment.class);
    Assert.assertEquals(0, deserializedSegment.compareTo(segment));
    deserializedSegment = MAPPER.readValue(MAPPER.writeValueAsString(segment), DataSegment.class);
    Assert.assertEquals(segment.hashCode(), deserializedSegment.hashCode());
}
Also used : HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval) Test(org.junit.Test)

Aggregations

DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)169 Test (org.junit.Test)129 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)114 InputRow (org.apache.druid.data.input.InputRow)52 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)47 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)47 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)42 DataSchema (org.apache.druid.segment.indexing.DataSchema)39 StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)37 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)37 InputRowSchema (org.apache.druid.data.input.InputRowSchema)36 Map (java.util.Map)32 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)32 InputEntityReader (org.apache.druid.data.input.InputEntityReader)31 ArrayList (java.util.ArrayList)29 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)25 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)24 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)24 HashMap (java.util.HashMap)23 ImmutableMap (com.google.common.collect.ImmutableMap)21