Search in sources :

Example 96 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class WikiParquetReaderTest method testWiki.

@Test
public void testWiki() throws IOException {
    InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language", "user", "unpatrolled"))), ColumnsFilter.all());
    InputEntityReader reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("Gypsy Danger", rows.get(0).getDimension("page").get(0));
    String s1 = rows.get(0).getDimension("language").get(0);
    String s2 = rows.get(0).getDimension("language").get(1);
    Assert.assertEquals("en", s1);
    Assert.assertEquals("zh", s2);
    reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n" + "  \"continent\" : \"North America\",\n" + "  \"country\" : \"United States\",\n" + "  \"added\" : 57,\n" + "  \"city\" : \"San Francisco\",\n" + "  \"unpatrolled\" : \"true\",\n" + "  \"delta\" : -143,\n" + "  \"language\" : [ \"en\", \"zh\" ],\n" + "  \"robot\" : \"false\",\n" + "  \"deleted\" : 200,\n" + "  \"newPage\" : \"true\",\n" + "  \"namespace\" : \"article\",\n" + "  \"anonymous\" : \"false\",\n" + "  \"page\" : \"Gypsy Danger\",\n" + "  \"region\" : \"Bay Area\",\n" + "  \"user\" : \"nuclear\",\n" + "  \"timestamp\" : \"2013-08-31T01:02:33Z\"\n" + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 97 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class DruidSegmentReaderTest method testReaderWithInclusiveColumnsFilterNoTimestamp.

@Test
public void testReaderWithInclusiveColumnsFilterNoTimestamp() throws IOException {
    final DruidSegmentReader reader = new DruidSegmentReader(makeInputEntity(Intervals.of("2000/P1D")), indexIO, new TimestampSpec("__time", "millis", DateTimes.of("1971")), new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d"))), ColumnsFilter.inclusionBased(ImmutableSet.of("s", "d")), null, temporaryFolder.newFolder());
    Assert.assertEquals(ImmutableList.of(new MapBasedInputRow(DateTimes.of("1971"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("s", "foo").put("d", 1.23d).build()), new MapBasedInputRow(DateTimes.of("1971"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("s", "bar").put("d", 4.56d).build())), readRows(reader));
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)

Example 98 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class DruidSegmentReaderTest method testReaderWithFilter.

@Test
public void testReaderWithFilter() throws IOException {
    final DruidSegmentReader reader = new DruidSegmentReader(makeInputEntity(Intervals.of("2000/P1D")), indexIO, new TimestampSpec("__time", "millis", DateTimes.of("1971")), new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d"))), ColumnsFilter.all(), new SelectorDimFilter("d", "1.23", null), temporaryFolder.newFolder());
    Assert.assertEquals(ImmutableList.of(new MapBasedInputRow(DateTimes.of("2000"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T").getMillis()).put("s", "foo").put("d", 1.23d).put("cnt", 1L).put("met_s", makeHLLC("foo")).build())), readRows(reader));
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)

Example 99 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class DruidSegmentReaderTest method setUp.

@Before
public void setUp() throws IOException {
    // Write a segment with two rows in it, with columns: s (string), d (double), cnt (long), met_s (complex).
    final IncrementalIndex incrementalIndex = IndexBuilder.create().schema(new IncrementalIndexSchema.Builder().withDimensionsSpec(new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d")))).withMetrics(new CountAggregatorFactory("cnt"), new HyperUniquesAggregatorFactory("met_s", "s")).withRollup(false).build()).rows(ImmutableList.of(new MapBasedInputRow(DateTimes.of("2000"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("s", "foo").put("d", 1.23).build()), new MapBasedInputRow(DateTimes.of("2000T01"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("s", "bar").put("d", 4.56).build()))).buildIncrementalIndex();
    segmentDirectory = temporaryFolder.newFolder();
    try {
        TestHelper.getTestIndexMergerV9(OnHeapMemorySegmentWriteOutMediumFactory.instance()).persist(incrementalIndex, segmentDirectory, new IndexSpec(), null);
    } finally {
        incrementalIndex.close();
    }
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) IndexSpec(org.apache.druid.segment.IndexSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) Before(org.junit.Before)

Example 100 with DimensionsSpec

use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.

the class DruidSegmentReaderTest method testReaderAutoTimestampFormat.

@Test
public void testReaderAutoTimestampFormat() throws IOException {
    final DruidSegmentReader reader = new DruidSegmentReader(makeInputEntity(Intervals.of("2000/P1D")), indexIO, new TimestampSpec("__time", "auto", DateTimes.of("1971")), new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("s"), new DoubleDimensionSchema("d"))), ColumnsFilter.all(), null, temporaryFolder.newFolder());
    Assert.assertEquals(ImmutableList.of(new MapBasedInputRow(DateTimes.of("2000"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T").getMillis()).put("s", "foo").put("d", 1.23d).put("cnt", 1L).put("met_s", makeHLLC("foo")).build()), new MapBasedInputRow(DateTimes.of("2000T01"), ImmutableList.of("s", "d"), ImmutableMap.<String, Object>builder().put("__time", DateTimes.of("2000T01").getMillis()).put("s", "bar").put("d", 4.56d).put("cnt", 1L).put("met_s", makeHLLC("bar")).build())), readRows(reader));
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest) Test(org.junit.Test)

Aggregations

DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)169 Test (org.junit.Test)129 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)114 InputRow (org.apache.druid.data.input.InputRow)52 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)47 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)47 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)42 DataSchema (org.apache.druid.segment.indexing.DataSchema)39 StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)37 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)37 InputRowSchema (org.apache.druid.data.input.InputRowSchema)36 Map (java.util.Map)32 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)32 InputEntityReader (org.apache.druid.data.input.InputEntityReader)31 ArrayList (java.util.ArrayList)29 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)25 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)24 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)24 HashMap (java.util.HashMap)23 ImmutableMap (com.google.common.collect.ImmutableMap)21