Use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.
From the class WikiParquetReaderTest, method testWiki:
@Test
public void testWiki() throws IOException {
  InputRowSchema schema = new InputRowSchema(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(
          DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language", "user", "unpatrolled"))
      ),
      ColumnsFilter.all()
  );
  InputEntityReader reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);

  List<InputRow> rows = readAllRows(reader);
  Assert.assertEquals("Gypsy Danger", rows.get(0).getDimension("page").get(0));

  // "language" is a multi-valued dimension; check both values of the first row.
  String s1 = rows.get(0).getDimension("language").get(0);
  String s2 = rows.get(0).getDimension("language").get(1);
  Assert.assertEquals("en", s1);
  Assert.assertEquals("zh", s2);

  // Re-create the reader (the first one has been consumed) and sample the raw values.
  reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);
  List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
  final String expectedJson = "{\n"
      + "  \"continent\" : \"North America\",\n"
      + "  \"country\" : \"United States\",\n"
      + "  \"added\" : 57,\n"
      + "  \"city\" : \"San Francisco\",\n"
      + "  \"unpatrolled\" : \"true\",\n"
      + "  \"delta\" : -143,\n"
      + "  \"language\" : [ \"en\", \"zh\" ],\n"
      + "  \"robot\" : \"false\",\n"
      + "  \"deleted\" : 200,\n"
      + "  \"newPage\" : \"true\",\n"
      + "  \"namespace\" : \"article\",\n"
      + "  \"anonymous\" : \"false\",\n"
      + "  \"page\" : \"Gypsy Danger\",\n"
      + "  \"region\" : \"Bay Area\",\n"
      + "  \"user\" : \"nuclear\",\n"
      + "  \"timestamp\" : \"2013-08-31T01:02:33Z\"\n"
      + "}";
  Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
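In the schema above, the dimensions are declared by name only: DimensionsSpec.getDefaultSchemas wraps each name in a default string schema. A minimal sketch of the roughly equivalent explicit construction (not part of the original test; assumes the stock Druid classes used elsewhere on this page):

import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringDimensionSchema;

// Roughly what DimensionsSpec.getDefaultSchemas produces: one default
// string dimension schema per name.
DimensionsSpec spec = new DimensionsSpec(
    ImmutableList.of(
        StringDimensionSchema.create("page"),
        StringDimensionSchema.create("language"),
        StringDimensionSchema.create("user"),
        StringDimensionSchema.create("unpatrolled")
    )
);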
Use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.
From the class DruidSegmentReaderTest, method testReaderWithInclusiveColumnsFilterNoTimestamp:
@Test
public void testReaderWithInclusiveColumnsFilterNoTimestamp() throws IOException {
  // The inclusion-based filter hides "__time", so every row falls back to the
  // TimestampSpec's missing-value default of 1971.
  final DruidSegmentReader reader = new DruidSegmentReader(
      makeInputEntity(Intervals.of("2000/P1D")),
      indexIO,
      new TimestampSpec("__time", "millis", DateTimes.of("1971")),
      new DimensionsSpec(
          ImmutableList.of(
              StringDimensionSchema.create("s"),
              new DoubleDimensionSchema("d")
          )
      ),
      ColumnsFilter.inclusionBased(ImmutableSet.of("s", "d")),
      null,
      temporaryFolder.newFolder()
  );

  Assert.assertEquals(
      ImmutableList.of(
          new MapBasedInputRow(
              DateTimes.of("1971"),
              ImmutableList.of("s", "d"),
              ImmutableMap.<String, Object>builder().put("s", "foo").put("d", 1.23d).build()
          ),
          new MapBasedInputRow(
              DateTimes.of("1971"),
              ImmutableList.of("s", "d"),
              ImmutableMap.<String, Object>builder().put("s", "bar").put("d", 4.56d).build()
          )
      ),
      readRows(reader)
  );
}
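The inclusion-based ColumnsFilter above is what keeps __time away from the reader, which is why both rows come back with the 1971 default timestamp. A short sketch of the two ColumnsFilter variants used in these tests (assuming the stock org.apache.druid.data.input.ColumnsFilter API):

import com.google.common.collect.ImmutableSet;
import org.apache.druid.data.input.ColumnsFilter;

// Expose every column of the segment to the reader.
ColumnsFilter everything = ColumnsFilter.all();

// Expose only the listed columns; anything else, including __time, is hidden.
ColumnsFilter onlySAndD = ColumnsFilter.inclusionBased(ImmutableSet.of("s", "d"));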
Use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.
From the class DruidSegmentReaderTest, method testReaderWithFilter:
@Test
public void testReaderWithFilter() throws IOException {
  // The SelectorDimFilter keeps only the row whose "d" value is 1.23.
  final DruidSegmentReader reader = new DruidSegmentReader(
      makeInputEntity(Intervals.of("2000/P1D")),
      indexIO,
      new TimestampSpec("__time", "millis", DateTimes.of("1971")),
      new DimensionsSpec(
          ImmutableList.of(
              StringDimensionSchema.create("s"),
              new DoubleDimensionSchema("d")
          )
      ),
      ColumnsFilter.all(),
      new SelectorDimFilter("d", "1.23", null),
      temporaryFolder.newFolder()
  );

  Assert.assertEquals(
      ImmutableList.of(
          new MapBasedInputRow(
              DateTimes.of("2000"),
              ImmutableList.of("s", "d"),
              ImmutableMap.<String, Object>builder()
                          .put("__time", DateTimes.of("2000T").getMillis())
                          .put("s", "foo")
                          .put("d", 1.23d)
                          .put("cnt", 1L)
                          .put("met_s", makeHLLC("foo"))
                          .build()
          )
      ),
      readRows(reader)
  );
}
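The filter argument is what reduces the two-row segment to a single result. A minimal sketch of the filter on its own (assuming the stock org.apache.druid.query.filter.SelectorDimFilter constructor):

import org.apache.druid.query.filter.SelectorDimFilter;

// Matches rows where dimension "d" equals "1.23"; the third argument is an
// optional extraction function, unused here.
SelectorDimFilter filter = new SelectorDimFilter("d", "1.23", null);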
Use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.
From the class DruidSegmentReaderTest, method setUp:
@Before
public void setUp() throws IOException {
  // Write a segment with two rows in it, with columns: s (string), d (double),
  // cnt (long), met_s (complex).
  final IncrementalIndex incrementalIndex = IndexBuilder
      .create()
      .schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(
                  new DimensionsSpec(
                      ImmutableList.of(
                          StringDimensionSchema.create("s"),
                          new DoubleDimensionSchema("d")
                      )
                  )
              )
              .withMetrics(
                  new CountAggregatorFactory("cnt"),
                  new HyperUniquesAggregatorFactory("met_s", "s")
              )
              .withRollup(false)
              .build()
      )
      .rows(
          ImmutableList.of(
              new MapBasedInputRow(
                  DateTimes.of("2000"),
                  ImmutableList.of("s", "d"),
                  ImmutableMap.<String, Object>builder().put("s", "foo").put("d", 1.23).build()
              ),
              new MapBasedInputRow(
                  DateTimes.of("2000T01"),
                  ImmutableList.of("s", "d"),
                  ImmutableMap.<String, Object>builder().put("s", "bar").put("d", 4.56).build()
              )
          )
      )
      .buildIncrementalIndex();

  segmentDirectory = temporaryFolder.newFolder();
  try {
    TestHelper.getTestIndexMergerV9(OnHeapMemorySegmentWriteOutMediumFactory.instance())
              .persist(incrementalIndex, segmentDirectory, new IndexSpec(), null);
  } finally {
    incrementalIndex.close();
  }
}
Use of org.apache.druid.data.input.impl.DimensionsSpec in project druid by druid-io.
From the class DruidSegmentReaderTest, method testReaderAutoTimestampFormat:
@Test
public void testReaderAutoTimestampFormat() throws IOException {
  // Same segment as above, but the timestamp format is "auto" instead of "millis".
  final DruidSegmentReader reader = new DruidSegmentReader(
      makeInputEntity(Intervals.of("2000/P1D")),
      indexIO,
      new TimestampSpec("__time", "auto", DateTimes.of("1971")),
      new DimensionsSpec(
          ImmutableList.of(
              StringDimensionSchema.create("s"),
              new DoubleDimensionSchema("d")
          )
      ),
      ColumnsFilter.all(),
      null,
      temporaryFolder.newFolder()
  );

  Assert.assertEquals(
      ImmutableList.of(
          new MapBasedInputRow(
              DateTimes.of("2000"),
              ImmutableList.of("s", "d"),
              ImmutableMap.<String, Object>builder()
                          .put("__time", DateTimes.of("2000T").getMillis())
                          .put("s", "foo")
                          .put("d", 1.23d)
                          .put("cnt", 1L)
                          .put("met_s", makeHLLC("foo"))
                          .build()
          ),
          new MapBasedInputRow(
              DateTimes.of("2000T01"),
              ImmutableList.of("s", "d"),
              ImmutableMap.<String, Object>builder()
                          .put("__time", DateTimes.of("2000T01").getMillis())
                          .put("s", "bar")
                          .put("d", 4.56d)
                          .put("cnt", 1L)
                          .put("met_s", makeHLLC("bar"))
                          .build()
          )
      ),
      readRows(reader)
  );
}
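The only difference from the earlier millis-based tests is the "auto" timestamp format, which lets the TimestampSpec accept either a numeric epoch or an ISO-8601 string. A hedged sketch of the two specs side by side (variable names are illustrative):

import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.DateTimes;

// "millis" expects a numeric millisecond epoch; "auto" detects the format,
// accepting numeric epochs as well as ISO-8601 strings.
TimestampSpec millisOnly = new TimestampSpec("__time", "millis", DateTimes.of("1971"));
TimestampSpec autoDetect = new TimestampSpec("__time", "auto", DateTimes.of("1971"));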