Example 21 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From the class TimestampsParquetReaderTest, method testParseInt96Timestamp.

@Test
public void testParseInt96Timestamp() throws IOException {
    // The source parquet file comes from the Apache Spark SQL repo tests, where it is known as impala_timestamp.parq.
    // It has a single column, "ts", which is an int96 timestamp.
    final String file = "example/timestamps/int96_timestamp.parquet";
    InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("ts", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
        ColumnsFilter.all()
    );
    InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("2001-01-01T01:01:01.000Z", rows.get(0).getTimestamp().toString());
    reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n" + "  \"ts\" : 978310861000\n" + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
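
Every example here follows the same pattern: InputEntityReader.read() yields parsed InputRow objects, while InputEntityReader.sample() yields InputRowListPlusRawValues so a sampler can show the raw record next to its parsed form. The helpers readAllRows and sampleAllRows simply drain those iterators; a minimal sketch of what they plausibly look like (the bodies are an assumption, the actual test base class may differ):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.druid.data.input.InputEntityReader;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.java.util.common.parsers.CloseableIterator;

// Minimal sketch: drain the reader's iterators into lists. read() and sample()
// are the real InputEntityReader entry points; the helper bodies are assumed.
static List<InputRow> readAllRows(InputEntityReader reader) throws IOException {
    final List<InputRow> rows = new ArrayList<>();
    try (CloseableIterator<InputRow> iterator = reader.read()) {
        iterator.forEachRemaining(rows::add);
    }
    return rows;
}

static List<InputRowListPlusRawValues> sampleAllRows(InputEntityReader reader) throws IOException {
    final List<InputRowListPlusRawValues> rows = new ArrayList<>();
    try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
        iterator.forEachRemaining(rows::add);
    }
    return rows;
}

This is also why each test constructs a second reader before sampling: the iterators consume the underlying input, so a single reader instance cannot be drained twice.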

Example 22 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From the class TimestampsParquetReaderTest, method testTimeMillisInInt64.

@Test
public void testTimeMillisInInt64() throws IOException {
    final String file = "example/timestamps/timemillis-in-i64.parquet";
    InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("time", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
        ColumnsFilter.all()
    );
    InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("1970-01-01T00:00:00.010Z", rows.get(0).getTimestamp().toString());
    reader = createReader(file, schema, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n" + "  \"time\" : 10\n" + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
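
The "auto" timestamp format accepts both ISO-8601 strings and numeric epoch values, so the int64 value 10 is read as 10 milliseconds after the epoch, matching the asserted 1970-01-01T00:00:00.010Z. A quick way to see that mapping in isolation, as a small sketch that calls TimestampSpec.extractTimestamp on a hand-built column map:

import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.joda.time.DateTime;

public class AutoTimestampDemo {
    public static void main(String[] args) {
        // "auto" detects whether the value is an ISO string or an epoch number.
        TimestampSpec spec = new TimestampSpec("time", "auto", null);
        DateTime parsed = spec.extractTimestamp(ImmutableMap.<String, Object>of("time", 10L));
        // Prints 1970-01-01T00:00:00.010Z: the long is treated as epoch milliseconds.
        System.out.println(parsed);
    }
}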

Example 23 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From the class WikiParquetReaderTest, method testWiki.

@Test
public void testWiki() throws IOException {
    InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("page", "language", "user", "unpatrolled"))),
        ColumnsFilter.all()
    );
    InputEntityReader reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("Gypsy Danger", rows.get(0).getDimension("page").get(0));
    String s1 = rows.get(0).getDimension("language").get(0);
    String s2 = rows.get(0).getDimension("language").get(1);
    Assert.assertEquals("en", s1);
    Assert.assertEquals("zh", s2);
    reader = createReader("example/wiki/wiki.parquet", schema, JSONPathSpec.DEFAULT);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n"
        + "  \"continent\" : \"North America\",\n"
        + "  \"country\" : \"United States\",\n"
        + "  \"added\" : 57,\n"
        + "  \"city\" : \"San Francisco\",\n"
        + "  \"unpatrolled\" : \"true\",\n"
        + "  \"delta\" : -143,\n"
        + "  \"language\" : [ \"en\", \"zh\" ],\n"
        + "  \"robot\" : \"false\",\n"
        + "  \"deleted\" : 200,\n"
        + "  \"newPage\" : \"true\",\n"
        + "  \"namespace\" : \"article\",\n"
        + "  \"anonymous\" : \"false\",\n"
        + "  \"page\" : \"Gypsy Danger\",\n"
        + "  \"region\" : \"Bay Area\",\n"
        + "  \"user\" : \"nuclear\",\n"
        + "  \"timestamp\" : \"2013-08-31T01:02:33Z\"\n"
        + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
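
Note the multi-value dimension: "language" holds both "en" and "zh" in a single row, and InputRow.getDimension returns them as a List<String>. On the sampling side, InputRowListPlusRawValues pairs the raw record with the rows parsed from it; a short sketch of walking sampled output (imports mirror the "Also used" list above, plus java.util.List and java.util.Map; assumes the file parses cleanly):

// Each sampled element carries the raw values map and the rows parsed from it.
// When parsing fails, getParseException() is set and getInputRows() is null.
void printSample(List<InputRowListPlusRawValues> sampled) {
    for (InputRowListPlusRawValues entry : sampled) {
        Map<String, Object> raw = entry.getRawValues();
        System.out.println("raw = " + raw);
        List<InputRow> parsed = entry.getInputRows();
        if (parsed != null) {
            for (InputRow row : parsed) {
                // Multi-value columns come back as List<String>, e.g. [en, zh].
                System.out.println("language = " + row.getDimension("language"));
            }
        }
    }
}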

Example 24 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From the class DecimalParquetReaderTest, method testReadParquetDecimali32.

@Test
public void testReadParquetDecimali32() throws IOException {
    final String file = "example/decimals/dec-in-i32.parquet";
    InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec"))),
        ColumnsFilter.all()
    );
    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i32_dec"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.i32_dec")
    );
    JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(1).getTimestamp().toString());
    Assert.assertEquals("100", rows.get(1).getDimension("i32_dec").get(0));
    Assert.assertEquals(new BigDecimal(100), rows.get(1).getMetric("metric1"));
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    final String expectedJson = "{\n" + "  \"i32_dec\" : 100\n" + "}";
    Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(1).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputEntityReader (org.apache.druid.data.input.InputEntityReader), BigDecimal (java.math.BigDecimal), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), Test (org.junit.Test)
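
The flatten spec reads the same decimal column twice: the ROOT field keeps "i32_dec" as a dimension, while the PATH field evaluates "$.i32_dec" and exposes the same value under the new name "metric1", which the row then serves as a metric (hence getMetric("metric1") returning a BigDecimal). The pattern generalizes to renaming or duplicating any top-level column; a minimal sketch with a hypothetical "price" column:

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec;
import org.apache.druid.java.util.common.parsers.JSONPathFieldType;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

// "price" is a hypothetical column name used only for illustration.
// The boolean enables field discovery, so undeclared top-level fields still come through.
JSONPathSpec flattenSpec = new JSONPathSpec(
    true,
    ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "price", "price"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "priceAsMetric", "$.price")
    )
);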

Example 25 with InputRowListPlusRawValues

Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

From the class FlattenSpecParquetReaderTest, method testNested1Flatten.

@Test
public void testNested1Flatten() throws IOException {
    final String file = "example/flattening/test_nested_1.parquet";
    InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
        ColumnsFilter.all()
    );
    List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim2", "$.nestedData.dim2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim3", "$.nestedData.dim3"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric2", "$.nestedData.metric2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "listDim", "$.nestedData.listDim[*]")
    );
    JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
    InputEntityReader reader = createReader(file, schema, flattenSpec);
    List<InputRow> rows = readAllRows(reader);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, rows.get(0).getTimestamp().toString());
    Assert.assertEquals("d1v1", rows.get(0).getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", rows.get(0).getDimension("dim2").get(0));
    Assert.assertEquals("1", rows.get(0).getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v1", rows.get(0).getDimension("listDim").get(0));
    Assert.assertEquals("listDim1v2", rows.get(0).getDimension("listDim").get(1));
    Assert.assertEquals(1, rows.get(0).getMetric("metric1").longValue());
    Assert.assertEquals(2, rows.get(0).getMetric("metric2").longValue());
    reader = createReader(file, schema, flattenSpec);
    List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used: InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), InputRow (org.apache.druid.data.input.InputRow), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec), JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec), InputRowSchema (org.apache.druid.data.input.InputRowSchema), InputEntityReader (org.apache.druid.data.input.InputEntityReader), Test (org.junit.Test)
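
Reconstructed from the assertions above (an approximation, not the file's full schema), each record in test_nested_1.parquet is shaped roughly like this; the PATH expressions pull dim2, dim3, and metric2 out of nestedData, and "$.nestedData.listDim[*]" flattens the nested array into the multi-value dimension "listDim":

// Approximate shape of the first record, inferred from the assertions:
// {
//   "timestamp": <FlattenSpecParquetInputTest.TS1>,
//   "dim1": "d1v1",
//   "metric1": 1,
//   "nestedData": {
//     "dim2": "d2v1",
//     "dim3": 1,
//     "metric2": 2,
//     "listDim": ["listDim1v1", "listDim1v2"]
//   }
// }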

Aggregations

InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 34 usages
Test (org.junit.Test): 31 usages
InputRow (org.apache.druid.data.input.InputRow): 29 usages
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 26 usages
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 24 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 21 usages
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 21 usages
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 17 usages
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 15 usages
HashMap (java.util.HashMap): 4 usages
BigDecimal (java.math.BigDecimal): 3 usages
ArrayList (java.util.ArrayList): 3 usages
Map (java.util.Map): 3 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2 usages
File (java.io.File): 2 usages
AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest): 2 usages
AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest): 2 usages
InputSourceReader (org.apache.druid.data.input.InputSourceReader): 2 usages
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 2 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 1 usage