Search in sources :

Example 51 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class CompatParquetReaderTest method testParquet1217.

/**
 * Reads {@code example/compat/parquet-1217.parquet} with a flatten spec that exposes "col" both as
 * a root field and, via the path {@code $.col}, as "metric1"; verifies rows 0 and 4 of the parsed
 * output, then re-reads the file in sampling mode and checks the raw JSON of the first record.
 */
@Test
public void testParquet1217() throws IOException {
    final String parquetPath = "example/compat/parquet-1217.parquet";

    // The timestamp asserted below is the same instant that is handed to the TimestampSpec here.
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of()));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    final JSONPathFieldSpec colField = new JSONPathFieldSpec(JSONPathFieldType.ROOT, "col", "col");
    final JSONPathFieldSpec metricField = new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.col");
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(colField, metricField);
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // First pass: fully parsed rows.
    final List<InputRow> parsedRows = readAllRows(createReader(parquetPath, rowSchema, pathSpec));
    final InputRow firstRow = parsedRows.get(0);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", firstRow.getTimestamp().toString());
    Assert.assertEquals("-1", firstRow.getDimension("col").get(0));
    Assert.assertEquals(-1, firstRow.getMetric("metric1"));
    Assert.assertTrue(parsedRows.get(4).getDimension("col").isEmpty());

    // Second pass: a fresh reader is created for sampling, and the raw values are compared as JSON.
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(createReader(parquetPath, rowSchema, pathSpec));
    final String expectedRawJson = "{\n  \"col\" : -1\n}";
    Assert.assertEquals(expectedRawJson, DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 52 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class DecimalParquetReaderTest method testReadParquetDecimali32.

/**
 * Reads {@code example/decimals/dec-in-i32.parquet}, exposing "i32_dec" as a dimension and, via
 * the path {@code $.i32_dec}, as "metric1"; verifies the row at index 1 of the parsed output and
 * the raw JSON of the sampled record at index 1.
 */
@Test
public void testReadParquetDecimali32() throws IOException {
    final String parquetPath = "example/decimals/dec-in-i32.parquet";

    // The timestamp asserted below is the same instant that is handed to the TimestampSpec here.
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec")));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    final JSONPathFieldSpec decimalField = new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i32_dec");
    final JSONPathFieldSpec metricField = new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.i32_dec");
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(decimalField, metricField);
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // First pass: fully parsed rows; every assertion targets the second row (index 1).
    final List<InputRow> parsedRows = readAllRows(createReader(parquetPath, rowSchema, pathSpec));
    final InputRow secondRow = parsedRows.get(1);
    Assert.assertEquals("2018-09-01T00:00:00.000Z", secondRow.getTimestamp().toString());
    Assert.assertEquals("100", secondRow.getDimension("i32_dec").get(0));
    Assert.assertEquals(new BigDecimal(100), secondRow.getMetric("metric1"));

    // Second pass: a fresh reader is created for sampling, and the raw values are compared as JSON.
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(createReader(parquetPath, rowSchema, pathSpec));
    final String expectedRawJson = "{\n  \"i32_dec\" : 100\n}";
    Assert.assertEquals(expectedRawJson, DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(1).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) BigDecimal(java.math.BigDecimal) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 53 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testNested1Flatten.

/**
 * Reads {@code example/flattening/test_nested_1.parquet} with a flatten spec that lifts fields out
 * of {@code nestedData}; verifies the flattened dimensions and metrics of the first parsed row,
 * then checks that the first sampled record's raw values serialize to {@code NESTED_JSON}.
 */
@Test
public void testNested1Flatten() throws IOException {
    final String parquetPath = "example/flattening/test_nested_1.parquet";

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of()));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

    // Two root-level fields followed by four JSON-path extractions from the nested structure.
    final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim2", "$.nestedData.dim2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim3", "$.nestedData.dim3"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric2", "$.nestedData.metric2"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "listDim", "$.nestedData.listDim[*]")
    );
    final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

    // First pass: fully parsed rows.
    final List<InputRow> parsedRows = readAllRows(createReader(parquetPath, rowSchema, pathSpec));
    final InputRow firstRow = parsedRows.get(0);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
    Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
    Assert.assertEquals("1", firstRow.getDimension("dim3").get(0));
    Assert.assertEquals("listDim1v1", firstRow.getDimension("listDim").get(0));
    Assert.assertEquals("listDim1v2", firstRow.getDimension("listDim").get(1));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
    Assert.assertEquals(2, firstRow.getMetric("metric2").longValue());

    // Second pass: a fresh reader is created for sampling, and the raw values are compared as JSON.
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(createReader(parquetPath, rowSchema, pathSpec));
    Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 54 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class FlattenSpecParquetReaderTest method testNested1NoFlattenSpec.

/**
 * Reads {@code example/flattening/test_nested_1.parquet} with an empty flatten spec and "dim1" as
 * the only declared dimension; verifies that the first parsed row exposes exactly one dimension
 * (none of the nested fields), that "metric1" is still readable, and that the first sampled
 * record's raw values serialize to {@code NESTED_JSON}.
 */
@Test
public void testNested1NoFlattenSpec() throws IOException {
    final String parquetPath = "example/flattening/test_nested_1.parquet";

    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1")));
    final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    final JSONPathSpec pathSpec = new JSONPathSpec(false, ImmutableList.of());

    // First pass: fully parsed rows.
    final List<InputRow> parsedRows = readAllRows(createReader(parquetPath, rowSchema, pathSpec));
    final InputRow firstRow = parsedRows.get(0);
    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));

    // Only the single declared dimension is expected; nested fields must not appear.
    final List<String> dimensionNames = firstRow.getDimensions();
    Assert.assertEquals(1, dimensionNames.size());
    Assert.assertFalse(dimensionNames.contains("dim2"));
    Assert.assertFalse(dimensionNames.contains("dim3"));
    Assert.assertFalse(dimensionNames.contains("listDim"));
    Assert.assertFalse(dimensionNames.contains("nestedData"));
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());

    // Second pass: a fresh reader is created for sampling, and the raw values are compared as JSON.
    final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(createReader(parquetPath, rowSchema, pathSpec));
    Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 55 with InputEntityReader

use of org.apache.druid.data.input.InputEntityReader in project druid by druid-io.

the class OrcReaderTest method testJsonPathFunctions.

/**
 * Checks the JSON-path aggregate functions (min, max, avg, length, sum, stddev) applied to the
 * {@code list} column of {@code example/test_json_path_functions.orc}, and that {@code append}
 * yields no value.
 *
 * schema: struct<string1:string, list:array<int>, ts:timestamp>
 * data:   {"dim1","[7,8,9]","2000-03-12 15:00:00"}
 */
@Test
public void testJsonPathFunctions() throws IOException {
    final JSONPathSpec pathSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "min", "$.list.min()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "max", "$.list.max()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "avg", "$.list.avg()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "len", "$.list.length()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "sum", "$.list.sum()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "stddev", "$.list.stddev()"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "append", "$.list.append(10)")
        )
    );
    final OrcInputFormat inputFormat = new OrcInputFormat(pathSpec, null, new Configuration());
    final InputEntityReader reader = createReader(
        new TimestampSpec("ts", "millis", null),
        new DimensionsSpec(null),
        inputFormat,
        "example/test_json_path_functions.orc"
    );

    try (CloseableIterator<InputRow> rowIterator = reader.read()) {
        int rowsSeen = 0;
        while (rowIterator.hasNext()) {
            final InputRow current = rowIterator.next();
            rowsSeen += 1;

            Assert.assertEquals("7.0", Iterables.getOnlyElement(current.getDimension("min")));
            Assert.assertEquals("8.0", Iterables.getOnlyElement(current.getDimension("avg")));
            Assert.assertEquals("9.0", Iterables.getOnlyElement(current.getDimension("max")));
            Assert.assertEquals("24.0", Iterables.getOnlyElement(current.getDimension("sum")));
            Assert.assertEquals("3", Iterables.getOnlyElement(current.getDimension("len")));

            // Population variance of [7,8,9] is 2/3, so the stddev is sqrt(2/3), approximately 0.8165.
            final String stddevText = Iterables.getOnlyElement(current.getDimension("stddev"));
            Assert.assertEquals(0.8165, Double.parseDouble(stddevText), 0.0001);

            // append is not supported, so the dimension comes back empty
            Assert.assertEquals(Collections.emptyList(), current.getDimension("append"));
        }
        // The file is expected to contain exactly one row.
        Assert.assertEquals(1, rowsSeen);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Aggregations

InputEntityReader (org.apache.druid.data.input.InputEntityReader)58 Test (org.junit.Test)56 InputRow (org.apache.druid.data.input.InputRow)54 InputRowSchema (org.apache.druid.data.input.InputRowSchema)39 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)33 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)33 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)31 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)28 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)26 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)8 Configuration (org.apache.hadoop.conf.Configuration)8 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest)5 AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest)5 DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper)5 KafkaRecordEntity (org.apache.druid.data.input.kafka.KafkaRecordEntity)4 ConsumerRecord (org.apache.kafka.clients.consumer.ConsumerRecord)4 Headers (org.apache.kafka.common.header.Headers)4 RecordHeaders (org.apache.kafka.common.header.internals.RecordHeaders)4 BigDecimal (java.math.BigDecimal)3