Use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.
From class CompatParquetReaderTest, method testProtoStructWithArray.
/**
 * Reads {@code example/compat/proto-struct-with-array.parquet} using a {@link JSONPathSpec} that holds three
 * PATH field specs, asserts on the first parsed row, then re-reads the file in sampling mode and compares the
 * raw values of the first sampled row against an expected JSON string.
 */
@Test
public void testProtoStructWithArray() throws IOException
{
  final String parquetPath = "example/compat/proto-struct-with-array.parquet";

  final TimestampSpec timestampSpec = new TimestampSpec(
      "timestamp",
      "auto",
      DateTimes.of("2018-09-01T00:00:00.000Z")
  );
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of()));
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final List<JSONPathFieldSpec> pathFields = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedOptional", "$.optionalMessage.someId"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedRequired", "$.requiredMessage.someId"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedRepeated", "$.repeatedMessage[*]")
  );
  final JSONPathSpec pathSpec = new JSONPathSpec(true, pathFields);

  // First pass: parse the file into rows and inspect the first one.
  final InputEntityReader parsingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(parsingReader);
  final InputRow firstRow = parsedRows.get(0);

  Assert.assertEquals("2018-09-01T00:00:00.000Z", firstRow.getTimestamp().toString());
  Assert.assertEquals("10", firstRow.getDimension("optionalPrimitive").get(0));
  Assert.assertEquals("9", firstRow.getDimension("requiredPrimitive").get(0));
  Assert.assertTrue(firstRow.getDimension("repeatedPrimitive").isEmpty());
  Assert.assertTrue(firstRow.getDimension("extractedOptional").isEmpty());
  Assert.assertEquals("9", firstRow.getDimension("extractedRequired").get(0));
  Assert.assertEquals("9", firstRow.getDimension("extractedRepeated").get(0));
  Assert.assertEquals("10", firstRow.getDimension("extractedRepeated").get(1));

  // Second pass: a fresh reader is created for sampling so the raw values can be checked.
  final InputEntityReader samplingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(samplingReader);

  // NOTE(review): the nested "someId" key carries the same single-space indent as the top-level keys;
  // confirm this literal matches what DEFAULT_JSON_WRITER actually emits.
  final String expectedRawJson = "{\n"
                                 + " \"optionalMessage\" : { },\n"
                                 + " \"requiredPrimitive\" : 9,\n"
                                 + " \"repeatedPrimitive\" : { },\n"
                                 + " \"repeatedMessage\" : [ 9, 10 ],\n"
                                 + " \"optionalPrimitive\" : 10,\n"
                                 + " \"requiredMessage\" : {\n"
                                 + " \"someId\" : 9\n"
                                 + " }\n"
                                 + "}";
  final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
  Assert.assertEquals(expectedRawJson, actualRawJson);
}
Use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.
From class CompatParquetReaderTest, method testParquet1217.
/**
 * Reads {@code example/compat/parquet-1217.parquet} with a {@link JSONPathSpec} that exposes {@code col} both as
 * a ROOT field and, via the path {@code $.col}, as {@code metric1}. Asserts on the first and fifth parsed rows,
 * then compares the raw values of the first sampled row against an expected JSON string.
 */
@Test
public void testParquet1217() throws IOException
{
  final String parquetPath = "example/compat/parquet-1217.parquet";

  final TimestampSpec timestampSpec = new TimestampSpec(
      "timestamp",
      "auto",
      DateTimes.of("2018-09-01T00:00:00.000Z")
  );
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of()));
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final List<JSONPathFieldSpec> pathFields = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "col", "col"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.col")
  );
  final JSONPathSpec pathSpec = new JSONPathSpec(true, pathFields);

  // First pass: parse the file into rows.
  final InputEntityReader parsingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(parsingReader);
  final InputRow firstRow = parsedRows.get(0);

  Assert.assertEquals("2018-09-01T00:00:00.000Z", firstRow.getTimestamp().toString());
  Assert.assertEquals("-1", firstRow.getDimension("col").get(0));
  Assert.assertEquals(-1, firstRow.getMetric("metric1"));
  // The row at index 4 is expected to carry no value for "col".
  Assert.assertTrue(parsedRows.get(4).getDimension("col").isEmpty());

  // Second pass: a fresh reader is created for sampling so the raw values can be checked.
  final InputEntityReader samplingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(samplingReader);

  final String expectedRawJson = "{\n"
                                 + " \"col\" : -1\n"
                                 + "}";
  final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
  Assert.assertEquals(expectedRawJson, actualRawJson);
}
Use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.
From class DecimalParquetReaderTest, method testReadParquetDecimali32.
/**
 * Reads {@code example/decimals/dec-in-i32.parquet} with {@code i32_dec} declared as a dimension and also
 * extracted via the path {@code $.i32_dec} as {@code metric1}. Asserts on the second parsed row (index 1), then
 * compares the raw values of the second sampled row against an expected JSON string.
 */
@Test
public void testReadParquetDecimali32() throws IOException
{
  final String parquetPath = "example/decimals/dec-in-i32.parquet";

  final TimestampSpec timestampSpec = new TimestampSpec(
      "timestamp",
      "auto",
      DateTimes.of("2018-09-01T00:00:00.000Z")
  );
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec"))
  );
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final List<JSONPathFieldSpec> pathFields = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i32_dec"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.i32_dec")
  );
  final JSONPathSpec pathSpec = new JSONPathSpec(true, pathFields);

  // First pass: parse the file into rows; the assertions target the row at index 1.
  final InputEntityReader parsingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(parsingReader);
  final InputRow secondRow = parsedRows.get(1);

  Assert.assertEquals("2018-09-01T00:00:00.000Z", secondRow.getTimestamp().toString());
  Assert.assertEquals("100", secondRow.getDimension("i32_dec").get(0));
  Assert.assertEquals(new BigDecimal(100), secondRow.getMetric("metric1"));

  // Second pass: a fresh reader is created for sampling so the raw values can be checked.
  final InputEntityReader samplingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(samplingReader);

  final String expectedRawJson = "{\n"
                                 + " \"i32_dec\" : 100\n"
                                 + "}";
  final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(1).getRawValues());
  Assert.assertEquals(expectedRawJson, actualRawJson);
}
Use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.
From class FlattenSpecParquetReaderTest, method testNested1Flatten.
/**
 * Reads {@code example/flattening/test_nested_1.parquet} with a {@link JSONPathSpec} that names two ROOT fields
 * and four PATH fields under {@code $.nestedData}. Asserts on the dimensions and metrics of the first parsed
 * row, then compares the raw values of the first sampled row against {@code NESTED_JSON}.
 */
@Test
public void testNested1Flatten() throws IOException
{
  final String parquetPath = "example/flattening/test_nested_1.parquet";

  final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of()));
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final List<JSONPathFieldSpec> pathFields = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim2", "$.nestedData.dim2"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim3", "$.nestedData.dim3"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric2", "$.nestedData.metric2"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "listDim", "$.nestedData.listDim[*]")
  );
  final JSONPathSpec pathSpec = new JSONPathSpec(true, pathFields);

  // First pass: parse the file into rows and inspect the first one.
  final InputEntityReader parsingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(parsingReader);
  final InputRow firstRow = parsedRows.get(0);

  Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
  Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
  Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
  Assert.assertEquals("1", firstRow.getDimension("dim3").get(0));
  Assert.assertEquals("listDim1v1", firstRow.getDimension("listDim").get(0));
  Assert.assertEquals("listDim1v2", firstRow.getDimension("listDim").get(1));
  Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
  Assert.assertEquals(2, firstRow.getMetric("metric2").longValue());

  // Second pass: a fresh reader is created for sampling so the raw values can be checked.
  final InputEntityReader samplingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(samplingReader);

  final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
  Assert.assertEquals(NESTED_JSON, actualRawJson);
}
Use of org.apache.druid.java.util.common.parsers.JSONPathSpec in project druid by druid-io.
From class FlattenSpecParquetReaderTest, method testNested1NoFlattenSpec.
/**
 * Reads {@code example/flattening/test_nested_1.parquet} with only {@code dim1} declared as a dimension and a
 * {@link JSONPathSpec} built from {@code false} and an empty field list. Asserts that the first parsed row
 * reports exactly one dimension (none of the nested names), then compares the raw values of the first sampled
 * row against {@code NESTED_JSON}.
 */
@Test
public void testNested1NoFlattenSpec() throws IOException
{
  final String parquetPath = "example/flattening/test_nested_1.parquet";

  final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1"))
  );
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
  final JSONPathSpec pathSpec = new JSONPathSpec(false, ImmutableList.of());

  // First pass: parse the file into rows and inspect the first one.
  final InputEntityReader parsingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(parsingReader);
  final InputRow firstRow = parsedRows.get(0);

  Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
  Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));

  // Only the declared dimension should be reported; nested names must be absent.
  final List<String> dimensionNames = firstRow.getDimensions();
  Assert.assertEquals(1, dimensionNames.size());
  Assert.assertFalse(dimensionNames.contains("dim2"));
  Assert.assertFalse(dimensionNames.contains("dim3"));
  Assert.assertFalse(dimensionNames.contains("listDim"));
  Assert.assertFalse(dimensionNames.contains("nestedData"));
  Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());

  // Second pass: a fresh reader is created for sampling so the raw values can be checked.
  final InputEntityReader samplingReader = createReader(parquetPath, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(samplingReader);

  final String actualRawJson = DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues());
  Assert.assertEquals(NESTED_JSON, actualRawJson);
}
Aggregations