Use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in the druid project by druid-io.
From the class DecimalParquetReaderTest, method testReadParquetDecimali32.
/**
 * Reads {@code example/decimals/dec-in-i32.parquet} through a flatten spec and verifies the
 * row at index 1: its timestamp, the {@code i32_dec} dimension, and the {@code metric1}
 * metric extracted via the {@code $.i32_dec} path. A second, freshly created reader is then
 * sampled and the raw values of the entry at index 1 are compared against the expected JSON.
 */
@Test
public void testReadParquetDecimali32() throws IOException
{
  final String parquetFile = "example/decimals/dec-in-i32.parquet";

  // Schema pieces are named individually; construction order matches the single-expression form.
  final TimestampSpec timestampSpec =
      new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z"));
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("i32_dec")));
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  // The same column is exposed twice: once as a root field, once through a path expression.
  final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "i32_dec", "i32_dec"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.i32_dec")
  );
  final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

  // Pass 1: parsed rows.
  final InputEntityReader rowReader = createReader(parquetFile, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(rowReader);
  final InputRow secondRow = parsedRows.get(1);
  Assert.assertEquals("2018-09-01T00:00:00.000Z", secondRow.getTimestamp().toString());
  Assert.assertEquals("100", secondRow.getDimension("i32_dec").get(0));
  Assert.assertEquals(new BigDecimal(100), secondRow.getMetric("metric1"));

  // Pass 2: sampled raw values, read with a new reader over the same file.
  final InputEntityReader sampleReader = createReader(parquetFile, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
  final String expectedRawJson = "{\n"
                                 + " \"i32_dec\" : 100\n"
                                 + "}";
  Assert.assertEquals(
      expectedRawJson,
      DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(1).getRawValues())
  );
}
Use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in the druid project by druid-io.
From the class FlattenSpecParquetReaderTest, method testNested1Flatten.
/**
 * Reads {@code example/flattening/test_nested_1.parquet} with an explicit flatten spec that
 * pulls {@code dim2}, {@code dim3}, {@code metric2} and {@code listDim} out of
 * {@code nestedData}, then verifies the first row. A second reader over the same file is
 * sampled and its first entry's raw values are compared with {@code NESTED_JSON}.
 */
@Test
public void testNested1Flatten() throws IOException
{
  final String parquetFile = "example/flattening/test_nested_1.parquet";

  final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of()));
  final InputRowSchema rowSchema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());

  final List<JSONPathFieldSpec> fieldSpecs = ImmutableList.of(
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "timestamp", null),
      new JSONPathFieldSpec(JSONPathFieldType.ROOT, "dim1", null),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim2", "$.nestedData.dim2"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "dim3", "$.nestedData.dim3"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric2", "$.nestedData.metric2"),
      new JSONPathFieldSpec(JSONPathFieldType.PATH, "listDim", "$.nestedData.listDim[*]")
  );
  final JSONPathSpec pathSpec = new JSONPathSpec(true, fieldSpecs);

  // Pass 1: parsed rows; only the first row is inspected.
  final InputEntityReader rowReader = createReader(parquetFile, rowSchema, pathSpec);
  final List<InputRow> parsedRows = readAllRows(rowReader);
  final InputRow firstRow = parsedRows.get(0);

  Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
  Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));
  Assert.assertEquals("d2v1", firstRow.getDimension("dim2").get(0));
  Assert.assertEquals("1", firstRow.getDimension("dim3").get(0));

  final List<String> listDimValues = firstRow.getDimension("listDim");
  Assert.assertEquals("listDim1v1", listDimValues.get(0));
  Assert.assertEquals("listDim1v2", listDimValues.get(1));

  // metric1 is not among the flatten expressions above, yet it is expected on the row.
  // NOTE(review): presumably available because the JSONPathSpec is built with `true` — confirm.
  Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());
  Assert.assertEquals(2, firstRow.getMetric("metric2").longValue());

  // Pass 2: sampled raw values, read with a new reader over the same file.
  final InputEntityReader sampleReader = createReader(parquetFile, rowSchema, pathSpec);
  final List<InputRowListPlusRawValues> sampledRows = sampleAllRows(sampleReader);
  Assert.assertEquals(
      NESTED_JSON,
      DEFAULT_JSON_WRITER.writeValueAsString(sampledRows.get(0).getRawValues())
  );
}
Use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in the druid project by druid-io.
From the class OrcReaderTest, method testJsonPathFunctions.
/**
 * Applies JSONPath function expressions ({@code min}, {@code max}, {@code avg},
 * {@code length}, {@code sum}, {@code stddev}, {@code append}) to the {@code list} column
 * of {@code example/test_json_path_functions.orc} and checks the resulting dimensions.
 *
 * schema: struct<string1:string, list:array<int>, ts:timestamp>
 * data: {"dim1","[7,8,9]","2000-03-12 15:00:00"}
 */
@Test
public void testJsonPathFunctions() throws IOException
{
  final JSONPathSpec pathSpec = new JSONPathSpec(
      true,
      ImmutableList.of(
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "min", "$.list.min()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "max", "$.list.max()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "avg", "$.list.avg()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "len", "$.list.length()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "sum", "$.list.sum()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "stddev", "$.list.stddev()"),
          new JSONPathFieldSpec(JSONPathFieldType.PATH, "append", "$.list.append(10)")
      )
  );
  final OrcInputFormat inputFormat = new OrcInputFormat(pathSpec, null, new Configuration());

  final TimestampSpec timestampSpec = new TimestampSpec("ts", "millis", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
  final InputEntityReader reader = createReader(
      timestampSpec,
      dimensionsSpec,
      inputFormat,
      "example/test_json_path_functions.orc"
  );

  try (CloseableIterator<InputRow> rows = reader.read()) {
    int rowsSeen = 0;
    while (rows.hasNext()) {
      final InputRow current = rows.next();

      Assert.assertEquals("7.0", Iterables.getOnlyElement(current.getDimension("min")));
      Assert.assertEquals("8.0", Iterables.getOnlyElement(current.getDimension("avg")));
      Assert.assertEquals("9.0", Iterables.getOnlyElement(current.getDimension("max")));
      Assert.assertEquals("24.0", Iterables.getOnlyElement(current.getDimension("sum")));
      Assert.assertEquals("3", Iterables.getOnlyElement(current.getDimension("len")));

      // Population variance of [7,8,9] is ((-1)^2 + 0^2 + 1^2) / 3 = 2/3,
      // so stddev is sqrt(2/3), approximately 0.8165.
      final double stddev = Double.parseDouble(Iterables.getOnlyElement(current.getDimension("stddev")));
      Assert.assertEquals(0.8165, stddev, 0.0001);

      // append is not supported
      Assert.assertEquals(Collections.emptyList(), current.getDimension("append"));

      rowsSeen++;
    }
    // The file is expected to hold exactly one row.
    Assert.assertEquals(1, rowsSeen);
  }
}
Use of org.apache.druid.java.util.common.parsers.JSONPathFieldSpec in the druid project by druid-io.
From the class KafkaInputFormatTest, method testWithOutKeyAndHeaderSpecs.
/**
 * Parses a Kafka record carrying a JSON payload and a null key using a
 * {@code KafkaInputFormat} whose first two constructor arguments are {@code null}, so only
 * the value format is supplied. Verifies that the key column is empty and that the payload
 * fields, including the flattened ROOT / PATH / JQ fields, are present on the single row.
 */
@Test
public void testWithOutKeyAndHeaderSpecs() throws IOException
{
  final byte[] payload = StringUtils.toUtf8(
      "{\n"
      + " \"timestamp\": \"2021-06-24\",\n"
      + " \"bar\": null,\n"
      + " \"foo\": \"x\",\n"
      + " \"baz\": 4,\n"
      + " \"o\": {\n"
      + " \"mg\": 1\n"
      + " }\n"
      + "}"
  );

  final Headers recordHeaders = new RecordHeaders(SAMPLE_HEADERS);
  // The argument before the payload is the record key; it is deliberately null here.
  final ConsumerRecord<byte[], byte[]> kafkaRecord = new ConsumerRecord<byte[], byte[]>(
      "sample",
      0,
      0,
      timestamp,
      null,
      null,
      0,
      0,
      null,
      payload,
      recordHeaders
  );
  inputEntity = new KafkaRecordEntity(kafkaRecord);

  // Value Format
  final JsonInputFormat valueFormat = new JsonInputFormat(
      new JSONPathSpec(
          true,
          ImmutableList.of(
              new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
              new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
              new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
              new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
              new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
              new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
          )
      ),
      // make sure JsonReader is used
      null,
      null,
      false
  );
  final KafkaInputFormat localFormat = new KafkaInputFormat(
      null,
      null,
      valueFormat,
      "kafka.newheader.",
      "kafka.newkey.",
      "kafka.newts."
  );

  final InputRowSchema rowSchema = new InputRowSchema(
      new TimestampSpec("timestamp", "iso", null),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo", "kafka.newts.timestamp"))),
      ColumnsFilter.all()
  );
  final InputEntityReader reader = localFormat.createReader(rowSchema, inputEntity, null);

  final int expectedRowCount = 1;
  try (CloseableIterator<InputRow> rows = reader.read()) {
    int actualRowCount = 0;
    while (rows.hasNext()) {
      final InputRow current = rows.next();
      actualRowCount++;

      // Key verification: the key column must be empty.
      Assert.assertTrue(current.getDimension("kafka.newkey.key").isEmpty());

      // Payload verification: plain fields and flattened fields.
      Assert.assertEquals("x", Iterables.getOnlyElement(current.getDimension("foo")));
      Assert.assertEquals("4", Iterables.getOnlyElement(current.getDimension("baz")));
      Assert.assertEquals("4", Iterables.getOnlyElement(current.getDimension("root_baz")));
      Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("path_omg")));
      Assert.assertEquals("1", Iterables.getOnlyElement(current.getDimension("jq_omg")));
    }
    Assert.assertEquals(expectedRowCount, actualRowCount);
  }
}
Aggregations