use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class JsonReaderTest method testParseMultipleRows.
@Test
public void testParseMultipleRows() throws IOException {
final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, // make sure JsonReader is used
false);
final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}" + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":2}}\n" + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}\n"));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
final int numExpectedIterations = 3;
try (CloseableIterator<InputRow> iterator = reader.read()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
final InputRow row = iterator.next();
final String msgId = String.valueOf(++numActualIterations);
Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("path_omg")));
Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("jq_omg")));
Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class JsonReaderTest method testSampleMultipleRows.
@Test
public void testSampleMultipleRows() throws IOException {
final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, // make sure JsonReader is used
false);
final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}" + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":2}}\n" + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}\n"));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
int acturalRowCount = 0;
try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
while (iterator.hasNext()) {
final InputRowListPlusRawValues rawValues = iterator.next();
// 3 rows returned together
Assert.assertEquals(3, rawValues.getInputRows().size());
for (int i = 0; i < 3; i++) {
InputRow row = rawValues.getInputRows().get(i);
final String msgId = String.valueOf(++acturalRowCount);
Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("path_omg")));
Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("jq_omg")));
Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
}
}
}
Assert.assertEquals(3, acturalRowCount);
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class JsonReaderTest method testSamplInvalidJSONText.
@Test
public void testSamplInvalidJSONText() throws IOException {
final JsonInputFormat format = new JsonInputFormat(new JSONPathSpec(true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"), new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"), new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"))), null, null, // make sure JsonReader is used
false);
// 2nd row is ill-formed
final ByteEntity source = new ByteEntity(StringUtils.toUtf8("{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}" + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4xxx,\"o\":{\"mg\":2}}\n" + // value of baz is invalid
"{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}\n"));
final InputEntityReader reader = format.createReader(new InputRowSchema(new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), ColumnsFilter.all()), source, null);
// the invalid character in line 2 stops parsing of the 3-line text in a whole
// so the total num of iteration is 1
final int numExpectedIterations = 1;
try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
int numActualIterations = 0;
while (iterator.hasNext()) {
numActualIterations++;
final InputRowListPlusRawValues rawValues = iterator.next();
Assert.assertNotNull(rawValues.getParseException());
}
Assert.assertEquals(numExpectedIterations, numActualIterations);
}
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class TimestampsParquetReaderTest method testParseInt96Timestamp.
@Test
public void testParseInt96Timestamp() throws IOException {
// the source parquet file was found in apache spark sql repo tests, where it is known as impala_timestamp.parq
// it has a single column, "ts" which is an int96 timestamp
final String file = "example/timestamps/int96_timestamp.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("ts", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals("2001-01-01T01:01:01.000Z", rows.get(0).getTimestamp().toString());
reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n" + " \"ts\" : 978310861000\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
use of org.apache.druid.data.input.InputRowSchema in project druid by druid-io.
the class TimestampsParquetReaderTest method testTimeMillisInInt64.
@Test
public void testTimeMillisInInt64() throws IOException {
final String file = "example/timestamps/timemillis-in-i64.parquet";
InputRowSchema schema = new InputRowSchema(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
InputEntityReader reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals("1970-01-01T00:00:00.010Z", rows.get(0).getTimestamp().toString());
reader = createReader(file, schema, JSONPathSpec.DEFAULT);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n" + " \"time\" : 10\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Aggregations