Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.
From the class InlineFirehoseTest, method testMultiline:
@Test
public void testMultiline() {
InlineFirehose firehose = create(MULTILINE);

// Line 1: consumed through the plain nextRow() path.
Assert.assertTrue(firehose.hasMore());
InputRow firstRow = firehose.nextRow();
assertRowValue(VALUE_0, firstRow);

// Line 2: consumed through nextRowWithRaw() so the raw values can be inspected as well.
InputRowListPlusRawValues withRaw = firehose.nextRowWithRaw();
assertRowValue(VALUE_1, Iterables.getOnlyElement(withRaw.getInputRows()));

Map<String, Object> expectedRaw = new HashMap<>();
expectedRaw.put("timestamp", TIMESTAMP_1);
expectedRaw.put("value", VALUE_1);
Assert.assertEquals(expectedRaw, withRaw.getRawValues());
Assert.assertNull(withRaw.getParseException());

// Both lines consumed: the firehose is exhausted.
Assert.assertFalse(firehose.hasMore());
}
Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.
From the class CompatParquetReaderTest, method testBinaryAsString:
@Test
public void testBinaryAsString() throws IOException {
final String file = "example/compat/284a0e001476716b-56d5676f53bd6e85_115466471_data.0.parq";
InputRowSchema schema = new InputRowSchema(
    new TimestampSpec("ts", "auto", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("field"))),
    ColumnsFilter.all()
);

// One reader decodes the binary column as a UTF-8 string; the other leaves it base64-encoded.
InputEntityReader asStringReader = createReader(file, schema, JSONPathSpec.DEFAULT, true);
InputEntityReader asBinaryReader = createReader(file, schema, JSONPathSpec.DEFAULT, false);

InputRow decodedRow = readAllRows(asStringReader).get(0);
InputRow encodedRow = readAllRows(asBinaryReader).get(0);
Assert.assertEquals("hey this is &é(-è_çà)=^$ù*! Ω^^", decodedRow.getDimension("field").get(0));
Assert.assertEquals(1471800234, decodedRow.getTimestampFromEpoch());
Assert.assertEquals("aGV5IHRoaXMgaXMgJsOpKC3DqF/Dp8OgKT1eJMO5KiEgzqleXg==", encodedRow.getDimension("field").get(0));
Assert.assertEquals(1471800234, encodedRow.getTimestampFromEpoch());

// Fresh readers for the sampling pass over the same file.
asStringReader = createReader(file, schema, JSONPathSpec.DEFAULT, true);
asBinaryReader = createReader(file, schema, JSONPathSpec.DEFAULT, false);
List<InputRowListPlusRawValues> sampled = sampleAllRows(asStringReader);
List<InputRowListPlusRawValues> sampledAsBinary = sampleAllRows(asBinaryReader);

final String expectedJson = "{\n" + " \"field\" : \"hey this is &é(-è_çà)=^$ù*! Ω^^\",\n" + " \"ts\" : 1471800234\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));

final String expectedJsonBinary = "{\n" + " \"field\" : \"aGV5IHRoaXMgaXMgJsOpKC3DqF/Dp8OgKT1eJMO5KiEgzqleXg==\",\n" + " \"ts\" : 1471800234\n" + "}";
Assert.assertEquals(expectedJsonBinary, DEFAULT_JSON_WRITER.writeValueAsString(sampledAsBinary.get(0).getRawValues()));
}
Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.
From the class CompatParquetReaderTest, method testParquetThriftCompat:
// Reads a Thrift-compat parquet fixture (schema documented below) and verifies both the
// flattened InputRows and the pretty-printed raw values produced by the sampling path.
@Test
public void testParquetThriftCompat() throws IOException {
/*
message ParquetSchema {
required boolean boolColumn;
required int32 byteColumn;
required int32 shortColumn;
required int32 intColumn;
required int64 longColumn;
required double doubleColumn;
required binary binaryColumn (UTF8);
required binary stringColumn (UTF8);
required binary enumColumn (ENUM);
optional boolean maybeBoolColumn;
optional int32 maybeByteColumn;
optional int32 maybeShortColumn;
optional int32 maybeIntColumn;
optional int64 maybeLongColumn;
optional double maybeDoubleColumn;
optional binary maybeBinaryColumn (UTF8);
optional binary maybeStringColumn (UTF8);
optional binary maybeEnumColumn (ENUM);
required group stringsColumn (LIST) {
repeated binary stringsColumn_tuple (UTF8);
}
required group intSetColumn (LIST) {
repeated int32 intSetColumn_tuple;
}
required group intToStringColumn (MAP) {
repeated group map (MAP_KEY_VALUE) {
required int32 key;
optional binary value (UTF8);
}
}
required group complexColumn (MAP) {
repeated group map (MAP_KEY_VALUE) {
required int32 key;
optional group value (LIST) {
repeated group value_tuple {
required group nestedIntsColumn (LIST) {
repeated int32 nestedIntsColumn_tuple;
}
required binary nestedStringColumn (UTF8);
}
}
}
}
}
*/
final String file = "example/compat/parquet-thrift-compat.snappy.parquet";
// The TimestampSpec supplies 2018-09-01 as the fallback timestamp; auto-discover dimensions (empty list).
InputRowSchema schema = new InputRowSchema(new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())), ColumnsFilter.all());
// JSONPath expressions extract single values out of the logical MAP columns above.
List<JSONPathFieldSpec> flattenExpr = ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractByLogicalMap", "$.intToStringColumn.1"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractByComplexLogicalMap", "$.complexColumn.1[0].nestedIntsColumn[1]"));
JSONPathSpec flattenSpec = new JSONPathSpec(true, flattenExpr);
InputEntityReader reader = createReader(file, schema, flattenSpec);
List<InputRow> rows = readAllRows(reader);
Assert.assertEquals("2018-09-01T00:00:00.000Z", rows.get(0).getTimestamp().toString());
// Required primitive/binary/enum columns come back as string dimensions.
Assert.assertEquals("true", rows.get(0).getDimension("boolColumn").get(0));
Assert.assertEquals("0", rows.get(0).getDimension("byteColumn").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("shortColumn").get(0));
Assert.assertEquals("2", rows.get(0).getDimension("intColumn").get(0));
Assert.assertEquals("0", rows.get(0).getDimension("longColumn").get(0));
Assert.assertEquals("0.2", rows.get(0).getDimension("doubleColumn").get(0));
Assert.assertEquals("val_0", rows.get(0).getDimension("binaryColumn").get(0));
Assert.assertEquals("val_0", rows.get(0).getDimension("stringColumn").get(0));
Assert.assertEquals("SPADES", rows.get(0).getDimension("enumColumn").get(0));
// All optional ("maybe*") columns are empty for row 0 in this fixture.
Assert.assertTrue(rows.get(0).getDimension("maybeBoolColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeByteColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeShortColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeIntColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeLongColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeDoubleColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeBinaryColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeStringColumn").isEmpty());
Assert.assertTrue(rows.get(0).getDimension("maybeEnumColumn").isEmpty());
// LIST columns flatten into multi-valued dimensions.
Assert.assertEquals("arr_0", rows.get(0).getDimension("stringsColumn").get(0));
Assert.assertEquals("arr_1", rows.get(0).getDimension("stringsColumn").get(1));
Assert.assertEquals("0", rows.get(0).getDimension("intSetColumn").get(0));
// Values pulled out of the MAP columns by the JSONPath expressions above.
Assert.assertEquals("val_1", rows.get(0).getDimension("extractByLogicalMap").get(0));
Assert.assertEquals("1", rows.get(0).getDimension("extractByComplexLogicalMap").get(0));
// Recreate the reader for the sampling pass, then check the raw values as pretty-printed JSON.
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n" + " \"enumColumn\" : \"SPADES\",\n" + " \"maybeStringColumn\" : { },\n" + " \"maybeBinaryColumn\" : { },\n" + " \"shortColumn\" : 1,\n" + " \"byteColumn\" : 0,\n" + " \"maybeBoolColumn\" : { },\n" + " \"intColumn\" : 2,\n" + " \"doubleColumn\" : 0.2,\n" + " \"maybeByteColumn\" : { },\n" + " \"intSetColumn\" : [ 0 ],\n" + " \"boolColumn\" : true,\n" + " \"binaryColumn\" : \"val_0\",\n" + " \"maybeIntColumn\" : { },\n" + " \"intToStringColumn\" : {\n" + " \"0\" : \"val_0\",\n" + " \"1\" : \"val_1\",\n" + " \"2\" : \"val_2\"\n" + " },\n" + " \"maybeDoubleColumn\" : { },\n" + " \"maybeEnumColumn\" : { },\n" + " \"maybeLongColumn\" : { },\n" + " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n" + " \"longColumn\" : 0,\n" + " \"stringColumn\" : \"val_0\",\n" + " \"maybeShortColumn\" : { },\n" + " \"complexColumn\" : {\n" + " \"0\" : [ {\n" + " \"nestedStringColumn\" : \"val_0\",\n" + " \"nestedIntsColumn\" : [ 0, 1, 2 ]\n" + " }, {\n" + " \"nestedStringColumn\" : \"val_1\",\n" + " \"nestedIntsColumn\" : [ 1, 2, 3 ]\n" + " }, {\n" + " \"nestedStringColumn\" : \"val_2\",\n" + " \"nestedIntsColumn\" : [ 2, 3, 4 ]\n" + " } ],\n" + " \"1\" : [ {\n" + " \"nestedStringColumn\" : \"val_0\",\n" + " \"nestedIntsColumn\" : [ 0, 1, 2 ]\n" + " }, {\n" + " \"nestedStringColumn\" : \"val_1\",\n" + " \"nestedIntsColumn\" : [ 1, 2, 3 ]\n" + " }, {\n" + " \"nestedStringColumn\" : \"val_2\",\n" + " \"nestedIntsColumn\" : [ 2, 3, 4 ]\n" + " } ],\n" + " \"2\" : [ {\n" + " \"nestedStringColumn\" : \"val_0\",\n" + " \"nestedIntsColumn\" : [ 0, 1, 2 ]\n" + " }, {\n" + " \"nestedStringColumn\" : \"val_1\",\n" + " \"nestedIntsColumn\" : [ 1, 2, 3 ]\n" + " }, {\n" + " \"nestedStringColumn\" : \"val_2\",\n" + " \"nestedIntsColumn\" : [ 2, 3, 4 ]\n" + " } ]\n" + " }\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.
From the class CompatParquetReaderTest, method testProtoStructWithArray:
@Test
public void testProtoStructWithArray() throws IOException {
final String file = "example/compat/proto-struct-with-array.parquet";
InputRowSchema schema = new InputRowSchema(
    new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
    ColumnsFilter.all()
);
// JSONPath extractions over the optional/required/repeated message fields.
JSONPathSpec flattenSpec = new JSONPathSpec(
    true,
    ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedOptional", "$.optionalMessage.someId"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedRequired", "$.requiredMessage.someId"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "extractedRepeated", "$.repeatedMessage[*]")
    )
);

InputEntityReader reader = createReader(file, schema, flattenSpec);
InputRow row = readAllRows(reader).get(0);
Assert.assertEquals("2018-09-01T00:00:00.000Z", row.getTimestamp().toString());
Assert.assertEquals("10", row.getDimension("optionalPrimitive").get(0));
Assert.assertEquals("9", row.getDimension("requiredPrimitive").get(0));
Assert.assertTrue(row.getDimension("repeatedPrimitive").isEmpty());
Assert.assertTrue(row.getDimension("extractedOptional").isEmpty());
Assert.assertEquals("9", row.getDimension("extractedRequired").get(0));
Assert.assertEquals("9", row.getDimension("extractedRepeated").get(0));
Assert.assertEquals("10", row.getDimension("extractedRepeated").get(1));

// Fresh reader for the sampling pass; raw values serialize to stable pretty-printed JSON.
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n" + " \"optionalMessage\" : { },\n" + " \"requiredPrimitive\" : 9,\n" + " \"repeatedPrimitive\" : { },\n" + " \"repeatedMessage\" : [ 9, 10 ],\n" + " \"optionalPrimitive\" : 10,\n" + " \"requiredMessage\" : {\n" + " \"someId\" : 9\n" + " }\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.
From the class CompatParquetReaderTest, method testParquet1217:
@Test
public void testParquet1217() throws IOException {
final String file = "example/compat/parquet-1217.parquet";
InputRowSchema schema = new InputRowSchema(
    new TimestampSpec("timestamp", "auto", DateTimes.of("2018-09-01T00:00:00.000Z")),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of())),
    ColumnsFilter.all()
);
// "col" is exposed both as a root dimension and, via JSONPath, as metric "metric1".
JSONPathSpec flattenSpec = new JSONPathSpec(
    true,
    ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.ROOT, "col", "col"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "metric1", "$.col")
    )
);

InputEntityReader reader = createReader(file, schema, flattenSpec);
List<InputRow> rows = readAllRows(reader);
InputRow firstRow = rows.get(0);
Assert.assertEquals("2018-09-01T00:00:00.000Z", firstRow.getTimestamp().toString());
Assert.assertEquals("-1", firstRow.getDimension("col").get(0));
Assert.assertEquals(-1, firstRow.getMetric("metric1"));
// Row 4 carries a null "col" value.
Assert.assertTrue(rows.get(4).getDimension("col").isEmpty());

// Fresh reader for the sampling pass.
reader = createReader(file, schema, flattenSpec);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n" + " \"col\" : -1\n" + "}";
Assert.assertEquals(expectedJson, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Aggregations