Search in sources :

Example 26 with InputRowListPlusRawValues

use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

In the class FlattenSpecParquetReaderTest, the method testNested1NoFlattenSpec:

@Test
public void testNested1NoFlattenSpec() throws IOException {
    // Read a nested parquet file with flattening disabled: only the declared
    // top-level "dim1" dimension should be discovered; nested fields stay untouched.
    final String file = "example/flattening/test_nested_1.parquet";
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec =
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1")));
    final InputRowSchema schema = new InputRowSchema(timestampSpec, dimensionsSpec, ColumnsFilter.all());
    final JSONPathSpec flattenSpec = new JSONPathSpec(false, ImmutableList.of());

    InputEntityReader reader = createReader(file, schema, flattenSpec);
    final List<InputRow> rows = readAllRows(reader);
    final InputRow firstRow = rows.get(0);

    Assert.assertEquals(FlattenSpecParquetInputTest.TS1, firstRow.getTimestamp().toString());
    Assert.assertEquals("d1v1", firstRow.getDimension("dim1").get(0));

    // Without a flatten spec, none of the nested or list fields become dimensions.
    final List<String> dims = firstRow.getDimensions();
    Assert.assertEquals(1, dims.size());
    for (String absentDim : new String[]{"dim2", "dim3", "listDim", "nestedData"}) {
        Assert.assertFalse(dims.contains(absentDim));
    }
    Assert.assertEquals(1, firstRow.getMetric("metric1").longValue());

    // Sampling the same file should expose the raw, unflattened nested JSON.
    reader = createReader(file, schema, flattenSpec);
    final List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
    Assert.assertEquals(NESTED_JSON, DEFAULT_JSON_WRITER.writeValueAsString(sampled.get(0).getRawValues()));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 27 with InputRowListPlusRawValues

use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

In the class TransformerTest, the method testInputRowListPlusRawValuesTransformWithFilter:

@Test
public void testInputRowListPlusRawValuesTransformWithFilter() {
    // The selector filter keeps only rows where dim == "val1"; the second row
    // must be dropped from both the parsed rows and the raw-values list.
    final Transformer transformer =
        new Transformer(new TransformSpec(new SelectorDimFilter("dim", "val1", null), null));

    final InputRow keptRow =
        new MapBasedInputRow(DateTimes.nowUtc(), ImmutableList.of("dim"), ImmutableMap.of("dim", "val1"));
    // this row will be filtered
    final InputRow filteredRow =
        new MapBasedInputRow(DateTimes.nowUtc(), ImmutableList.of("dim"), ImmutableMap.of("dim", "val2"));
    final List<InputRow> rows = Arrays.asList(keptRow, filteredRow);
    final List<Map<String, Object>> valList =
        Arrays.asList(ImmutableMap.of("dim", "val1"), ImmutableMap.of("dim", "val2"));

    final InputRowListPlusRawValues actual =
        transformer.transform(InputRowListPlusRawValues.ofList(valList, rows));

    Assert.assertNotNull(actual);
    Assert.assertEquals(1, actual.getInputRows().size());
    Assert.assertEquals(1, actual.getRawValuesList().size());
    Assert.assertEquals("val1", actual.getInputRows().get(0).getRaw("dim"));
    Assert.assertEquals("val1", actual.getRawValuesList().get(0).get("dim"));
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ImmutableMap(com.google.common.collect.ImmutableMap) Map(java.util.Map) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 28 with InputRowListPlusRawValues

use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

In the class SqlInputSourceTest, the method testSample:

@Test
public void testSample() throws Exception {
    // Sampling a SQL input source should yield entries without parse exceptions,
    // and the flattened rows should match the data seeded into the Derby table.
    derbyConnector = derbyConnectorRule.getConnector();
    final SqlTestUtils testUtils = new SqlTestUtils(derbyConnector);
    testUtils.createAndUpdateTable(TABLE_NAME_1, 10);
    try {
        final File tempDir = createFirehoseTmpDir("testSingleSplit");
        final SqlInputSource sqlInputSource =
            new SqlInputSource(SQLLIST1, true, testUtils.getDerbyFirehoseConnector(), mapper);
        final InputSourceReader sqlReader = sqlInputSource.fixedFormatReader(INPUT_ROW_SCHEMA, tempDir);

        final List<InputRowListPlusRawValues> sampled = new ArrayList<>();
        final CloseableIterator<InputRowListPlusRawValues> resultIterator = sqlReader.sample();
        while (resultIterator.hasNext()) {
            final InputRowListPlusRawValues entry = resultIterator.next();
            Assert.assertNull(entry.getParseException());
            sampled.add(entry);
        }
        assertResult(
            sampled.stream().flatMap(r -> r.getInputRows().stream()).collect(Collectors.toList()),
            SQLLIST1
        );
    } finally {
        // Always drop the table so later tests start from a clean schema.
        testUtils.dropTable(TABLE_NAME_1);
    }
}
Also used : InputSourceReader(org.apache.druid.data.input.InputSourceReader) InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) ArrayList(java.util.ArrayList) File(java.io.File) Test(org.junit.Test)

Example 29 with InputRowListPlusRawValues

use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

In the class JsonReaderTest, the method testSampleEmptyText:

@Test
public void testSampleEmptyText() throws IOException {
    // Sampling empty input should still iterate exactly once, surfacing a parse exception.
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    // final argument is false to make sure JsonReader is used
    final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null, false);

    // input is empty
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(""));
    final InputRowSchema schema = new InputRowSchema(
        new TimestampSpec("timestamp", "iso", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
        ColumnsFilter.all()
    );
    final InputEntityReader reader = format.createReader(schema, source, null);

    // the total num of iteration is 1
    final int numExpectedIterations = 1;
    int numActualIterations = 0;
    try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
        while (iterator.hasNext()) {
            numActualIterations++;
            Assert.assertNotNull(iterator.next().getParseException());
        }
        Assert.assertEquals(numExpectedIterations, numActualIterations);
    }
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Example 30 with InputRowListPlusRawValues

use of org.apache.druid.data.input.InputRowListPlusRawValues in project druid by druid-io.

In the class JsonReaderTest, the method testSampleMultipleRows:

@Test
public void testSampleMultipleRows() throws IOException {
    // Three JSON objects arrive in one entity; sample() should return them
    // together as a single InputRowListPlusRawValues of three rows.
    final JSONPathSpec flattenSpec = new JSONPathSpec(
        true,
        ImmutableList.of(
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz", "baz"),
            new JSONPathFieldSpec(JSONPathFieldType.ROOT, "root_baz2", "baz2"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
        )
    );
    // final argument is false to make sure JsonReader is used
    final JsonInputFormat format = new JsonInputFormat(flattenSpec, null, null, false);
    final ByteEntity source = new ByteEntity(StringUtils.toUtf8(
        "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":1}}"
        + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":2}}\n"
        + "{\"timestamp\":\"2019-01-01\",\"bar\":null,\"foo\":\"x\",\"baz\":4,\"o\":{\"mg\":3}}\n"
    ));
    final InputEntityReader reader = format.createReader(
        new InputRowSchema(
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))),
            ColumnsFilter.all()
        ),
        source,
        null
    );

    int actualRowCount = 0;
    try (CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample()) {
        while (iterator.hasNext()) {
            final InputRowListPlusRawValues rawValues = iterator.next();
            // 3 rows returned together
            Assert.assertEquals(3, rawValues.getInputRows().size());
            for (InputRow row : rawValues.getInputRows()) {
                // "mg" values were written as 1, 2, 3 — one per row, in order.
                final String expectedMg = String.valueOf(++actualRowCount);
                Assert.assertEquals(DateTimes.of("2019-01-01"), row.getTimestamp());
                Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
                Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
                Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
                Assert.assertEquals(expectedMg, Iterables.getOnlyElement(row.getDimension("path_omg")));
                Assert.assertEquals(expectedMg, Iterables.getOnlyElement(row.getDimension("jq_omg")));
                // The *2 variants target fields absent from the input, so they resolve empty.
                Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
                Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
                Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
            }
        }
    }
    Assert.assertEquals(3, actualRowCount);
}
Also used : InputRowListPlusRawValues(org.apache.druid.data.input.InputRowListPlusRawValues) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Test(org.junit.Test)

Aggregations

InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)34 Test (org.junit.Test)31 InputRow (org.apache.druid.data.input.InputRow)29 InputEntityReader (org.apache.druid.data.input.InputEntityReader)26 InputRowSchema (org.apache.druid.data.input.InputRowSchema)24 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)21 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)21 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)17 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)15 HashMap (java.util.HashMap)4 BigDecimal (java.math.BigDecimal)3 ArrayList (java.util.ArrayList)3 Map (java.util.Map)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 File (java.io.File)2 AvroHadoopInputRowParserTest (org.apache.druid.data.input.AvroHadoopInputRowParserTest)2 AvroStreamInputRowParserTest (org.apache.druid.data.input.AvroStreamInputRowParserTest)2 InputSourceReader (org.apache.druid.data.input.InputSourceReader)2 DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper)2 ImmutableMap (com.google.common.collect.ImmutableMap)1