Search in sources :

Example 71 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class OrcReaderTest method testOrcFile11Format.

/**
 * Reads {@code example/orc-file-11-format.orc} with a set of JSON-path flattening rules and
 * verifies the contents of the first row, the contents of the last row, and the total row count.
 *
 * <p>This test is migrated from OrcHadoopInputRowParserTest.
 */
@Test
public void testOrcFile11Format() throws IOException {
    // Path expressions that lift nested struct/list/map values into named top-level fields.
    final ImmutableList<JSONPathFieldSpec> flattenFields = ImmutableList.of(
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_int", "$.middle.list[1].int1"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_intlist", "$.middle.list[*].int1"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "struct_list_struct_middleListLength", "$.middle.list.length()"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "list_struct_string", "$.list[0].string1"),
        new JSONPathFieldSpec(JSONPathFieldType.PATH, "map_struct_int", "$.map.chani.int1")
    );
    final OrcInputFormat inputFormat = new OrcInputFormat(new JSONPathSpec(true, flattenFields), null, new Configuration());
    final TimestampSpec timestampSpec = new TimestampSpec("ts", "millis", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final InputEntityReader reader = createReader(timestampSpec, dimensionsSpec, inputFormat, "example/orc-file-11-format.orc");

    try (CloseableIterator<InputRow> rows = reader.read()) {
        // The file must contain at least one row; inspect it in detail.
        Assert.assertTrue(rows.hasNext());
        final InputRow firstRow = rows.next();
        int rowsSeen = 1;

        Assert.assertEquals("false", Iterables.getOnlyElement(firstRow.getDimension("boolean1")));
        Assert.assertEquals("1", Iterables.getOnlyElement(firstRow.getDimension("byte1")));
        Assert.assertEquals("1024", Iterables.getOnlyElement(firstRow.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(firstRow.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(firstRow.getDimension("long1")));
        Assert.assertEquals("1.0", Iterables.getOnlyElement(firstRow.getDimension("float1")));
        Assert.assertEquals("-15.0", Iterables.getOnlyElement(firstRow.getDimension("double1")));
        Assert.assertEquals("AAECAwQAAA==", Iterables.getOnlyElement(firstRow.getDimension("bytes1")));
        Assert.assertEquals("hi", Iterables.getOnlyElement(firstRow.getDimension("string1")));
        Assert.assertEquals("1.23456786547456E7", Iterables.getOnlyElement(firstRow.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(firstRow.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), firstRow.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("good", Iterables.getOnlyElement(firstRow.getDimension("list_struct_string")));
        Assert.assertEquals("2", Iterables.getOnlyElement(firstRow.getDimension("struct_list_struct_middleListLength")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:00.0Z"), firstRow.getTimestamp());

        // Drain the iterator, remembering the most recent row so the final one can be inspected.
        InputRow lastRow = firstRow;
        while (rows.hasNext()) {
            lastRow = rows.next();
            rowsSeen++;
        }

        Assert.assertEquals("true", Iterables.getOnlyElement(lastRow.getDimension("boolean1")));
        Assert.assertEquals("100", Iterables.getOnlyElement(lastRow.getDimension("byte1")));
        Assert.assertEquals("2048", Iterables.getOnlyElement(lastRow.getDimension("short1")));
        Assert.assertEquals("65536", Iterables.getOnlyElement(lastRow.getDimension("int1")));
        Assert.assertEquals("9223372036854775807", Iterables.getOnlyElement(lastRow.getDimension("long1")));
        Assert.assertEquals("2.0", Iterables.getOnlyElement(lastRow.getDimension("float1")));
        Assert.assertEquals("-5.0", Iterables.getOnlyElement(lastRow.getDimension("double1")));
        Assert.assertEquals("", Iterables.getOnlyElement(lastRow.getDimension("bytes1")));
        Assert.assertEquals("bye", Iterables.getOnlyElement(lastRow.getDimension("string1")));
        Assert.assertEquals("1.23456786547457E7", Iterables.getOnlyElement(lastRow.getDimension("decimal1")));
        Assert.assertEquals("2", Iterables.getOnlyElement(lastRow.getDimension("struct_list_struct_int")));
        Assert.assertEquals(ImmutableList.of("1", "2"), lastRow.getDimension("struct_list_struct_intlist"));
        Assert.assertEquals("cat", Iterables.getOnlyElement(lastRow.getDimension("list_struct_string")));
        Assert.assertEquals("5", Iterables.getOnlyElement(lastRow.getDimension("map_struct_int")));
        Assert.assertEquals(DateTimes.of("2000-03-12T15:00:01.000Z"), lastRow.getTimestamp());

        // Every row of the file must have been produced.
        Assert.assertEquals(7500, rowsSeen);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 72 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class OrcReaderTest method testDate1900.

/**
 * Reads {@code example/TestOrcFile.testDate1900.orc}, excluding the {@code time} column from the
 * dimensions, and verifies the first row's timestamp and {@code date} dimension as well as the
 * total number of rows.
 *
 * <p>This test is migrated from OrcHadoopInputRowParserTest.
 */
@Test
public void testDate1900() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("time", "millis", null);
    final DimensionsSpec dimensionsSpec = DimensionsSpec.builder()
        .setDimensionExclusions(Collections.singletonList("time"))
        .build();
    final OrcInputFormat inputFormat = new OrcInputFormat(new JSONPathSpec(true, null), null, new Configuration());
    final InputEntityReader reader = createReader(timestampSpec, dimensionsSpec, inputFormat, "example/TestOrcFile.testDate1900.orc");

    try (CloseableIterator<InputRow> rows = reader.read()) {
        Assert.assertTrue(rows.hasNext());
        final InputRow firstRow = rows.next();
        int rowsSeen = 1;

        // With "time" excluded, exactly one dimension is expected on the row.
        Assert.assertEquals(1, firstRow.getDimensions().size());
        Assert.assertEquals(DateTimes.of("1900-05-05T12:34:56.1Z"), firstRow.getTimestamp());
        Assert.assertEquals("1900-12-25T00:00:00.000Z", Iterables.getOnlyElement(firstRow.getDimension("date")));

        // Consume the remaining rows only to count them.
        while (rows.hasNext()) {
            rows.next();
            rowsSeen++;
        }
        Assert.assertEquals(70000, rowsSeen);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 73 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class OrcReaderTest method testDate2038.

/**
 * Reads {@code example/TestOrcFile.testDate2038.orc}, excluding the {@code time} column from the
 * dimensions, and verifies the first row's timestamp and {@code date} dimension as well as the
 * total number of rows.
 *
 * <p>This test is migrated from OrcHadoopInputRowParserTest.
 */
@Test
public void testDate2038() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("time", "millis", null);
    final DimensionsSpec dimensionsSpec = DimensionsSpec.builder()
        .setDimensionExclusions(Collections.singletonList("time"))
        .build();
    final OrcInputFormat inputFormat = new OrcInputFormat(new JSONPathSpec(true, null), null, new Configuration());
    final InputEntityReader reader = createReader(timestampSpec, dimensionsSpec, inputFormat, "example/TestOrcFile.testDate2038.orc");

    try (CloseableIterator<InputRow> rows = reader.read()) {
        Assert.assertTrue(rows.hasNext());
        final InputRow head = rows.next();

        // With "time" excluded, exactly one dimension is expected on the row.
        Assert.assertEquals(1, head.getDimensions().size());
        Assert.assertEquals(DateTimes.of("2038-05-05T12:34:56.1Z"), head.getTimestamp());
        Assert.assertEquals("2038-12-25T00:00:00.000Z", Iterables.getOnlyElement(head.getDimension("date")));

        // The head row has already been read; count it, then tally the rest.
        int total = 1;
        while (rows.hasNext()) {
            rows.next();
            total++;
        }
        Assert.assertEquals(212000, total);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 74 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class OrcReaderTest method testTest2.

/**
 * Reads {@code example/test_2.orc} with a single JSON-path flattening rule and verifies that it
 * yields exactly one row with the expected timestamp and dimension values.
 *
 * <p>This test is migrated from OrcHadoopInputRowParserTest.
 */
@Test
public void testTest2() throws IOException {
    final JSONPathFieldSpec nestedField = new JSONPathFieldSpec(JSONPathFieldType.PATH, "col7-subcol7", "$.col7.subcol7");
    final InputFormat inputFormat = new OrcInputFormat(
        new JSONPathSpec(true, Collections.singletonList(nestedField)),
        null,
        new Configuration()
    );
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final InputEntityReader reader = createReader(timestampSpec, dimensionsSpec, inputFormat, "example/test_2.orc");

    try (CloseableIterator<InputRow> rows = reader.read()) {
        Assert.assertTrue(rows.hasNext());
        final InputRow onlyRow = rows.next();

        Assert.assertEquals(DateTimes.of("2016-01-01T00:00:00.000Z"), onlyRow.getTimestamp());
        Assert.assertEquals("bar", Iterables.getOnlyElement(onlyRow.getDimension("col1")));
        Assert.assertEquals(ImmutableList.of("dat1", "dat2", "dat3"), onlyRow.getDimension("col2"));
        Assert.assertEquals("1.1", Iterables.getOnlyElement(onlyRow.getDimension("col3")));
        Assert.assertEquals("2", Iterables.getOnlyElement(onlyRow.getDimension("col4")));
        Assert.assertEquals("3.5", Iterables.getOnlyElement(onlyRow.getDimension("col5")));
        Assert.assertTrue(onlyRow.getDimension("col6").isEmpty());

        // No further rows may follow the one just checked.
        Assert.assertFalse(rows.hasNext());
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) InputFormat(org.apache.druid.data.input.InputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) JSONPathSpec(org.apache.druid.java.util.common.parsers.JSONPathSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) JSONPathFieldSpec(org.apache.druid.java.util.common.parsers.JSONPathFieldSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Example 75 with TimestampSpec

use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

the class OrcReaderTest method testTest1.

/**
 * Reads {@code example/test_1.orc} with explicitly declared dimensions {@code col1} and
 * {@code col2} and verifies that it yields exactly one row with the expected timestamp,
 * dimension values, and {@code val1} metric.
 *
 * <p>This test is migrated from OrcHadoopInputRowParserTest.
 */
@Test
public void testTest1() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(ImmutableList.of("col1", "col2"))
    );
    final OrcInputFormat inputFormat = new OrcInputFormat(null, null, new Configuration());
    final InputEntityReader reader = createReader(timestampSpec, dimensionsSpec, inputFormat, "example/test_1.orc");

    try (CloseableIterator<InputRow> rows = reader.read()) {
        Assert.assertTrue(rows.hasNext());
        final InputRow onlyRow = rows.next();

        Assert.assertEquals(DateTimes.of("2016-01-01T00:00:00.000Z"), onlyRow.getTimestamp());
        Assert.assertEquals("bar", Iterables.getOnlyElement(onlyRow.getDimension("col1")));
        Assert.assertEquals(ImmutableList.of("dat1", "dat2", "dat3"), onlyRow.getDimension("col2"));
        // The metric is compared as a double with a small tolerance.
        Assert.assertEquals(1.1, onlyRow.getMetric("val1").doubleValue(), 0.001);

        // No further rows may follow the one just checked.
        Assert.assertFalse(rows.hasNext());
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InputEntityReader(org.apache.druid.data.input.InputEntityReader) Test(org.junit.Test)

Aggregations

TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)154 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)113 Test (org.junit.Test)110 DataSchema (org.apache.druid.segment.indexing.DataSchema)49 InputRow (org.apache.druid.data.input.InputRow)47 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)41 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)39 InputRowSchema (org.apache.druid.data.input.InputRowSchema)37 InputEntityReader (org.apache.druid.data.input.InputEntityReader)33 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)32 JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec)30 JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec)29 ArrayList (java.util.ArrayList)28 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)28 StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser)27 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)27 JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec)25 InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues)21 DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory)21 ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec)20