
Example 6 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From the class DataSchemaTest, method testDuplicateAggregators.

@Test
public void testDuplicateAggregators() {
    Map<String, Object> parser = jsonMapper.convertValue(
        new StringInputRowParser(
            new JSONParseSpec(
                new TimestampSpec("time", "auto", null),
                DimensionsSpec.builder()
                              .setDimensions(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time")))
                              .setDimensionExclusions(ImmutableList.of("dimC"))
                              .build(),
                null,
                null,
                null
            ),
            null
        ),
        JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
    );
    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage(
        "Cannot specify a column more than once: [metric1] seen in metricsSpec list (2 occurrences); "
        + "[metric3] seen in metricsSpec list (2 occurrences)"
    );
    DataSchema schema = new DataSchema(
        IdUtilsTest.VALID_ID_CHARS,
        parser,
        new AggregatorFactory[] {
            new DoubleSumAggregatorFactory("metric1", "col1"),
            new DoubleSumAggregatorFactory("metric2", "col2"),
            new DoubleSumAggregatorFactory("metric1", "col3"),
            new DoubleSumAggregatorFactory("metric3", "col4"),
            new DoubleSumAggregatorFactory("metric3", "col5")
        },
        new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
        null,
        jsonMapper
    );
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IdUtilsTest(org.apache.druid.common.utils.IdUtilsTest)
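
The three constructor arguments seen here recur throughout these examples: the name of the timestamp column, a format string ("auto" lets Druid detect ISO 8601 strings or epoch millis; an explicit Joda-time pattern is also accepted, as in Example 9 below), and an optional default DateTime for rows missing the column. Below is a minimal standalone sketch, not taken from the Druid test suite; the extractTimestamp call reflects a reading of the class in recent Druid versions and should be verified against yours.

import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.joda.time.DateTime;

public class TimestampSpecSketch {
    public static void main(String[] args) {
        // Column "time", format "auto", no default for rows missing the column.
        TimestampSpec spec = new TimestampSpec("time", "auto", null);
        // Extract the timestamp from a raw row represented as a Map.
        DateTime ts = spec.extractTimestamp(ImmutableMap.of("time", "2014-10-22T00:00:00.000Z"));
        System.out.println(ts); // expected: 2014-10-22T00:00:00.000Z
    }
}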

Example 7 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From the class SegmentAnalyzerTest, method testAnalyzingSegmentWithNonExistentAggregator.

/**
 * Verifies that if a segment was created using an unknown or invalid aggregator
 * (which can happen when an aggregator is removed in a later version), analyzing
 * the segment does not fail; instead, the analysis of the complex column is
 * reported as an error.
 *
 * @throws IOException
 */
@Test
public void testAnalyzingSegmentWithNonExistentAggregator() throws IOException {
    final URL resource = SegmentAnalyzerTest.class.getClassLoader().getResource("druid.sample.numeric.tsv");
    CharSource source = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
    String invalid_aggregator = "invalid_aggregator";
    AggregatorFactory[] metrics = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory(TestIndex.DOUBLE_METRICS[0], "index"),
        new HyperUniquesAggregatorFactory("quality_uniques", "quality"),
        new InvalidAggregatorFactory(invalid_aggregator, "quality")
    };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
        .withTimestampSpec(new TimestampSpec("ds", "auto", null))
        .withDimensionsSpec(TestIndex.DIMENSIONS_SPEC)
        .withMetrics(metrics)
        .withRollup(true)
        .build();
    final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(10000)
        .build();
    IncrementalIndex incrementalIndex = TestIndex.loadIncrementalIndex(retVal, source);
    // Analyze the in-memory segment.
    {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        IncrementalIndexSegment segment = new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("ds"));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator);
        Assert.assertFalse(columnAnalysis.isError());
        Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType());
        Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature());
    }
    // Persist the index.
    final File segmentFile = TestIndex.INDEX_MERGER.persist(
        incrementalIndex,
        temporaryFolder.newFolder(),
        TestIndex.INDEX_SPEC,
        null
    );
    // Unload the complex serde, then analyze the persisted segment.
    ComplexMetrics.unregisterSerde(InvalidAggregatorFactory.TYPE);
    {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        QueryableIndexSegment segment = new QueryableIndexSegment(TestIndex.INDEX_IO.loadIndex(segmentFile), SegmentId.dummy("ds"));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator);
        Assert.assertTrue(invalidColumnAnalysis.isError());
        Assert.assertEquals("error:unknown_complex_invalid_complex_column_type", invalidColumnAnalysis.getErrorMessage());
        // Run a segment metadata query also to verify it doesn't break
        final List<SegmentAnalysis> results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        for (SegmentAnalysis result : results) {
            Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError());
        }
    }
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) CharSource(com.google.common.io.CharSource) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) URL(java.net.URL) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) List(java.util.List) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) Map(java.util.Map) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 8 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From the class SegmentMetadataQueryTest, method testSegmentMetadataQueryWithTimestampSpecMerge.

@Test
public void testSegmentMetadataQueryWithTimestampSpecMerge() {
    SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
        differentIds ? "merged" : SegmentId.dummy("testSegment").toString(),
        null,
        ImmutableMap.of(
            "placement",
            new ColumnAnalysis(ColumnType.STRING, ValueType.STRING.toString(), false, false, 0, 0, null, null, null)
        ),
        0,
        expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
        null,
        new TimestampSpec("ds", "auto", null),
        null,
        null
    );
    QueryToolChest toolChest = FACTORY.getToolchest();
    ExecutorService exec = Executors.newCachedThreadPool();
    QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(
            FACTORY.mergeRunners(
                Execs.directExecutor(),
                Lists.newArrayList(
                    toolChest.preMergeQueryDecoration(runner1),
                    toolChest.preMergeQueryDecoration(runner2)
                )
            )
        ),
        toolChest
    );
    SegmentMetadataQuery query = Druids.newSegmentMetadataQueryBuilder()
        .dataSource("testing")
        .intervals("2013/2014")
        .toInclude(new ListColumnIncluderator(Collections.singletonList("placement")))
        .analysisTypes(SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC)
        .merge(true)
        .build();
    TestHelper.assertExpectedObjects(
        ImmutableList.of(mergedSegmentAnalysis),
        myRunner.run(QueryPlus.wrap(query)),
        "failed SegmentMetadata merging query"
    );
    exec.shutdownNow();
}
Also used : FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) ListColumnIncluderator(org.apache.druid.query.metadata.metadata.ListColumnIncluderator) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ExecutorService(java.util.concurrent.ExecutorService) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) Test(org.junit.Test)

Example 9 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From the class IngestSegmentFirehoseTest, method createTestIndex.

private void createTestIndex(File segmentDir) throws Exception {
    final List<String> rows = Lists.newArrayList(
        "2014102200\thost1\t10\t0\t1",
        "2014102200\thost2\t20\t1\t0",
        "2014102200\thost3\t30\t1\t1",
        "2014102201\thost1\t10\t1\t1",
        "2014102201\thost2\t20\t1\t1",
        "2014102201\thost3\t30\t1\t1",
        "2014102202\thost1\t10\t1\t1",
        "2014102202\thost2\t20\t1\t1",
        "2014102202\thost3\t30\t1\t1"
    );
    final StringInputRowParser parser = new StringInputRowParser(
        new DelimitedParseSpec(
            new TimestampSpec("timestamp", "yyyyMMddHH", null),
            DIMENSIONS_SPEC,
            "\t",
            null,
            ImmutableList.of("timestamp", "host", "visited", "x", "y", "spatial"),
            false,
            0
        ),
        StandardCharsets.UTF_8.toString()
    );
    try (final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(
                new IncrementalIndexSchema.Builder()
                    .withDimensionsSpec(parser.getParseSpec().getDimensionsSpec())
                    .withMetrics(AGGREGATORS.toArray(new AggregatorFactory[0]))
                    .build()
            )
            .setMaxRowCount(5000)
            .build()) {
        for (String line : rows) {
            index.add(parser.parse(line));
        }
        indexMerger.persist(index, segmentDir, new IndexSpec(), null);
    }
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) DelimitedParseSpec(org.apache.druid.data.input.impl.DelimitedParseSpec) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory)
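
Unlike the "auto" format in the earlier examples, Example 9 supplies an explicit Joda-time pattern. A minimal hedged sketch of how "yyyyMMddHH" resolves the first row's key (again using extractTimestamp, which should be checked against your Druid version):

import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.joda.time.DateTime;

public class PatternTimestampSketch {
    public static void main(String[] args) {
        // The pattern maps "2014102200" to 2014-10-22, hour 00.
        TimestampSpec spec = new TimestampSpec("timestamp", "yyyyMMddHH", null);
        DateTime ts = spec.extractTimestamp(ImmutableMap.of("timestamp", "2014102200"));
        System.out.println(ts); // expected: 2014-10-22T00:00:00.000Z
    }
}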

Example 10 with TimestampSpec

Use of org.apache.druid.data.input.impl.TimestampSpec in project druid by druid-io.

From the class SqlFirehoseTest, method testFirehoseStringParser.

@Test
public void testFirehoseStringParser() throws Exception {
    final TestCloseable closeable = new TestCloseable();
    List<Object> expectedResults = new ArrayList<>();
    for (Map<String, Object> map : inputs) {
        expectedResults.add(map.get("x"));
    }
    final List<JsonIterator<Map<String, Object>>> lineIterators = fileList
        .stream()
        .map(s -> new JsonIterator<Map<String, Object>>(TYPE_REF, s, closeable, objectMapper))
        .collect(Collectors.toList());
    final InputRowParser stringParser = TransformSpec.NONE.decorate(
        new StringInputRowParser(
            new TimeAndDimsParseSpec(
                new TimestampSpec("timestamp", "auto", null),
                new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("x")))
            ),
            Charset.defaultCharset().name()
        )
    );
    try (final SqlFirehose firehose = new SqlFirehose(lineIterators.iterator(), stringParser, closeable)) {
        final List<Object> results = new ArrayList<>();
        while (firehose.hasMore()) {
            final InputRow inputRow = firehose.nextRow();
            if (inputRow == null) {
                results.add(null);
            } else {
                results.add(inputRow.getDimension("x").get(0));
            }
        }
        Assert.assertEquals(expectedResults, results);
    }
}
Also used : JsonGenerator(com.fasterxml.jackson.core.JsonGenerator) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Charset(java.nio.charset.Charset) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) After(org.junit.After) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TypeReference(com.fasterxml.jackson.core.type.TypeReference) FileUtils(org.apache.druid.java.util.common.FileUtils) Before(org.junit.Before) JsonIterator(org.apache.druid.data.input.impl.prefetch.JsonIterator) ImmutableMap(com.google.common.collect.ImmutableMap) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileOutputStream(java.io.FileOutputStream) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) FileInputStream(java.io.FileInputStream) Collectors(java.util.stream.Collectors) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) File(java.io.File) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) InputRow(org.apache.druid.data.input.InputRow) List(java.util.List) TransformingStringInputRowParser(org.apache.druid.segment.transform.TransformingStringInputRowParser) Closeable(java.io.Closeable) Assert(org.junit.Assert) TransformSpec(org.apache.druid.segment.transform.TransformSpec)
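
Example 10 decorates the parser with TransformSpec.NONE, a no-op. As a hedged sketch of a non-trivial decoration, the snippet below adds one derived dimension; the dimension name "xUpper" is hypothetical, and the TransformSpec and ExpressionTransform constructors are taken from the imports listed above but should be treated as assumptions about your Druid version.

// Sketch only: "stringParser" stands for the parser built in the test above,
// and "xUpper" is a hypothetical derived dimension.
private static InputRowParser decorateWithTransform(InputRowParser stringParser) {
    TransformSpec transformSpec = new TransformSpec(
        // No row filter.
        null,
        // One derived dimension computed from "x" via a Druid expression.
        ImmutableList.of(new ExpressionTransform("xUpper", "upper(x)", ExprMacroTable.nil()))
    );
    return transformSpec.decorate(stringParser);
}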

Aggregations

TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 154
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 113
Test (org.junit.Test): 110
DataSchema (org.apache.druid.segment.indexing.DataSchema): 49
InputRow (org.apache.druid.data.input.InputRow): 47
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 41
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 39
InputRowSchema (org.apache.druid.data.input.InputRowSchema): 37
InputEntityReader (org.apache.druid.data.input.InputEntityReader): 33
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 32
JSONPathSpec (org.apache.druid.java.util.common.parsers.JSONPathSpec): 30
JSONParseSpec (org.apache.druid.data.input.impl.JSONParseSpec): 29
ArrayList (java.util.ArrayList): 28
CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 28
StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser): 27
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 27
JSONPathFieldSpec (org.apache.druid.java.util.common.parsers.JSONPathFieldSpec): 25
InputRowListPlusRawValues (org.apache.druid.data.input.InputRowListPlusRawValues): 21
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 21
ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec): 20