Search in sources :

Example 11 with StringDimensionSchema

use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

the class InputRowSerdeTest method testSerde.

@Test
public void testSerde() {
    // Prepare the mocks & set close() call count expectation to 1
    final Aggregator mockedAggregator = EasyMock.createMock(DoubleSumAggregator.class);
    EasyMock.expect(mockedAggregator.isNull()).andReturn(false).times(1);
    EasyMock.expect(mockedAggregator.getDouble()).andReturn(0d).times(1);
    mockedAggregator.aggregate();
    EasyMock.expectLastCall().times(1);
    mockedAggregator.close();
    EasyMock.expectLastCall().times(1);
    EasyMock.replay(mockedAggregator);
    final Aggregator mockedNullAggregator = EasyMock.createMock(DoubleSumAggregator.class);
    EasyMock.expect(mockedNullAggregator.isNull()).andReturn(true).times(1);
    mockedNullAggregator.aggregate();
    EasyMock.expectLastCall().times(1);
    mockedNullAggregator.close();
    EasyMock.expectLastCall().times(1);
    EasyMock.replay(mockedNullAggregator);
    final AggregatorFactory mockedAggregatorFactory = EasyMock.createMock(AggregatorFactory.class);
    EasyMock.expect(mockedAggregatorFactory.factorize(EasyMock.anyObject(ColumnSelectorFactory.class))).andReturn(mockedAggregator);
    EasyMock.expect(mockedAggregatorFactory.getIntermediateType()).andReturn(ColumnType.DOUBLE).anyTimes();
    EasyMock.expect(mockedAggregatorFactory.getName()).andReturn("mockedAggregator").anyTimes();
    final AggregatorFactory mockedNullAggregatorFactory = EasyMock.createMock(AggregatorFactory.class);
    EasyMock.expect(mockedNullAggregatorFactory.factorize(EasyMock.anyObject(ColumnSelectorFactory.class))).andReturn(mockedNullAggregator);
    EasyMock.expect(mockedNullAggregatorFactory.getName()).andReturn("mockedNullAggregator").anyTimes();
    EasyMock.expect(mockedNullAggregatorFactory.getIntermediateType()).andReturn(ColumnType.DOUBLE).anyTimes();
    EasyMock.replay(mockedAggregatorFactory, mockedNullAggregatorFactory);
    InputRow in = new MapBasedInputRow(timestamp, dims, event);
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] { new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"), new DoubleSumAggregatorFactory("m1out", "m1"), new LongSumAggregatorFactory("m2out", "m2"), new HyperUniquesAggregatorFactory("m3out", "m3"), // Unparseable from String to Long
    new LongSumAggregatorFactory("unparseable", "m3"), mockedAggregatorFactory, mockedNullAggregatorFactory };
    DimensionsSpec dimensionsSpec = new DimensionsSpec(Arrays.asList(new StringDimensionSchema("d1"), new StringDimensionSchema("d2"), new LongDimensionSchema("d3"), new FloatDimensionSchema("d4"), new DoubleDimensionSchema("d5")));
    byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories).getSerializedRow();
    InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories);
    Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
    Assert.assertEquals(dims, out.getDimensions());
    Assert.assertEquals(Collections.emptyList(), out.getDimension("dim_non_existing"));
    Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
    Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
    Assert.assertEquals(200L, out.getRaw("d3"));
    Assert.assertEquals(300.1f, out.getRaw("d4"));
    Assert.assertEquals(400.5d, out.getRaw("d5"));
    Assert.assertEquals(NullHandling.defaultDoubleValue(), out.getMetric("agg_non_existing"));
    Assert.assertEquals(5.0f, out.getMetric("m1out").floatValue(), 0.00001);
    Assert.assertEquals(100L, out.getMetric("m2out"));
    Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
    Assert.assertEquals(NullHandling.defaultLongValue(), out.getMetric("unparseable"));
    EasyMock.verify(mockedAggregator);
    EasyMock.verify(mockedNullAggregator);
}
Also used : ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) DoubleSumAggregator(org.apache.druid.query.aggregation.DoubleSumAggregator) Aggregator(org.apache.druid.query.aggregation.Aggregator) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Test(org.junit.Test)

Example 12 with StringDimensionSchema

use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method makeRealtimeTask.

private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, final long handoffTimeout, final Boolean logParseExceptions, final Integer maxParseExceptions, final Integer maxSavedParseExceptions, final Integer maxRowsPerSegment, final Long maxTotalRows) {
    DataSchema dataSchema = new DataSchema("test_ds", TestHelper.makeJsonMapper().convertValue(new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("t", "auto", null), new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim2"), new StringDimensionSchema("dim1t"), new LongDimensionSchema("dimLong"), new FloatDimensionSchema("dimFloat"))))), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT), new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), transformSpec, OBJECT_MAPPER);
    RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null);
    RealtimeAppenderatorTuningConfig tuningConfig = new RealtimeAppenderatorTuningConfig(null, 1000, null, null, maxRowsPerSegment, maxTotalRows, null, null, null, null, null, null, reportParseExceptions, handoffTimeout, null, null, logParseExceptions, maxParseExceptions, maxSavedParseExceptions);
    return new AppenderatorDriverRealtimeIndexTask(taskId, null, new RealtimeAppenderatorIngestionSpec(dataSchema, realtimeIOConfig, tuningConfig), null) {

        @Override
        protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
            return true;
        }
    };
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec)

Example 13 with StringDimensionSchema

use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

the class CompactionTaskTest method testCreateDimensionSchema.

@Test
public void testCreateDimensionSchema() {
    final String dimensionName = "dim";
    DimensionHandlerUtils.registerDimensionHandlerProvider(ExtensionDimensionHandler.TYPE_NAME, d -> new ExtensionDimensionHandler(d));
    DimensionSchema stringSchema = CompactionTask.createDimensionSchema(dimensionName, ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities().setHasBitmapIndexes(true).setDictionaryEncoded(true).setDictionaryValuesUnique(true).setDictionaryValuesUnique(true), DimensionSchema.MultiValueHandling.SORTED_SET);
    Assert.assertTrue(stringSchema instanceof StringDimensionSchema);
    DimensionSchema floatSchema = CompactionTask.createDimensionSchema(dimensionName, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.FLOAT), null);
    Assert.assertTrue(floatSchema instanceof FloatDimensionSchema);
    DimensionSchema doubleSchema = CompactionTask.createDimensionSchema(dimensionName, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.DOUBLE), null);
    Assert.assertTrue(doubleSchema instanceof DoubleDimensionSchema);
    DimensionSchema longSchema = CompactionTask.createDimensionSchema(dimensionName, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.LONG), null);
    Assert.assertTrue(longSchema instanceof LongDimensionSchema);
    DimensionSchema extensionSchema = CompactionTask.createDimensionSchema(dimensionName, ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ColumnType.ofComplex(ExtensionDimensionHandler.TYPE_NAME)), null);
    Assert.assertTrue(extensionSchema instanceof ExtensionDimensionSchema);
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)

Example 14 with StringDimensionSchema

use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

the class CompactionTaskTest method testSerdeWithDimensions.

@Test
public void testSerdeWithDimensions() throws IOException {
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask task = builder.segments(SEGMENTS).dimensionsSpec(new DimensionsSpec(ImmutableList.of(new StringDimensionSchema("dim1"), new StringDimensionSchema("dim2"), new StringDimensionSchema("dim3")))).tuningConfig(createTuningConfig()).context(ImmutableMap.of("testKey", "testVal")).build();
    final byte[] bytes = OBJECT_MAPPER.writeValueAsBytes(task);
    final CompactionTask fromJson = OBJECT_MAPPER.readValue(bytes, CompactionTask.class);
    assertEquals(task, fromJson);
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)

Example 15 with StringDimensionSchema

use of org.apache.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

the class CompactionTaskTest method testCreateIngestionSchemaWithCustomDimensionsSpec.

@Test
public void testCreateIngestionSchemaWithCustomDimensionsSpec() throws IOException, SegmentLoadingException {
    final DimensionsSpec customSpec = new DimensionsSpec(Lists.newArrayList(new LongDimensionSchema("timestamp"), new StringDimensionSchema("string_dim_0"), new StringDimensionSchema("string_dim_1"), new StringDimensionSchema("string_dim_2"), new StringDimensionSchema("string_dim_3"), new StringDimensionSchema("string_dim_4"), new LongDimensionSchema("long_dim_0"), new LongDimensionSchema("long_dim_1"), new LongDimensionSchema("long_dim_2"), new LongDimensionSchema("long_dim_3"), new LongDimensionSchema("long_dim_4"), new FloatDimensionSchema("float_dim_0"), new FloatDimensionSchema("float_dim_1"), new FloatDimensionSchema("float_dim_2"), new FloatDimensionSchema("float_dim_3"), new FloatDimensionSchema("float_dim_4"), new DoubleDimensionSchema("double_dim_0"), new DoubleDimensionSchema("double_dim_1"), new DoubleDimensionSchema("double_dim_2"), new DoubleDimensionSchema("double_dim_3"), new DoubleDimensionSchema("double_dim_4"), new StringDimensionSchema(MIXED_TYPE_COLUMN)));
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), customSpec, null, null, null, COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(6, ingestionSpecs.size());
    final List<DimensionsSpec> dimensionsSpecs = new ArrayList<>(6);
    IntStream.range(0, 6).forEach(i -> dimensionsSpecs.add(customSpec));
    assertIngestionSchema(ingestionSpecs, dimensionsSpecs, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), SEGMENT_INTERVALS, Granularities.MONTH, Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) ArrayList(java.util.ArrayList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test)

Aggregations

StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)36 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)30 Test (org.junit.Test)24 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)19 LongDimensionSchema (org.apache.druid.data.input.impl.LongDimensionSchema)15 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)15 FloatDimensionSchema (org.apache.druid.data.input.impl.FloatDimensionSchema)14 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)12 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)11 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)11 File (java.io.File)8 ArrayList (java.util.ArrayList)8 Before (org.junit.Before)8 ImmutableList (com.google.common.collect.ImmutableList)7 HashMap (java.util.HashMap)7 DataSchema (org.apache.druid.segment.indexing.DataSchema)7 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)7 List (java.util.List)6 SupervisorStateManagerConfig (org.apache.druid.indexing.overlord.supervisor.SupervisorStateManagerConfig)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5