Example 16 with RealtimeTuningConfig

Use of org.apache.druid.segment.indexing.RealtimeTuningConfig in the apache/druid project.

From the class SinkTest, the method testSwap:

@Test
public void testSwap() throws Exception {
    final DataSchema schema = new DataSchema(
            "test",
            new TimestampSpec(null, null, null),
            DimensionsSpec.EMPTY,
            new AggregatorFactory[] { new CountAggregatorFactory("rows") },
            new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
            null);
    final Interval interval = Intervals.of("2013-01-01/2013-01-02");
    final String version = DateTimes.nowUtc().toString();
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
            null, 100, null, null, new Period("P1Y"), null, null, null, null, null,
            null, null, null, 0, 0, null, null, null, null, null);
    final Sink sink = new Sink(
            interval,
            schema,
            tuningConfig.getShardSpec(),
            version,
            tuningConfig.getAppendableIndexSpec(),
            tuningConfig.getMaxRowsInMemory(),
            tuningConfig.getMaxBytesInMemoryOrDefault(),
            true,
            tuningConfig.getDedupColumn());
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return new ArrayList<>();
        }

        @Override
        public long getTimestampFromEpoch() {
            return DateTimes.of("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return DateTimes.of("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return new ArrayList<>();
        }

        @Override
        public Number getMetric(String metric) {
            return 0;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    }, false);
    FireHydrant currHydrant = sink.getCurrHydrant();
    Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), currHydrant.getIndex().getInterval());
    FireHydrant swapHydrant = sink.swap();
    sink.add(new InputRow() {

        @Override
        public List<String> getDimensions() {
            return new ArrayList<>();
        }

        @Override
        public long getTimestampFromEpoch() {
            return DateTimes.of("2013-01-01").getMillis();
        }

        @Override
        public DateTime getTimestamp() {
            return DateTimes.of("2013-01-01");
        }

        @Override
        public List<String> getDimension(String dimension) {
            return new ArrayList<>();
        }

        @Override
        public Number getMetric(String metric) {
            return 0;
        }

        @Override
        public Object getRaw(String dimension) {
            return null;
        }

        @Override
        public int compareTo(Row o) {
            return 0;
        }
    }, false);
    Assert.assertEquals(currHydrant, swapHydrant);
    Assert.assertNotSame(currHydrant, sink.getCurrHydrant());
    Assert.assertEquals(Intervals.of("2013-01-01/PT1M"), sink.getCurrHydrant().getIndex().getInterval());
    Assert.assertEquals(2, Iterators.size(sink.iterator()));
}
Also used : Period(org.joda.time.Period) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DateTime(org.joda.time.DateTime) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) InputRow(org.apache.druid.data.input.InputRow) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Row(org.apache.druid.data.input.Row) InputRow(org.apache.druid.data.input.InputRow) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) Interval(org.joda.time.Interval) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
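
The two anonymous InputRow instances above override seven methods just to deliver a fixed timestamp. MapBasedInputRow (already present in this example's import list) packages the same thing in one constructor call; a minimal sketch, assumed equivalent here since only the timestamp is consumed by the count aggregator:

InputRow row = new MapBasedInputRow(
        DateTimes.of("2013-01-01").getMillis(),   // timestamp in epoch millis
        Collections.emptyList(),                  // no dimensions (java.util.Collections)
        Collections.emptyMap());                  // no event values
sink.add(row, false);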

Example 17 with RealtimeTuningConfig

Use of org.apache.druid.segment.indexing.RealtimeTuningConfig in the apache/druid project.

From the class StreamAppenderatorTest, the method testRestoreFromDisk:

@Test
public void testRestoreFromDisk() throws Exception {
    final RealtimeTuningConfig tuningConfig;
    try (final StreamAppenderatorTester tester = new StreamAppenderatorTester(2, true)) {
        final Appenderator appenderator = tester.getAppenderator();
        tuningConfig = tester.getTuningConfig();
        final AtomicInteger eventCount = new AtomicInteger(0);
        final Supplier<Committer> committerSupplier = new Supplier<Committer>() {

            @Override
            public Committer get() {
                final Object metadata = ImmutableMap.of("eventCount", eventCount.get());
                return new Committer() {

                    @Override
                    public Object getMetadata() {
                        return metadata;
                    }

                    @Override
                    public void run() {
                        // Do nothing
                    }
                };
            }
        };
        appenderator.startJob();
        eventCount.incrementAndGet();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 1), committerSupplier);
        eventCount.incrementAndGet();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "bar", 2), committerSupplier);
        eventCount.incrementAndGet();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "baz", 3), committerSupplier);
        eventCount.incrementAndGet();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "qux", 4), committerSupplier);
        eventCount.incrementAndGet();
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "bob", 5), committerSupplier);
        appenderator.close();
        try (final StreamAppenderatorTester tester2 = new StreamAppenderatorTester(2, -1, tuningConfig.getBasePersistDirectory(), true)) {
            final Appenderator appenderator2 = tester2.getAppenderator();
            Assert.assertEquals(ImmutableMap.of("eventCount", 4), appenderator2.startJob());
            Assert.assertEquals(ImmutableList.of(IDENTIFIERS.get(0)), appenderator2.getSegments());
            Assert.assertEquals(4, appenderator2.getRowCount(IDENTIFIERS.get(0)));
        }
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Supplier(com.google.common.base.Supplier) Committer(org.apache.druid.data.input.Committer) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
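
Because com.google.common.base.Supplier has a single abstract method, the committerSupplier above can also be written as a lambda (Committer itself cannot, since it declares both getMetadata() and run()). A minimal sketch of the same logic, using the same names as the test above and capturing the event count at the moment get() is called:

final Supplier<Committer> committerSupplier = () -> {
    final Object metadata = ImmutableMap.of("eventCount", eventCount.get());
    return new Committer() {
        @Override
        public Object getMetadata() {
            return metadata;
        }

        @Override
        public void run() {
            // Nothing to do on commit in this test
        }
    };
};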

Example 18 with RealtimeTuningConfig

Use of org.apache.druid.segment.indexing.RealtimeTuningConfig in the apache/hive project.

From the class DruidOutputFormat, the method getHiveRecordWriter:

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    final int targetNumShardsPerGranularity = Integer.parseUnsignedInt(tableProperties.getProperty(Constants.DRUID_TARGET_SHARDS_PER_GRANULARITY, "0"));
    final int maxPartitionSize = targetNumShardsPerGranularity > 0 ? -1 : HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
    // If datasource is in the table properties, it is an INSERT/INSERT OVERWRITE as the datasource
    // name was already persisted. Otherwise, it is a CT/CTAS and we need to get the name from the
    // job properties that are set by configureOutputJobProperties in the DruidStorageHandler
    final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE) == null
            ? jc.get(Constants.DRUID_DATA_SOURCE)
            : tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
    final String segmentDirectory = jc.get(DruidConstants.DRUID_SEGMENT_INTERMEDIATE_DIRECTORY);
    final GranularitySpec granularitySpec = DruidStorageHandlerUtils.getGranularitySpec(jc, tableProperties);
    final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
        throw new IllegalStateException(String.format("List of columns names [%s] or columns type [%s] is/are not present", columnNameProperty, columnTypeProperty));
    }
    ArrayList<String> columnNames = Lists.newArrayList(columnNameProperty.split(","));
    if (!columnNames.contains(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
        throw new IllegalStateException("Timestamp column ('" + DruidConstants.DEFAULT_TIMESTAMP_COLUMN
                + "') not specified in create table; list of columns is: "
                + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
    }
    ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    Pair<List<DimensionSchema>, AggregatorFactory[]> dimensionsAndAggregates = DruidStorageHandlerUtils.getDimensionsAndAggregates(columnNames, columnTypes);
    final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec(
            new TimestampSpec(DruidConstants.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
            new DimensionsSpec(
                    dimensionsAndAggregates.lhs,
                    Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, Constants.DRUID_SHARD_KEY_COL_NAME),
                    null)));
    Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
    final DataSchema dataSchema = new DataSchema(
            Preconditions.checkNotNull(dataSource, "Data source name is null"),
            inputParser,
            dimensionsAndAggregates.rhs,
            granularitySpec,
            null,
            DruidStorageHandlerUtils.JSON_MAPPER);
    final String workingPath = jc.get(DruidConstants.DRUID_JOB_WORKING_DIRECTORY);
    final String version = jc.get(DruidConstants.DRUID_SEGMENT_VERSION);
    String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
    if (Strings.isNullOrEmpty(basePersistDirectory)) {
        basePersistDirectory = System.getProperty("java.io.tmpdir");
    }
    Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
    IndexSpec indexSpec = DruidStorageHandlerUtils.getIndexSpec(jc);
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(
            maxRowInMemory, null, null, null, new File(basePersistDirectory, dataSource),
            new CustomVersioningPolicy(version), null, null, null, indexSpec, null, true,
            0, 0, true, null, 0L, null, null);
    LOG.debug(String.format("Running with data schema [%s]", dataSchema));
    return new DruidRecordWriter(dataSchema, realtimeTuningConfig, DruidStorageHandlerUtils.createSegmentPusherForDirectory(segmentDirectory, jc), maxPartitionSize, new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME), finalOutPath.getFileSystem(jc));
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) List(java.util.List) Path(org.apache.hadoop.fs.Path) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) DataSchema(org.apache.druid.segment.indexing.DataSchema) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) CustomVersioningPolicy(org.apache.druid.segment.realtime.plumber.CustomVersioningPolicy) Map(java.util.Map) File(java.io.File)
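
The guard clauses above mean the writer refuses to start unless both column lists are present and the column names include the default Druid timestamp column. A minimal sketch of table properties that would pass those checks; the datasource, column names, and types here are hypothetical placeholders:

Properties tableProperties = new Properties();
// INSERT/INSERT OVERWRITE path: the datasource name is already in the table properties
// (for CT/CTAS it would instead be read from the JobConf, as described above).
tableProperties.setProperty(Constants.DRUID_DATA_SOURCE, "wikipedia_ds");
// Both lists must be non-empty, and the names must include the default timestamp column.
tableProperties.setProperty(serdeConstants.LIST_COLUMNS,
        DruidConstants.DEFAULT_TIMESTAMP_COLUMN + ",page,added");
tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "timestamp,string,bigint");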

Example 19 with RealtimeTuningConfig

Use of org.apache.druid.segment.indexing.RealtimeTuningConfig in the druid-io/druid project.

From the class RealtimePlumberSchoolTest, the method setUp:

@Before
public void setUp() throws Exception {
    tmpDir = FileUtils.createTempDir();
    ObjectMapper jsonMapper = new DefaultObjectMapper();
    schema = new DataSchema(
            "test",
            jsonMapper.convertValue(new StringInputRowParser(
                    new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, null, null, null),
                    null), Map.class),
            new AggregatorFactory[] { new CountAggregatorFactory("rows") },
            new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
            null,
            jsonMapper);
    schema2 = new DataSchema(
            "test",
            jsonMapper.convertValue(new StringInputRowParser(
                    new JSONParseSpec(new TimestampSpec("timestamp", "auto", null), DimensionsSpec.EMPTY, null, null, null),
                    null), Map.class),
            new AggregatorFactory[] { new CountAggregatorFactory("rows") },
            new UniformGranularitySpec(Granularities.YEAR, Granularities.NONE, null),
            null,
            jsonMapper);
    announcer = EasyMock.createMock(DataSegmentAnnouncer.class);
    announcer.announceSegment(EasyMock.anyObject());
    EasyMock.expectLastCall().anyTimes();
    segmentPublisher = EasyMock.createNiceMock(SegmentPublisher.class);
    dataSegmentPusher = EasyMock.createNiceMock(DataSegmentPusher.class);
    handoffNotifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
    handoffNotifier = EasyMock.createNiceMock(SegmentHandoffNotifier.class);
    EasyMock.expect(handoffNotifierFactory.createSegmentHandoffNotifier(EasyMock.anyString())).andReturn(handoffNotifier).anyTimes();
    EasyMock.expect(handoffNotifier.registerSegmentHandoffCallback(EasyMock.anyObject(), EasyMock.anyObject(), EasyMock.anyObject())).andReturn(true).anyTimes();
    emitter = EasyMock.createMock(ServiceEmitter.class);
    EasyMock.replay(announcer, segmentPublisher, dataSegmentPusher, handoffNotifierFactory, handoffNotifier, emitter);
    tuningConfig = new RealtimeTuningConfig(
            null, 1, null, null, null, null, null,
            new IntervalStartVersioningPolicy(), rejectionPolicy,
            null, null, null, null, 0, 0, false, null, null, null, null);
    realtimePlumberSchool = new RealtimePlumberSchool(
            emitter,
            new DefaultQueryRunnerFactoryConglomerate(new HashMap<>()),
            dataSegmentPusher,
            announcer,
            segmentPublisher,
            handoffNotifierFactory,
            DirectQueryProcessingPool.INSTANCE,
            NoopJoinableFactory.INSTANCE,
            TestHelper.getTestIndexMergerV9(segmentWriteOutMediumFactory),
            TestHelper.getTestIndexIO(),
            MapCache.create(0),
            FireDepartmentTest.NO_CACHE_CONFIG,
            new CachePopulatorStats(),
            TestHelper.makeJsonMapper());
    metrics = new FireDepartmentMetrics();
    plumber = (RealtimePlumber) realtimePlumberSchool.findPlumber(schema, tuningConfig, metrics);
}
Also used : ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) DefaultQueryRunnerFactoryConglomerate(org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) SegmentPublisher(org.apache.druid.segment.realtime.SegmentPublisher) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Before(org.junit.Before)
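
The setUp above follows EasyMock's record-replay lifecycle: calls are first recorded as expectations on the mock, then replay() switches the mock into its active mode before the test body runs. A minimal sketch of that pattern in isolation (assumes an enclosing method declared with throws Exception, as in setUp):

DataSegmentAnnouncer announcer = EasyMock.createMock(DataSegmentAnnouncer.class);
announcer.announceSegment(EasyMock.anyObject()); // record: a segment announcement is expected
EasyMock.expectLastCall().anyTimes();            // ... any number of times, including zero
EasyMock.replay(announcer);                      // stop recording; the mock is now usable
// ... exercise code that announces segments ...
EasyMock.verify(announcer);                      // optionally assert the expectations were met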

Example 20 with RealtimeTuningConfig

Use of org.apache.druid.segment.indexing.RealtimeTuningConfig in the druid-io/druid project.

From the class DefaultOfflineAppenderatorFactoryTest, the method testBuild:

@Test
public void testBuild() throws IOException, SegmentNotWritableException {
    Injector injector = Initialization.makeInjectorWithModules(GuiceInjectors.makeStartupInjector(), ImmutableList.<Module>of(new Module() {

        @Override
        public void configure(Binder binder) {
            binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/tool");
            binder.bindConstant().annotatedWith(Names.named("servicePort")).to(9999);
            binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);
            binder.bind(DruidProcessingConfig.class).toInstance(new DruidProcessingConfig() {

                @Override
                public String getFormatString() {
                    return "processing-%s";
                }

                @Override
                public int intermediateComputeSizeBytes() {
                    return 100 * 1024 * 1024;
                }

                @Override
                public int getNumThreads() {
                    return 1;
                }

                @Override
                public int columnCacheSizeBytes() {
                    return 25 * 1024 * 1024;
                }
            });
            binder.bind(ColumnConfig.class).to(DruidProcessingConfig.class);
        }
    }));
    ObjectMapper objectMapper = injector.getInstance(ObjectMapper.class);
    AppenderatorFactory defaultOfflineAppenderatorFactory = objectMapper.readerFor(AppenderatorFactory.class).readValue("{\"type\":\"offline\"}");
    final Map<String, Object> parserMap = objectMapper.convertValue(
            new MapInputRowParser(new JSONParseSpec(new TimestampSpec("ts", "auto", null), DimensionsSpec.EMPTY, null, null, null)),
            Map.class);
    DataSchema schema = new DataSchema(
            "dataSourceName",
            parserMap,
            new AggregatorFactory[] { new CountAggregatorFactory("count"), new LongSumAggregatorFactory("met", "met") },
            new UniformGranularitySpec(Granularities.MINUTE, Granularities.NONE, null),
            null,
            objectMapper);
    RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
            null, 75000, null, null, null, null, temporaryFolder.newFolder(),
            null, null, null, null, null, null, 0, 0, null, null, null, null, null);
    Appenderator appenderator = defaultOfflineAppenderatorFactory.build(schema, tuningConfig, new FireDepartmentMetrics());
    try {
        Assert.assertEquals("dataSourceName", appenderator.getDataSource());
        Assert.assertEquals(null, appenderator.startJob());
        SegmentIdWithShardSpec identifier = new SegmentIdWithShardSpec("dataSourceName", Intervals.of("2000/2001"), "A", new LinearShardSpec(0));
        Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
        appenderator.add(identifier, StreamAppenderatorTest.ir("2000", "bar", 1), null);
        Assert.assertEquals(1, ((AppenderatorImpl) appenderator).getRowsInMemory());
        appenderator.add(identifier, StreamAppenderatorTest.ir("2000", "baz", 1), null);
        Assert.assertEquals(2, ((AppenderatorImpl) appenderator).getRowsInMemory());
        appenderator.close();
        Assert.assertEquals(0, ((AppenderatorImpl) appenderator).getRowsInMemory());
    } finally {
        appenderator.close();
    }
}
Also used : ColumnConfig(org.apache.druid.segment.column.ColumnConfig) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DataSchema(org.apache.druid.segment.indexing.DataSchema) Binder(com.google.inject.Binder) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) Injector(com.google.inject.Injector) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) Module(com.google.inject.Module) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
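
The factory is materialized from JSON alone because AppenderatorFactory is a Jackson polymorphic type: the "type" discriminator in the payload selects a registered implementation. A hedged sketch of how such a binding is typically declared; the annotations are standard Jackson, but the registration shown is illustrative rather than copied from Druid's source (the build signature matches the call in the test above):

@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
@JsonSubTypes({
        // hypothetical: maps the "offline" discriminator to the offline factory
        @JsonSubTypes.Type(name = "offline", value = DefaultOfflineAppenderatorFactory.class)
})
public interface AppenderatorFactory
{
    Appenderator build(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics);
}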

Aggregations

RealtimeTuningConfig (org.apache.druid.segment.indexing.RealtimeTuningConfig): 22 uses
DataSchema (org.apache.druid.segment.indexing.DataSchema): 20 uses
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 17 uses
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 14 uses
Test (org.junit.Test): 13 uses
Period (org.joda.time.Period): 12 uses
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 10 uses
RealtimeIOConfig (org.apache.druid.segment.indexing.RealtimeIOConfig): 10 uses
FireDepartment (org.apache.druid.segment.realtime.FireDepartment): 10 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 9 uses
MapInputRowParser (org.apache.druid.data.input.impl.MapInputRowParser): 8 uses
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 7 uses
File (java.io.File): 6 uses
TimeAndDimsParseSpec (org.apache.druid.data.input.impl.TimeAndDimsParseSpec): 6 uses
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 6 uses
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 6 uses
Interval (org.joda.time.Interval): 6 uses
List (java.util.List): 4 uses
Committer (org.apache.druid.data.input.Committer): 4 uses
FirehoseFactory (org.apache.druid.data.input.FirehoseFactory): 4 uses