Example 6 with WindowedDataSegment

Use of io.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.

In the class BatchDeltaIngestionTest, the method testReindexingWithPartialWindow:

@Test
public void testReindexingWithPartialWindow() throws Exception {
    List<WindowedDataSegment> segments = ImmutableList.of(new WindowedDataSegment(SEGMENT, INTERVAL_PARTIAL));
    HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(
        ImmutableMap.<String, Object>of(
            "type", "dataSource",
            "ingestionSpec", ImmutableMap.of("dataSource", "xyz", "interval", INTERVAL_FULL),
            "segments", segments
        ),
        temporaryFolder.newFolder()
    );
    List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
        ImmutableMap.<String, Object>of(
            "time", DateTime.parse("2014-10-22T00:00:00.000Z"),
            "host", ImmutableList.of("a.example.com"),
            "visited_sum", 100L,
            "unique_hosts", 1.0d
        ),
        ImmutableMap.<String, Object>of(
            "time", DateTime.parse("2014-10-22T01:00:00.000Z"),
            "host", ImmutableList.of("b.example.com"),
            "visited_sum", 150L,
            "unique_hosts", 1.0d
        )
    );
    testIngestion(config, expectedRows, Iterables.getOnlyElement(segments));
}
Also used : WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
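
For orientation, a minimal sketch of the idea this test exercises: a WindowedDataSegment pairs an existing DataSegment with the sub-interval of it that should actually be read, so a reindex over INTERVAL_FULL only replays rows that fall inside the window. The constructor matches the call above; the helper name and the concrete window are hypothetical.

import io.druid.indexer.hadoop.WindowedDataSegment;
import io.druid.timeline.DataSegment;
import org.joda.time.Interval;

// Hypothetical helper: wrap an existing segment so that only rows inside
// a two-hour window are replayed when the segment is re-ingested.
static WindowedDataSegment partialWindowOf(DataSegment segment) {
    Interval window = Interval.parse("2014-10-22T00:00:00Z/2014-10-22T02:00:00Z");
    return new WindowedDataSegment(segment, window);
}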

Example 7 with WindowedDataSegment

Use of io.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.

In the class DatasourcePathSpecTest, the method testAddInputPaths:

@Test
public void testAddInputPaths() throws Exception {
    HadoopDruidIndexerConfig hadoopIndexerConfig = makeHadoopDruidIndexerConfig();
    ObjectMapper mapper = new DefaultObjectMapper();
    DatasourcePathSpec pathSpec = new DatasourcePathSpec(mapper, segments, ingestionSpec, null);
    Configuration config = new Configuration();
    Job job = EasyMock.createNiceMock(Job.class);
    EasyMock.expect(job.getConfiguration()).andReturn(config).anyTimes();
    EasyMock.replay(job);
    pathSpec.addInputPaths(hadoopIndexerConfig, job);
    List<WindowedDataSegment> actualSegments = mapper.readValue(
        config.get(DatasourceInputFormat.CONF_INPUT_SEGMENTS),
        new TypeReference<List<WindowedDataSegment>>() {}
    );
    Assert.assertEquals(segments, actualSegments);
    DatasourceIngestionSpec actualIngestionSpec = mapper.readValue(config.get(DatasourceInputFormat.CONF_DRUID_SCHEMA), DatasourceIngestionSpec.class);
    Assert.assertEquals(
        ingestionSpec.withDimensions(ImmutableList.of("product")).withMetrics(ImmutableList.of("visited_sum")),
        actualIngestionSpec
    );
}
Also used : DatasourceIngestionSpec(io.druid.indexer.hadoop.DatasourceIngestionSpec) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) Configuration(org.apache.hadoop.conf.Configuration) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) DefaultObjectMapper(io.druid.jackson.DefaultObjectMapper) HadoopDruidIndexerConfig(io.druid.indexer.HadoopDruidIndexerConfig) Job(org.apache.hadoop.mapreduce.Job) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
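
The assertions above hinge on a JSON round-trip: addInputPaths serializes the segment list into the Hadoop Configuration under DatasourceInputFormat.CONF_INPUT_SEGMENTS, and it must deserialize back to an equal list. A minimal sketch of that round-trip, assuming only the constant and the Jackson calls visible in the test (the helper name is hypothetical):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.indexer.hadoop.DatasourceInputFormat;
import io.druid.indexer.hadoop.WindowedDataSegment;
import io.druid.jackson.DefaultObjectMapper;
import org.apache.hadoop.conf.Configuration;

import java.io.IOException;
import java.util.List;

// Hypothetical helper: write the segment list into the job configuration
// and read it back, mirroring what the test verifies end to end.
static List<WindowedDataSegment> roundTripSegments(List<WindowedDataSegment> segments) throws IOException {
    ObjectMapper mapper = new DefaultObjectMapper();
    Configuration conf = new Configuration();
    conf.set(DatasourceInputFormat.CONF_INPUT_SEGMENTS, mapper.writeValueAsString(segments));
    return mapper.readValue(
        conf.get(DatasourceInputFormat.CONF_INPUT_SEGMENTS),
        new TypeReference<List<WindowedDataSegment>>() {}
    );
}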

Example 8 with WindowedDataSegment

Use of io.druid.indexer.hadoop.WindowedDataSegment in project druid by druid-io.

In the class BatchDeltaIngestionTest, the helper method testIngestion:

private void testIngestion(
    HadoopDruidIndexerConfig config,
    List<ImmutableMap<String, Object>> expectedRowsGenerated,
    WindowedDataSegment windowedDataSegment
) throws Exception {
    IndexGeneratorJob job = new IndexGeneratorJob(config);
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    File segmentFolder = new File(
        String.format(
            "%s/%s/%s_%s/%s/0",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            INTERVAL_FULL.getStart().toString(),
            INTERVAL_FULL.getEnd().toString(),
            config.getSchema().getTuningConfig().getVersion()
        )
    );
    Assert.assertTrue(segmentFolder.exists());
    File descriptor = new File(segmentFolder, "descriptor.json");
    File indexZip = new File(segmentFolder, "index.zip");
    Assert.assertTrue(descriptor.exists());
    Assert.assertTrue(indexZip.exists());
    DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
    Assert.assertEquals("website", dataSegment.getDataSource());
    Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
    Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
    Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
    Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
    Assert.assertEquals("host", dataSegment.getDimensions().get(0));
    Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
    Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
    Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
    HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
    Assert.assertEquals(0, spec.getPartitionNum());
    Assert.assertEquals(1, spec.getPartitions());
    File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
    new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
    QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    Firehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
        ImmutableList.of("host"),
        ImmutableList.of("visited_sum", "unique_hosts"),
        null,
        Granularities.NONE
    );
    List<InputRow> rows = Lists.newArrayList();
    while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
    }
    verifyRows(expectedRowsGenerated, rows);
}
Also used : HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) IngestSegmentFirehose(io.druid.segment.realtime.firehose.IngestSegmentFirehose) Firehose(io.druid.data.input.Firehose) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) WindowedStorageAdapter(io.druid.segment.realtime.firehose.WindowedStorageAdapter) DataSegment(io.druid.timeline.DataSegment) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) LocalDataSegmentPuller(io.druid.segment.loading.LocalDataSegmentPuller) QueryableIndex(io.druid.segment.QueryableIndex) InputRow(io.druid.data.input.InputRow) File(java.io.File)
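
The read-back at the end is where the window matters: the IngestSegmentFirehose is bounded by windowedDataSegment.getInterval(), so only rows inside the window are replayed and verified. A condensed sketch of that pattern using the calls from the test; the helper name is hypothetical and the Granularities import location is assumed for this Druid version.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.druid.data.input.Firehose;
import io.druid.data.input.InputRow;
import io.druid.indexer.hadoop.WindowedDataSegment;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.segment.StorageAdapter;
import io.druid.segment.realtime.firehose.IngestSegmentFirehose;
import io.druid.segment.realtime.firehose.WindowedStorageAdapter;

import java.io.IOException;
import java.util.List;

// Hypothetical helper: replay only the rows of `adapter` that fall inside
// the segment's window, reading the test's dimension and metric columns.
static List<InputRow> readWindowedRows(StorageAdapter adapter, WindowedDataSegment segment) throws IOException {
    try (Firehose firehose = new IngestSegmentFirehose(
        ImmutableList.of(new WindowedStorageAdapter(adapter, segment.getInterval())),
        ImmutableList.of("host"),                        // dimensions
        ImmutableList.of("visited_sum", "unique_hosts"), // metrics
        null,                                            // no dimension filter
        Granularities.NONE)) {
        List<InputRow> rows = Lists.newArrayList();
        while (firehose.hasMore()) {
            rows.add(firehose.nextRow());
        }
        return rows;
    }
}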

Aggregations

WindowedDataSegment (io.druid.indexer.hadoop.WindowedDataSegment): 8
Test (org.junit.Test): 5
DatasourceIngestionSpec (io.druid.indexer.hadoop.DatasourceIngestionSpec): 4
ImmutableMap (com.google.common.collect.ImmutableMap): 3
DataSegment (io.druid.timeline.DataSegment): 2
File (java.io.File): 2
List (java.util.List): 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
ImmutableList (com.google.common.collect.ImmutableList): 1
Firehose (io.druid.data.input.Firehose): 1
InputRow (io.druid.data.input.InputRow): 1
HadoopDruidIndexerConfig (io.druid.indexer.HadoopDruidIndexerConfig): 1
DatasourcePathSpec (io.druid.indexer.path.DatasourcePathSpec): 1
MultiplePathSpec (io.druid.indexer.path.MultiplePathSpec): 1
PathSpec (io.druid.indexer.path.PathSpec): 1
StaticPathSpec (io.druid.indexer.path.StaticPathSpec): 1
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 1
ISE (io.druid.java.util.common.ISE): 1
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 1
QueryableIndex (io.druid.segment.QueryableIndex): 1