Example 86 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class OrcIndexGeneratorJobTest, method verifyJob.

private void verifyJob(IndexGeneratorJob job) throws IOException {
    JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
    int segmentNum = 0;
    // Expect one segment output folder per day of the test interval.
    for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
        Integer[][] shardInfo = shardInfoForEachSegment[segmentNum++];
        File segmentOutputFolder = new File(
            String.format(
                "%s/%s/%s_%s/%s",
                config.getSchema().getIOConfig().getSegmentOutputPath(),
                config.getSchema().getDataSchema().getDataSource(),
                currTime.toString(),
                currTime.plusDays(1).toString(),
                config.getSchema().getTuningConfig().getVersion()
            )
        );
        Assert.assertTrue(segmentOutputFolder.exists());
        Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
        int rowCount = 0;
        for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
            File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
            Assert.assertTrue(individualSegmentFolder.exists());
            File descriptor = new File(individualSegmentFolder, "descriptor.json");
            File indexZip = new File(individualSegmentFolder, "index.zip");
            Assert.assertTrue(descriptor.exists());
            Assert.assertTrue(indexZip.exists());
            DataSegment dataSegment = mapper.readValue(descriptor, DataSegment.class);
            Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
            Assert.assertEquals(new Interval(currTime, currTime.plusDays(1)), dataSegment.getInterval());
            Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
            Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
            Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
            Assert.assertEquals(dataSourceName, dataSegment.getDataSource());
            Assert.assertEquals(1, dataSegment.getDimensions().size());
            String[] dimensions = dataSegment.getDimensions().toArray(new String[dataSegment.getDimensions().size()]);
            Arrays.sort(dimensions);
            Assert.assertEquals("host", dimensions[0]);
            Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
            Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
            Integer[] hashShardInfo = shardInfo[partitionNum];
            HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
            Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
            Assert.assertEquals((int) hashShardInfo[1], spec.getPartitions());
            File dir = Files.createTempDir();
            unzip(indexZip, dir);
            QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
            QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
            for (Rowboat row : adapter.getRows()) {
                Object[] metrics = row.getMetrics();
                rowCount++;
                Assert.assertEquals(2, metrics.length);
            }
        }
        Assert.assertEquals(rowCount, data.size());
    }
}
Also used: HashBasedNumberedShardSpec (io.druid.timeline.partition.HashBasedNumberedShardSpec), DataSegment (io.druid.timeline.DataSegment), DateTime (org.joda.time.DateTime), QueryableIndexIndexableAdapter (io.druid.segment.QueryableIndexIndexableAdapter), QueryableIndex (io.druid.segment.QueryableIndex), OrcFile (org.apache.orc.OrcFile), File (java.io.File), Rowboat (io.druid.segment.Rowboat), Interval (org.joda.time.Interval)
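
The unzip(indexZip, dir) helper invoked above is private to the test class and not part of this excerpt. A minimal sketch of such a helper using java.util.zip (the name and signature are assumed to match the call above, not copied from the test):

private void unzip(File zipFile, File outDir) throws IOException {
    byte[] buffer = new byte[8192];
    try (ZipInputStream in = new ZipInputStream(new FileInputStream(zipFile))) {
        // Walk every entry of the segment zip and extract it into outDir,
        // so that IndexIO can load the unpacked segment files.
        for (ZipEntry entry = in.getNextEntry(); entry != null; entry = in.getNextEntry()) {
            File out = new File(outDir, entry.getName());
            if (entry.isDirectory()) {
                out.mkdirs();
                continue;
            }
            try (OutputStream os = new FileOutputStream(out)) {
                for (int len = in.read(buffer); len > 0; len = in.read(buffer)) {
                    os.write(buffer, 0, len);
                }
            }
        }
    }
}

Uses: ZipInputStream (java.util.zip.ZipInputStream), ZipEntry (java.util.zip.ZipEntry), FileInputStream (java.io.FileInputStream), FileOutputStream (java.io.FileOutputStream), OutputStream (java.io.OutputStream)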

Example 87 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class CloudFilesDataSegmentPusherTest, method testPush.

@Test
public void testPush() throws Exception {
    ObjectApi objectApi = EasyMock.createStrictMock(ObjectApi.class);
    EasyMock.expect(objectApi.put(EasyMock.anyString(), EasyMock.<Payload>anyObject())).andReturn(null).atLeastOnce();
    EasyMock.replay(objectApi);
    CloudFilesApi api = EasyMock.createStrictMock(CloudFilesApi.class);
    EasyMock.expect(api.getObjectApi(EasyMock.anyString(), EasyMock.anyString())).andReturn(objectApi).atLeastOnce();
    EasyMock.replay(api);
    CloudFilesDataSegmentPusherConfig config = new CloudFilesDataSegmentPusherConfig();
    config.setRegion("region");
    config.setContainer("container");
    config.setBasePath("basePath");
    CloudFilesDataSegmentPusher pusher = new CloudFilesDataSegmentPusher(api, config, new DefaultObjectMapper());
    // Create a mock segment on disk
    File tmp = tempFolder.newFile("version.bin");
    final byte[] data = new byte[] { 0x0, 0x0, 0x0, 0x1 };
    Files.write(data, tmp);
    final long size = data.length;
    DataSegment segmentToPush = new DataSegment(
        "foo",                              // dataSource
        new Interval("2015/2016"),          // interval
        "0",                                // version
        Maps.<String, Object>newHashMap(),  // loadSpec (filled in by the pusher)
        Lists.<String>newArrayList(),       // dimensions
        Lists.<String>newArrayList(),       // metrics
        NoneShardSpec.instance(),           // shardSpec
        0,                                  // binaryVersion
        size                                // size in bytes
    );
    DataSegment segment = pusher.push(tempFolder.getRoot(), segmentToPush);
    Assert.assertEquals(segmentToPush.getSize(), segment.getSize());
    EasyMock.verify(api);
}
Also used: ObjectApi (org.jclouds.openstack.swift.v1.features.ObjectApi), Payload (org.jclouds.io.Payload), CloudFilesApi (org.jclouds.rackspace.cloudfiles.v1.CloudFilesApi), DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper), File (java.io.File), DataSegment (io.druid.timeline.DataSegment), Interval (org.joda.time.Interval), Test (org.junit.Test)
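
Both mocks here are strict mocks, so EasyMock verifies call order as well as call counts. The record, replay, verify lifecycle the test relies on, shown in isolation (a generic sketch, not additional code from the test):

ObjectApi mock = EasyMock.createStrictMock(ObjectApi.class);
// Record phase: declare the interactions the test expects.
EasyMock.expect(mock.put(EasyMock.anyString(), EasyMock.<Payload>anyObject()))
        .andReturn(null)
        .atLeastOnce();
// Replay phase: from here on, real invocations are matched against the recording.
EasyMock.replay(mock);
// ... exercise the code under test ...
// Verify phase: fails the test if any recorded expectation was never satisfied.
EasyMock.verify(mock);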

Example 88 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class BatchServerInventoryView, method addInnerInventory.

@Override
protected DruidServer addInnerInventory(final DruidServer container, String inventoryKey, final Set<DataSegment> inventory) {
    Set<DataSegment> filteredInventory = filterInventory(container, inventory);
    zNodes.put(inventoryKey, filteredInventory);
    for (DataSegment segment : filteredInventory) {
        addSingleInventory(container, segment);
    }
    return container;
}
Also used: DataSegment (io.druid.timeline.DataSegment)
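
filterInventory is defined elsewhere in BatchServerInventoryView and is not shown in this excerpt. Conceptually it narrows the announced inventory to the segments this view cares about for the given server; a rough sketch of that idea (wantedSegments is a hypothetical stand-in for the class's real, configurable filter):

private Set<DataSegment> filterInventory(final DruidServer container, Set<DataSegment> inventory) {
    // Guava's Sets.filter returns a filtered view of the input set.
    return Sets.filter(
        inventory,
        new Predicate<DataSegment>() {
            @Override
            public boolean apply(DataSegment segment) {
                // Hypothetical predicate; the real class consults its configured filter.
                return wantedSegments.apply(segment);
            }
        }
    );
}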

Example 89 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class BatchServerInventoryView, method updateInnerInventory.

@Override
protected DruidServer updateInnerInventory(DruidServer container, String inventoryKey, Set<DataSegment> inventory) {
    Set<DataSegment> filteredInventory = filterInventory(container, inventory);
    Set<DataSegment> existing = zNodes.get(inventoryKey);
    if (existing == null) {
        throw new ISE("Trying to update an inventoryKey[%s] that didn't exist?!", inventoryKey);
    }
    // Segments present in the update but not previously tracked: add them.
    for (DataSegment segment : Sets.difference(filteredInventory, existing)) {
        addSingleInventory(container, segment);
    }
    // Segments previously tracked but absent from the update: remove them.
    for (DataSegment segment : Sets.difference(existing, filteredInventory)) {
        removeSingleInventory(container, segment.getIdentifier());
    }
    zNodes.put(inventoryKey, filteredInventory);
    return container;
}
Also used: ISE (io.druid.java.util.common.ISE), DataSegment (io.druid.timeline.DataSegment)
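
The two Sets.difference calls compute the delta between the new and the previously tracked inventory: Guava's Sets.difference(a, b) is a live view of the elements of a that are not in b. A standalone illustration:

Set<String> existing = ImmutableSet.of("seg1", "seg2");
Set<String> updated = ImmutableSet.of("seg2", "seg3");

Set<String> toAdd = Sets.difference(updated, existing);     // [seg3]: newly announced
Set<String> toRemove = Sets.difference(existing, updated);  // [seg1]: no longer announced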

Example 90 with DataSegment

Use of io.druid.timeline.DataSegment in project druid by druid-io.

From class BatchServerInventoryView, method removeInnerInventory.

@Override
protected DruidServer removeInnerInventory(final DruidServer container, String inventoryKey) {
    log.debug("Server[%s] removed container[%s]", container.getName(), inventoryKey);
    Set<DataSegment> segments = zNodes.remove(inventoryKey);
    if (segments == null) {
        log.warn("Told to remove container[%s], which didn't exist", inventoryKey);
        return container;
    }
    for (DataSegment segment : segments) {
        removeSingleInventory(container, segment.getIdentifier());
    }
    return container;
}
Also used: DataSegment (io.druid.timeline.DataSegment)
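
Inventory entries are removed by DataSegment.getIdentifier(), which joins the segment's coordinates into a single string key. Roughly (a hedged illustration with hypothetical values; the exact formatting lives in DataSegment):

// dataSource _ intervalStart _ intervalEnd _ version, with the partition number
// appended for shards other than 0, e.g.:
// "wikipedia_2015-01-01T00:00:00.000Z_2015-01-02T00:00:00.000Z_v1_3"
String key = segment.getIdentifier();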

Aggregations

DataSegment (io.druid.timeline.DataSegment): 293
Test (org.junit.Test): 151
Interval (org.joda.time.Interval): 136
File (java.io.File): 56
DateTime (org.joda.time.DateTime): 52
IOException (java.io.IOException): 37
DruidServer (io.druid.client.DruidServer): 36
Map (java.util.Map): 35
DruidDataSource (io.druid.client.DruidDataSource): 19
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 18
List (java.util.List): 17
DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper): 16
Rule (io.druid.server.coordinator.rules.Rule): 16
ImmutableMap (com.google.common.collect.ImmutableMap): 15
ForeverLoadRule (io.druid.server.coordinator.rules.ForeverLoadRule): 14
IntervalDropRule (io.druid.server.coordinator.rules.IntervalDropRule): 13
IntervalLoadRule (io.druid.server.coordinator.rules.IntervalLoadRule): 13
CountDownLatch (java.util.concurrent.CountDownLatch): 13
GET (javax.ws.rs.GET): 13
Produces (javax.ws.rs.Produces): 13