Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class OrcIndexGeneratorJobTest, method verifyJob:
private void verifyJob(IndexGeneratorJob job) throws IOException {
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
  int segmentNum = 0;
  // Walk the interval one day at a time; each day corresponds to one segment.
  for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) {
    Integer[][] shardInfo = shardInfoForEachSegment[segmentNum++];
    File segmentOutputFolder = new File(
        String.format(
            "%s/%s/%s_%s/%s",
            config.getSchema().getIOConfig().getSegmentOutputPath(),
            config.getSchema().getDataSchema().getDataSource(),
            currTime.toString(),
            currTime.plusDays(1).toString(),
            config.getSchema().getTuningConfig().getVersion()
        )
    );
    Assert.assertTrue(segmentOutputFolder.exists());
    Assert.assertEquals(shardInfo.length, segmentOutputFolder.list().length);
    int rowCount = 0;
    for (int partitionNum = 0; partitionNum < shardInfo.length; ++partitionNum) {
      // Each partition gets a numbered folder containing a descriptor and an index zip.
      File individualSegmentFolder = new File(segmentOutputFolder, Integer.toString(partitionNum));
      Assert.assertTrue(individualSegmentFolder.exists());
      File descriptor = new File(individualSegmentFolder, "descriptor.json");
      File indexZip = new File(individualSegmentFolder, "index.zip");
      Assert.assertTrue(descriptor.exists());
      Assert.assertTrue(indexZip.exists());
      // Deserialize descriptor.json and check the segment metadata.
      DataSegment dataSegment = mapper.readValue(descriptor, DataSegment.class);
      Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
      Assert.assertEquals(new Interval(currTime, currTime.plusDays(1)), dataSegment.getInterval());
      Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
      Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
      Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
      Assert.assertEquals(dataSourceName, dataSegment.getDataSource());
      Assert.assertEquals(1, dataSegment.getDimensions().size());
      String[] dimensions = dataSegment.getDimensions().toArray(new String[dataSegment.getDimensions().size()]);
      Arrays.sort(dimensions);
      Assert.assertEquals("host", dimensions[0]);
      Assert.assertEquals("visited_num", dataSegment.getMetrics().get(0));
      Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
      // Verify the hash-based shard spec against the expected (partitionNum, numPartitions) pair.
      Integer[] hashShardInfo = shardInfo[partitionNum];
      HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
      Assert.assertEquals((int) hashShardInfo[0], spec.getPartitionNum());
      Assert.assertEquals((int) hashShardInfo[1], spec.getPartitions());
      // Unzip the index and count rows, checking that each row carries both metrics.
      File dir = Files.createTempDir();
      unzip(indexZip, dir);
      QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
      QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
      for (Rowboat row : adapter.getRows()) {
        Object[] metrics = row.getMetrics();
        rowCount++;
        Assert.assertEquals(2, metrics.length);
      }
    }
    Assert.assertEquals(data.size(), rowCount);
  }
}
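The per-partition assertions above hinge on round-tripping descriptor.json through Jackson. A minimal standalone sketch of that step, assuming a plain DefaultObjectMapper can stand in for the test's injected mapper, and taking a hypothetical partition folder as input:

import java.io.File;
import java.io.IOException;

import io.druid.jackson.DefaultObjectMapper;
import io.druid.timeline.DataSegment;

public class DescriptorReadSketch {
  // Reads one partition's descriptor.json back into a DataSegment, mirroring verifyJob.
  // The folder argument is hypothetical; in the test it comes from the job's output path.
  static DataSegment readDescriptor(File individualSegmentFolder) throws IOException {
    File descriptor = new File(individualSegmentFolder, "descriptor.json");
    return new DefaultObjectMapper().readValue(descriptor, DataSegment.class);
  }
}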
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class CloudFilesDataSegmentPusherTest, method testPush:
@Test
public void testPush() throws Exception {
  // Mock the object store: any put should be accepted.
  ObjectApi objectApi = EasyMock.createStrictMock(ObjectApi.class);
  EasyMock.expect(objectApi.put(EasyMock.anyString(), EasyMock.<Payload>anyObject())).andReturn(null).atLeastOnce();
  EasyMock.replay(objectApi);
  CloudFilesApi api = EasyMock.createStrictMock(CloudFilesApi.class);
  EasyMock.expect(api.getObjectApi(EasyMock.anyString(), EasyMock.anyString())).andReturn(objectApi).atLeastOnce();
  EasyMock.replay(api);
  CloudFilesDataSegmentPusherConfig config = new CloudFilesDataSegmentPusherConfig();
  config.setRegion("region");
  config.setContainer("container");
  config.setBasePath("basePath");
  CloudFilesDataSegmentPusher pusher = new CloudFilesDataSegmentPusher(api, config, new DefaultObjectMapper());
  // Create a mock segment on disk.
  File tmp = tempFolder.newFile("version.bin");
  final byte[] data = new byte[]{0x0, 0x0, 0x0, 0x1};
  Files.write(data, tmp);
  final long size = data.length;
  DataSegment segmentToPush = new DataSegment(
      "foo",
      new Interval("2015/2016"),
      "0",
      Maps.<String, Object>newHashMap(),
      Lists.<String>newArrayList(),
      Lists.<String>newArrayList(),
      NoneShardSpec.instance(),
      0,
      size
  );
  // Push the segment, then check that the size survived and the API was exercised as expected.
  DataSegment segment = pusher.push(tempFolder.getRoot(), segmentToPush);
  Assert.assertEquals(segmentToPush.getSize(), segment.getSize());
  EasyMock.verify(api);
}
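testPush builds its segment through DataSegment's full constructor. The same object can be assembled more readably with the builder; a sketch, assuming DataSegment.builder() and these setter names exist in this Druid version:

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import io.druid.timeline.DataSegment;
import io.druid.timeline.partition.NoneShardSpec;
import org.joda.time.Interval;

public class SegmentBuilderSketch {
  // Equivalent to the segmentToPush constructed in testPush, expressed via the builder.
  static DataSegment buildSegment(long size) {
    return DataSegment.builder()
        .dataSource("foo")
        .interval(new Interval("2015/2016"))
        .version("0")
        .loadSpec(Maps.<String, Object>newHashMap())
        .dimensions(Lists.<String>newArrayList())
        .metrics(Lists.<String>newArrayList())
        .shardSpec(NoneShardSpec.instance())
        .binaryVersion(0)
        .size(size)
        .build();
  }
}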
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class BatchServerInventoryView, method addInnerInventory:
@Override
protected DruidServer addInnerInventory(final DruidServer container, String inventoryKey, final Set<DataSegment> inventory) {
  // Drop segments this view is not interested in, then track the znode and register each segment.
  Set<DataSegment> filteredInventory = filterInventory(container, inventory);
  zNodes.put(inventoryKey, filteredInventory);
  for (DataSegment segment : filteredInventory) {
    addSingleInventory(container, segment);
  }
  return container;
}
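filterInventory is defined elsewhere in BatchServerInventoryView and is not shown in this excerpt; conceptually it keeps only the segments this view cares about. A hedged sketch of that shape, where the predicate argument and its wiring are hypothetical stand-ins rather than the class's actual logic:

import java.util.Set;

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import io.druid.timeline.DataSegment;

public class FilterInventorySketch {
  // Materializes the subset of the reported inventory that the predicate accepts.
  static Set<DataSegment> filterInventory(Set<DataSegment> inventory, Predicate<DataSegment> keep) {
    return ImmutableSet.copyOf(Sets.filter(inventory, keep));
  }
}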
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class BatchServerInventoryView, method updateInnerInventory:
@Override
protected DruidServer updateInnerInventory(DruidServer container, String inventoryKey, Set<DataSegment> inventory) {
  Set<DataSegment> filteredInventory = filterInventory(container, inventory);
  Set<DataSegment> existing = zNodes.get(inventoryKey);
  if (existing == null) {
    throw new ISE("Trying to update an inventoryKey[%s] that didn't exist?!", inventoryKey);
  }
  // Add segments that are new in this update...
  for (DataSegment segment : Sets.difference(filteredInventory, existing)) {
    addSingleInventory(container, segment);
  }
  // ...and remove segments that are no longer reported.
  for (DataSegment segment : Sets.difference(existing, filteredInventory)) {
    removeSingleInventory(container, segment.getIdentifier());
  }
  zNodes.put(inventoryKey, filteredInventory);
  return container;
}
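The two loops compute a set delta with Guava's Sets.difference, which returns a lazy view rather than a copy, so nothing beyond the iteration is materialized. A self-contained illustration using string stand-ins for segment identifiers:

import java.util.Set;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

public class InventoryDiffSketch {
  public static void main(String[] args) {
    Set<String> existing = ImmutableSet.of("seg1", "seg2");
    Set<String> updated = ImmutableSet.of("seg2", "seg3");
    // In the update but not yet tracked: would get addSingleInventory.
    System.out.println(Sets.difference(updated, existing)); // [seg3]
    // Tracked but missing from the update: would get removeSingleInventory.
    System.out.println(Sets.difference(existing, updated)); // [seg1]
  }
}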
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class BatchServerInventoryView, method removeInnerInventory:
@Override
protected DruidServer removeInnerInventory(final DruidServer container, String inventoryKey) {
  log.debug("Server[%s] removed container[%s]", container.getName(), inventoryKey);
  Set<DataSegment> segments = zNodes.remove(inventoryKey);
  if (segments == null) {
    log.warn("Told to remove container[%s], which didn't exist", inventoryKey);
    return container;
  }
  // Deregister every segment that was tracked under the removed znode.
  for (DataSegment segment : segments) {
    removeSingleInventory(container, segment.getIdentifier());
  }
  return container;
}
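The remove path leans on Map.remove returning null to distinguish an unknown inventoryKey from a known one, instead of a containsKey check followed by a separate (and racy) remove. A compact sketch of the pattern; the concrete map type behind zNodes is not shown in this excerpt, so a ConcurrentHashMap is assumed:

import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import com.google.common.collect.ImmutableSet;

public class RemoveInventorySketch {
  // ConcurrentHashMap is an assumption for the sketch, not the class's declared type.
  private final Map<String, Set<String>> zNodes = new ConcurrentHashMap<>();

  boolean removeInventory(String inventoryKey) {
    Set<String> segments = zNodes.remove(inventoryKey);
    if (segments == null) {
      // Unknown key: nothing tracked under it, mirroring the log-and-return path above.
      return false;
    }
    // Known key: each previously tracked segment would be deregistered here.
    return true;
  }

  public static void main(String[] args) {
    RemoveInventorySketch view = new RemoveInventorySketch();
    view.zNodes.put("node1", ImmutableSet.of("seg1"));
    System.out.println(view.removeInventory("node1")); // true
    System.out.println(view.removeInventory("node1")); // false, already removed
  }
}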