Use of io.druid.segment.QueryableIndex in project druid by druid-io.
From class DatasourceRecordReader, method initialize:
@Override
public void initialize(InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException {
  spec = readAndVerifyDatasourceIngestionSpec(context.getConfiguration(), HadoopDruidIndexerConfig.JSON_MAPPER);
  List<WindowedDataSegment> segments = ((DatasourceInputSplit) split).getSegments();
  List<WindowedStorageAdapter> adapters = Lists.transform(
      segments,
      new Function<WindowedDataSegment, WindowedStorageAdapter>() {
        @Override
        public WindowedStorageAdapter apply(WindowedDataSegment segment) {
          try {
            logger.info("Getting storage path for segment [%s]", segment.getSegment().getIdentifier());
            Path path = new Path(JobHelper.getURIFromSegment(segment.getSegment()));
            logger.info("Fetch segment files from [%s]", path);
            // fetch and unzip the segment into a local temp dir, tracked for later cleanup
            File dir = Files.createTempDir();
            tmpSegmentDirs.add(dir);
            logger.info("Locally storing fetched segment at [%s]", dir);
            JobHelper.unzipNoGuava(path, context.getConfiguration(), dir, context);
            logger.info("finished fetching segment files");
            // mmap the unzipped segment and expose it as a storage adapter
            // restricted to the segment's window interval
            QueryableIndex index = HadoopDruidIndexerConfig.INDEX_IO.loadIndex(dir);
            indexes.add(index);
            numRows += index.getNumRows();
            return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(index), segment.getInterval());
          }
          catch (IOException ex) {
            throw Throwables.propagate(ex);
          }
        }
      }
  );
  firehose = new IngestSegmentFirehose(adapters, spec.getDimensions(), spec.getMetrics(), spec.getFilter(), spec.getGranularity());
}
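Once initialize returns, the reader serves rows by draining this firehose. A minimal sketch of how the remaining RecordReader callbacks might look, assuming fields currRow and rowsReturned alongside the firehose and numRows fields above (illustrative, not a verbatim copy of Druid's implementation):

@Override
public boolean nextKeyValue() {
  if (firehose.hasMore()) {
    // pull the next InputRow out of the windowed segment adapters
    currRow = firehose.nextRow();
    rowsReturned++;
    return true;
  }
  return false;
}

@Override
public NullWritable getCurrentKey() {
  return NullWritable.get();
}

@Override
public InputRow getCurrentValue() {
  return currRow;
}

@Override
public float getProgress() {
  // approximate: rows emitted so far over the total rows in the loaded indexes
  return numRows == 0 ? 1.0f : (float) rowsReturned / numRows;
}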
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
From class BatchDeltaIngestionTest, method testIngestion:
private void testIngestion(
    HadoopDruidIndexerConfig config,
    List<ImmutableMap<String, Object>> expectedRowsGenerated,
    WindowedDataSegment windowedDataSegment
) throws Exception {
  IndexGeneratorJob job = new IndexGeneratorJob(config);
  JobHelper.runJobs(ImmutableList.<Jobby>of(job), config);
  File segmentFolder = new File(
      String.format(
          "%s/%s/%s_%s/%s/0",
          config.getSchema().getIOConfig().getSegmentOutputPath(),
          config.getSchema().getDataSchema().getDataSource(),
          INTERVAL_FULL.getStart().toString(),
          INTERVAL_FULL.getEnd().toString(),
          config.getSchema().getTuningConfig().getVersion()
      )
  );
  Assert.assertTrue(segmentFolder.exists());
  File descriptor = new File(segmentFolder, "descriptor.json");
  File indexZip = new File(segmentFolder, "index.zip");
  Assert.assertTrue(descriptor.exists());
  Assert.assertTrue(indexZip.exists());
  DataSegment dataSegment = MAPPER.readValue(descriptor, DataSegment.class);
  Assert.assertEquals("website", dataSegment.getDataSource());
  Assert.assertEquals(config.getSchema().getTuningConfig().getVersion(), dataSegment.getVersion());
  Assert.assertEquals(INTERVAL_FULL, dataSegment.getInterval());
  Assert.assertEquals("local", dataSegment.getLoadSpec().get("type"));
  Assert.assertEquals(indexZip.getCanonicalPath(), dataSegment.getLoadSpec().get("path"));
  Assert.assertEquals("host", dataSegment.getDimensions().get(0));
  Assert.assertEquals("visited_sum", dataSegment.getMetrics().get(0));
  Assert.assertEquals("unique_hosts", dataSegment.getMetrics().get(1));
  Assert.assertEquals(Integer.valueOf(9), dataSegment.getBinaryVersion());
  HashBasedNumberedShardSpec spec = (HashBasedNumberedShardSpec) dataSegment.getShardSpec();
  Assert.assertEquals(0, spec.getPartitionNum());
  Assert.assertEquals(1, spec.getPartitions());
  // unzip the produced segment, mmap it, and read it back through a firehose
  File tmpUnzippedSegmentDir = temporaryFolder.newFolder();
  new LocalDataSegmentPuller().getSegmentFiles(dataSegment, tmpUnzippedSegmentDir);
  QueryableIndex index = INDEX_IO.loadIndex(tmpUnzippedSegmentDir);
  StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
  Firehose firehose = new IngestSegmentFirehose(
      ImmutableList.of(new WindowedStorageAdapter(adapter, windowedDataSegment.getInterval())),
      ImmutableList.of("host"),
      ImmutableList.of("visited_sum", "unique_hosts"),
      null,
      Granularities.NONE
  );
  List<InputRow> rows = Lists.newArrayList();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  verifyRows(expectedRowsGenerated, rows);
}
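The verifyRows helper is elided here; a minimal sketch of what such a row-by-row comparison could look like, assuming each expected map carries time, host, visited_sum, and unique_hosts keys (a hypothetical reconstruction, not the test's exact code):

private void verifyRows(List<ImmutableMap<String, Object>> expectedRows, List<InputRow> actualRows) {
  Assert.assertEquals(expectedRows.size(), actualRows.size());
  for (int i = 0; i < expectedRows.size(); i++) {
    Map<String, Object> expected = expectedRows.get(i);
    InputRow actual = actualRows.get(i);
    Assert.assertEquals(expected.get("time"), actual.getTimestamp());
    Assert.assertEquals(expected.get("host"), actual.getDimension("host"));
    Assert.assertEquals(expected.get("visited_sum"), actual.getLongMetric("visited_sum"));
    // hyperUnique metrics deserialize to sketches; compare their cardinality estimates
    Assert.assertEquals(
        (Double) expected.get("unique_hosts"),
        (Double) HyperUniquesAggregatorFactory.estimateCardinality(actual.getRaw("unique_hosts")),
        0.001
    );
  }
}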
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
From class DruidSchemaTest, method setUp:
@Before
public void setUp() throws Exception {
  Calcites.setSystemProperties();
  final File tmpDir = temporaryFolder.newFolder();
  final QueryableIndex index1 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "1"))
      .indexMerger(TestHelper.getTestIndexMergerV9())
      .schema(new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[]{new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1"), new HyperUniquesAggregatorFactory("unique_dim1", "dim1")}).withRollup(false).build())
      .rows(ROWS1).buildMMappedIndex();
  final QueryableIndex index2 = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "2"))
      .indexMerger(TestHelper.getTestIndexMergerV9())
      .schema(new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[]{new LongSumAggregatorFactory("m1", "m1")}).withRollup(false).build())
      .rows(ROWS2).buildMMappedIndex();
  walker = new SpecificSegmentsQuerySegmentWalker(CalciteTests.queryRunnerFactoryConglomerate())
      .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(new Interval("2000/P1Y")).version("1").shardSpec(new LinearShardSpec(0)).build(), index1)
      .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE1).interval(new Interval("2001/P1Y")).version("1").shardSpec(new LinearShardSpec(0)).build(), index2)
      .add(DataSegment.builder().dataSource(CalciteTests.DATASOURCE2).interval(index2.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index2);
  schema = new DruidSchema(walker, new TestServerInventoryView(walker.getSegments()), PLANNER_CONFIG_DEFAULT);
  schema.start();
  schema.awaitInitialization();
}
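After awaitInitialization returns, every datasource registered with the walker should surface as a Calcite table. A minimal follow-up assertion sketch (a hypothetical test body; it assumes DruidSchema exposes Calcite's getTableMap()):

@Test
public void testTablesAreRegistered() {
  // both datasources added in setUp should be visible as tables
  final Map<String, Table> tableMap = schema.getTableMap();
  Assert.assertEquals(
      ImmutableSet.of(CalciteTests.DATASOURCE1, CalciteTests.DATASOURCE2),
      tableMap.keySet()
  );
}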
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
From class CalciteTests, method createMockWalker:
public static SpecificSegmentsQuerySegmentWalker createMockWalker(final File tmpDir) {
  final QueryableIndex index1 = IndexBuilder.create().tmpDir(new File(tmpDir, "1")).indexMerger(TestHelper.getTestIndexMergerV9()).schema(INDEX_SCHEMA).rows(ROWS1).buildMMappedIndex();
  final QueryableIndex index2 = IndexBuilder.create().tmpDir(new File(tmpDir, "2")).indexMerger(TestHelper.getTestIndexMergerV9()).schema(INDEX_SCHEMA).rows(ROWS2).buildMMappedIndex();
  return new SpecificSegmentsQuerySegmentWalker(queryRunnerFactoryConglomerate())
      .add(DataSegment.builder().dataSource(DATASOURCE1).interval(index1.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index1)
      .add(DataSegment.builder().dataSource(DATASOURCE2).interval(index2.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).build(), index2);
}
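A typical way a test might wire this up, creating the walker in setUp and closing it afterwards so the mmapped indexes are released (illustrative usage; the JUnit plumbing is an assumption, not part of CalciteTests):

@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder();

private SpecificSegmentsQuerySegmentWalker walker;

@Before
public void setUp() throws Exception {
  // builds two mmapped QueryableIndexes under the temp dir and registers them
  walker = CalciteTests.createMockWalker(temporaryFolder.newFolder());
}

@After
public void tearDown() throws Exception {
  // unmaps the indexes and removes the temporary segment files
  walker.close();
}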
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
From class SegmentAnalyzer, method analyze:
public Map<String, ColumnAnalysis> analyze(Segment segment) {
  Preconditions.checkNotNull(segment, "segment");
  // index is null for incremental-index-based segments, but storageAdapter is always available
  final QueryableIndex index = segment.asQueryableIndex();
  final StorageAdapter storageAdapter = segment.asStorageAdapter();
  // get length and column names from storageAdapter
  final int length = storageAdapter.getNumRows();
  final Set<String> columnNames = Sets.newHashSet();
  Iterables.addAll(columnNames, storageAdapter.getAvailableDimensions());
  Iterables.addAll(columnNames, storageAdapter.getAvailableMetrics());
  Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
  for (String columnName : columnNames) {
    final Column column = index == null ? null : index.getColumn(columnName);
    final ColumnCapabilities capabilities = column != null
        ? column.getCapabilities()
        : storageAdapter.getColumnCapabilities(columnName);
    final ColumnAnalysis analysis;
    final ValueType type = capabilities.getType();
    switch (type) {
      case LONG:
        analysis = analyzeNumericColumn(capabilities, length, Longs.BYTES);
        break;
      case FLOAT:
        analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
        break;
      case STRING:
        if (index != null) {
          analysis = analyzeStringColumn(capabilities, column);
        } else {
          analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
        }
        break;
      case COMPLEX:
        analysis = analyzeComplexColumn(capabilities, column, storageAdapter.getColumnTypeName(columnName));
        break;
      default:
        log.warn("Unknown column type[%s].", type);
        analysis = ColumnAnalysis.error(String.format("unknown_type_%s", type));
    }
    columns.put(columnName, analysis);
  }
  // Add time column too
  ColumnCapabilities timeCapabilities = storageAdapter.getColumnCapabilities(Column.TIME_COLUMN_NAME);
  if (timeCapabilities == null) {
    timeCapabilities = new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
  }
  columns.put(Column.TIME_COLUMN_NAME, analyzeNumericColumn(timeCapabilities, length, NUM_BYTES_IN_TIMESTAMP));
  return columns;
}
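For the numeric branches, the analysis reduces to a fixed bytes-per-row size estimate: row count times a per-type width (Longs.BYTES for longs, NUM_BYTES_IN_TEXT_FLOAT for floats). A minimal sketch of what analyzeNumericColumn could look like under that reading (the seven-argument ColumnAnalysis constructor is an assumption about this Druid version):

private ColumnAnalysis analyzeNumericColumn(
    final ColumnCapabilities capabilities,
    final int numRows,
    final int sizePerRow
) {
  if (capabilities.hasMultipleValues()) {
    // numeric columns are single-valued; anything else is unanalyzable
    return ColumnAnalysis.error("multi_value");
  }
  // fixed-width estimate: rows * bytes-per-row; numerics carry no cardinality or min/max
  return new ColumnAnalysis(
      capabilities.getType().name(),
      capabilities.hasMultipleValues(),
      (long) numRows * sizePerRow,
      null,
      null,
      null,
      null
  );
}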