use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
the class SegmentAnalyzer method analyze.
/**
 * Produces one {@link ColumnAnalysis} per dimension and metric reported by the segment's storage
 * adapter, and adds an entry for the time column.
 *
 * @param segment the segment to inspect; rejected by a precondition check when null
 * @return analyses keyed by column name, collected in a tree map
 */
public Map<String, ColumnAnalysis> analyze(Segment segment)
{
  Preconditions.checkNotNull(segment, "segment");

  // Incremental-index-based segments yield a null QueryableIndex; the storage adapter is always there.
  final QueryableIndex queryableIndex = segment.asQueryableIndex();
  final StorageAdapter adapter = segment.asStorageAdapter();

  // Row count and the set of column names are both taken from the adapter.
  final int numRows = adapter.getNumRows();
  final Set<String> names = Sets.newHashSet();
  Iterables.addAll(names, adapter.getAvailableDimensions());
  Iterables.addAll(names, adapter.getAvailableMetrics());

  final Map<String, ColumnAnalysis> result = Maps.newTreeMap();

  for (String name : names) {
    final Column column;
    if (queryableIndex != null) {
      column = queryableIndex.getColumn(name);
    } else {
      column = null;
    }

    // Prefer the capabilities of the physical column; otherwise ask the adapter.
    final ColumnCapabilities caps;
    if (column == null) {
      caps = adapter.getColumnCapabilities(name);
    } else {
      caps = column.getCapabilities();
    }

    final ValueType type = caps.getType();
    switch (type) {
      case LONG:
        result.put(name, analyzeNumericColumn(caps, numRows, Longs.BYTES));
        break;
      case FLOAT:
        result.put(name, analyzeNumericColumn(caps, numRows, NUM_BYTES_IN_TEXT_FLOAT));
        break;
      case STRING:
        // NOTE(review): the choice is made on the index, not on the column, so a column missing
        // from a non-null index is passed through as null — confirm the helper tolerates that.
        result.put(
            name,
            queryableIndex != null
            ? analyzeStringColumn(caps, column)
            : analyzeStringColumn(caps, adapter, name)
        );
        break;
      case COMPLEX:
        result.put(name, analyzeComplexColumn(caps, column, adapter.getColumnTypeName(name)));
        break;
      default:
        // Unrecognized types are logged and recorded as an error analysis.
        log.warn("Unknown column type[%s].", type);
        result.put(name, ColumnAnalysis.error(String.format("unknown_type_%s", type)));
        break;
    }
  }

  // The time column is analyzed separately; when the adapter reports no capabilities for it,
  // fall back to a single-valued LONG.
  final ColumnCapabilities reportedTimeCaps = adapter.getColumnCapabilities(Column.TIME_COLUMN_NAME);
  final ColumnCapabilities timeCaps = reportedTimeCaps != null
                                      ? reportedTimeCaps
                                      : new ColumnCapabilitiesImpl().setType(ValueType.LONG).setHasMultipleValues(false);
  result.put(Column.TIME_COLUMN_NAME, analyzeNumericColumn(timeCaps, numRows, NUM_BYTES_IN_TIMESTAMP));

  return result;
}
use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
the class SegmentAnalyzerTest method testIncrementalWorksHelper.
/**
 * Analyzes the incremental test index with the given analysis types and checks the reported
 * columns: every dimension and metric must have size 0, string dimensions must report a
 * cardinality (positive when {@code analyses} is null, otherwise 0), and non-string dimensions
 * and metrics must report no cardinality at all.
 *
 * @param analyses analysis types handed to {@code getSegmentAnalysises}; may be null
 * @throws Exception propagated from the analysis call
 */
private void testIncrementalWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception
{
  final IncrementalIndexSegment segment = new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(), null);
  final List<SegmentAnalysis> analysisResults = getSegmentAnalysises(segment, analyses);

  Assert.assertEquals(1, analysisResults.size());

  final SegmentAnalysis onlyResult = analysisResults.get(0);
  Assert.assertEquals(null, onlyResult.getId());

  final Map<String, ColumnAnalysis> byColumn = onlyResult.getColumns();
  Assert.assertEquals(TestIndex.COLUMNS.length, byColumn.size());

  for (DimensionSchema dimensionSchema : TestIndex.DIMENSION_SCHEMAS) {
    final String name = dimensionSchema.getName();
    final ColumnAnalysis dimAnalysis = byColumn.get(name);
    // Types are compared by enum constant name rather than by identity.
    final String typeName = dimensionSchema.getValueType().name();
    final boolean stringTyped = typeName.equals(ValueType.STRING.name());

    Assert.assertEquals(name, typeName, dimAnalysis.getType());
    Assert.assertEquals(name, 0, dimAnalysis.getSize());

    if (!stringTyped) {
      Assert.assertNull(name, dimAnalysis.getCardinality());
    } else if (analyses == null) {
      Assert.assertTrue(name, dimAnalysis.getCardinality() > 0);
    } else {
      Assert.assertEquals(name, 0, dimAnalysis.getCardinality().longValue());
    }
  }

  for (String metricName : TestIndex.METRICS) {
    final ColumnAnalysis metricAnalysis = byColumn.get(metricName);

    Assert.assertEquals(metricName, ValueType.FLOAT.name(), metricAnalysis.getType());
    Assert.assertEquals(metricName, 0, metricAnalysis.getSize());
    Assert.assertNull(metricName, metricAnalysis.getCardinality());
  }
}
use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
the class SegmentMetadataQueryTest method testSegmentMetadataQueryWithDefaultAnalysisMerge2.
/**
 * Runs the shared default-analysis merge check for the "market" column, expecting a
 * single-valued STRING column whose numeric third constructor argument (presumably the merged
 * size — confirm against {@code ColumnAnalysis}) is the sum of two per-segment contributions.
 */
@Test
public void testSegmentMetadataQueryWithDefaultAnalysisMerge2()
{
  // Each segment contributes 6882 when its mmap flag is set and 6808 otherwise.
  final int firstContribution = mmap1 ? 6882 : 6808;
  final int secondContribution = mmap2 ? 6882 : 6808;

  final ColumnAnalysis expectedMarket = new ColumnAnalysis(
      ValueType.STRING.toString(),
      false,
      firstContribution + secondContribution,
      3,
      "spot",
      "upfront",
      null
  );

  testSegmentMetadataQueryWithDefaultAnalysisMerge("market", expectedMarket);
}
use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
the class SegmentMetadataQueryTest method testSegmentMetadataQueryWithTimestampSpecMerge.
/**
 * Merges the results of {@code runner1} and {@code runner2} for a segment-metadata query that
 * requests only the {@code TIMESTAMPSPEC} analysis type on the "placement" column, and checks
 * the merged result against an expected analysis that carries the summed row counts of the two
 * expected per-segment analyses and a {@code TimestampSpec("ds", "auto", null)}.
 */
@Test
public void testSegmentMetadataQueryWithTimestampSpecMerge()
{
  SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
      differentIds ? "merged" : "testSegment",
      null,
      ImmutableMap.of(
          "placement",
          new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 0, null, null, null)
      ),
      0,
      expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
      null,
      new TimestampSpec("ds", "auto", null),
      null,
      null
  );

  QueryToolChest toolChest = FACTORY.getToolchest();

  // The runners are merged on a same-thread executor, so no thread pool is created here; the
  // previously allocated cached thread pool was never handed to anything.
  QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(
          FACTORY.mergeRunners(
              MoreExecutors.sameThreadExecutor(),
              Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                  toolChest.preMergeQueryDecoration(runner1),
                  toolChest.preMergeQueryDecoration(runner2)
              )
          )
      ),
      toolChest
  );

  TestHelper.assertExpectedObjects(
      ImmutableList.of(mergedSegmentAnalysis),
      myRunner.run(
          Druids.newSegmentMetadataQueryBuilder()
                .dataSource("testing")
                .intervals("2013/2014")
                .toInclude(new ListColumnIncluderator(Arrays.asList("placement")))
                .analysisTypes(SegmentMetadataQuery.AnalysisType.TIMESTAMPSPEC)
                .merge(true)
                .build(),
          Maps.newHashMap()
      ),
      "failed SegmentMetadata merging query"
  );
}
use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
the class SegmentMetadataQueryTest method testSegmentMetadataQueryWithAggregatorsMerge.
/**
 * Merges the results of {@code runner1} and {@code runner2} for a segment-metadata query that
 * requests only the {@code AGGREGATORS} analysis type on the "placement" column, and checks the
 * merged result against an expected analysis that carries the summed row counts of the two
 * expected per-segment analyses and the combining factory of every test metric aggregator.
 */
@Test
public void testSegmentMetadataQueryWithAggregatorsMerge()
{
  // Expected aggregators: each test metric's combining factory, keyed by the metric's name.
  final Map<String, AggregatorFactory> expectedAggregators = Maps.newHashMap();
  for (AggregatorFactory agg : TestIndex.METRIC_AGGS) {
    expectedAggregators.put(agg.getName(), agg.getCombiningFactory());
  }

  SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
      differentIds ? "merged" : "testSegment",
      null,
      ImmutableMap.of(
          "placement",
          new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 0, null, null, null)
      ),
      0,
      expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
      expectedAggregators,
      null,
      null,
      null
  );

  QueryToolChest toolChest = FACTORY.getToolchest();

  // The runners are merged on a same-thread executor, so no thread pool is created here; the
  // previously allocated cached thread pool was never handed to anything.
  QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(
          FACTORY.mergeRunners(
              MoreExecutors.sameThreadExecutor(),
              Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                  toolChest.preMergeQueryDecoration(runner1),
                  toolChest.preMergeQueryDecoration(runner2)
              )
          )
      ),
      toolChest
  );

  TestHelper.assertExpectedObjects(
      ImmutableList.of(mergedSegmentAnalysis),
      myRunner.run(
          Druids.newSegmentMetadataQueryBuilder()
                .dataSource("testing")
                .intervals("2013/2014")
                .toInclude(new ListColumnIncluderator(Arrays.asList("placement")))
                .analysisTypes(SegmentMetadataQuery.AnalysisType.AGGREGATORS)
                .merge(true)
                .build(),
          Maps.newHashMap()
      ),
      "failed SegmentMetadata merging query"
  );
}
Aggregations