Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
From the class SegmentMetadataQueryTest, method testSegmentMetadataQueryWithComplexColumnMerge:
@Test
public void testSegmentMetadataQueryWithComplexColumnMerge() {
  SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
      differentIds ? "merged" : "testSegment",
      null,
      ImmutableMap.of(
          "placement", new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 1, null, null, null),
          "quality_uniques", new ColumnAnalysis("hyperUnique", false, 0, null, null, null, null)
      ),
      0,
      expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
      null, null, null, null
  );
  QueryToolChest toolChest = FACTORY.getToolchest();
  ExecutorService exec = Executors.newCachedThreadPool();
  QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(
          FACTORY.mergeRunners(
              MoreExecutors.sameThreadExecutor(),
              Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                  toolChest.preMergeQueryDecoration(runner1),
                  toolChest.preMergeQueryDecoration(runner2)
              )
          )
      ),
      toolChest
  );
  TestHelper.assertExpectedObjects(
      ImmutableList.of(mergedSegmentAnalysis),
      myRunner.run(
          Druids.newSegmentMetadataQueryBuilder()
                .dataSource("testing")
                .intervals("2013/2014")
                .toInclude(new ListColumnIncluderator(Arrays.asList("placement", "quality_uniques")))
                .analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY)
                .merge(true)
                .build(),
          Maps.newHashMap()
      ),
      "failed SegmentMetadata merging query"
  );
  exec.shutdownNow();
}
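The merged expectations above combine per-segment column analyses. The following sketch is not part of the test; it assumes ColumnAnalysis.fold() merges two per-segment results by keeping a compatible type and the larger cardinality, which is consistent with the "placement" entry in mergedSegmentAnalysis.

// Hedged sketch: assumes ColumnAnalysis.fold() combines per-segment analyses as described above.
ColumnAnalysis placementFromSegment1 =
    new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 1, null, null, null);
ColumnAnalysis placementFromSegment2 =
    new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 1, null, null, null);
ColumnAnalysis merged = placementFromSegment1.fold(placementFromSegment2);
// Expected to match the "placement" entry of mergedSegmentAnalysis: STRING, single-valued, cardinality 1.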
Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
From the class SegmentMetadataQueryTest, method testSegmentMetadataQueryWithHasMultipleValuesMerge:
@Test
public void testSegmentMetadataQueryWithHasMultipleValuesMerge() {
  SegmentAnalysis mergedSegmentAnalysis = new SegmentAnalysis(
      differentIds ? "merged" : "testSegment",
      null,
      ImmutableMap.of(
          "placement", new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 1, null, null, null),
          "placementish", new ColumnAnalysis(ValueType.STRING.toString(), true, 0, 9, null, null, null)
      ),
      0,
      expectedSegmentAnalysis1.getNumRows() + expectedSegmentAnalysis2.getNumRows(),
      null, null, null, null
  );
  QueryToolChest toolChest = FACTORY.getToolchest();
  ExecutorService exec = Executors.newCachedThreadPool();
  QueryRunner myRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(
          FACTORY.mergeRunners(
              MoreExecutors.sameThreadExecutor(),
              Lists.<QueryRunner<SegmentAnalysis>>newArrayList(
                  toolChest.preMergeQueryDecoration(runner1),
                  toolChest.preMergeQueryDecoration(runner2)
              )
          )
      ),
      toolChest
  );
  TestHelper.assertExpectedObjects(
      ImmutableList.of(mergedSegmentAnalysis),
      myRunner.run(
          Druids.newSegmentMetadataQueryBuilder()
                .dataSource("testing")
                .intervals("2013/2014")
                .toInclude(new ListColumnIncluderator(Arrays.asList("placement", "placementish")))
                .analysisTypes(SegmentMetadataQuery.AnalysisType.CARDINALITY)
                .merge(true)
                .build(),
          Maps.newHashMap()
      ),
      "failed SegmentMetadata merging query"
  );
  exec.shutdownNow();
}
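In the merged expectations above, "placementish" stays multi-valued even though other columns are single-valued. The sketch below is a hypothetical illustration, not part of the test; it assumes ColumnAnalysis.fold() keeps the multi-value flag set when either side is multi-valued.

// Hedged sketch: assumes fold() preserves hasMultipleValues == true if either input is multi-valued.
ColumnAnalysis singleValued =
    new ColumnAnalysis(ValueType.STRING.toString(), false, 0, 9, null, null, null);
ColumnAnalysis multiValued =
    new ColumnAnalysis(ValueType.STRING.toString(), true, 0, 9, null, null, null);
ColumnAnalysis folded = singleValued.fold(multiValued);
// The folded analysis would remain multi-valued, matching the "placementish" entry above.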
Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
From the class DruidSchema, method computeTable:
private DruidTable computeTable(final String dataSource) {
  final SegmentMetadataQuery segmentMetadataQuery = new SegmentMetadataQuery(
      new TableDataSource(dataSource),
      null,
      null,
      false,
      ImmutableMap.<String, Object>of("useCache", false, "populateCache", false),
      EnumSet.of(SegmentMetadataQuery.AnalysisType.INTERVAL),
      null,
      true
  );
  final Sequence<SegmentAnalysis> sequence = segmentMetadataQuery.run(walker, Maps.<String, Object>newHashMap());
  final List<SegmentAnalysis> results = Sequences.toList(sequence, Lists.<SegmentAnalysis>newArrayList());
  if (results.isEmpty()) {
    return null;
  }
  final Map<String, ValueType> columnTypes = Maps.newLinkedHashMap();
  // Resolve conflicts by taking the latest metadata. This aids in gradual schema evolution.
  long maxTimestamp = JodaUtils.MIN_INSTANT;
  for (SegmentAnalysis analysis : results) {
    final long timestamp;
    if (analysis.getIntervals() != null && analysis.getIntervals().size() > 0) {
      timestamp = analysis.getIntervals().get(analysis.getIntervals().size() - 1).getEndMillis();
    } else {
      timestamp = JodaUtils.MIN_INSTANT;
    }
    for (Map.Entry<String, ColumnAnalysis> entry : analysis.getColumns().entrySet()) {
      if (entry.getValue().isError()) {
        // Skip columns with analysis errors.
        continue;
      }
      if (!columnTypes.containsKey(entry.getKey()) || timestamp >= maxTimestamp) {
        ValueType valueType;
        try {
          valueType = ValueType.valueOf(entry.getValue().getType().toUpperCase());
        } catch (IllegalArgumentException e) {
          // Assume unrecognized types are some flavor of COMPLEX. This throws away information about exactly
          // what kind of complex column it is, which we may want to preserve some day.
          valueType = ValueType.COMPLEX;
        }
        columnTypes.put(entry.getKey(), valueType);
        maxTimestamp = timestamp;
      }
    }
  }
  final RowSignature.Builder rowSignature = RowSignature.builder();
  for (Map.Entry<String, ValueType> entry : columnTypes.entrySet()) {
    rowSignature.add(entry.getKey(), entry.getValue());
  }
  return new DruidTable(new TableDataSource(dataSource), rowSignature.build());
}
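A small, self-contained illustration of the "latest metadata wins" rule from the loop above. The column name "userId", the types, and the interval ends are hypothetical; the snippet only reproduces the overwrite condition !columnTypes.containsKey(...) || timestamp >= maxTimestamp.

// Hypothetical example of the conflict rule: the type reported by the newer segment overwrites the older one's.
Map<String, ValueType> columnTypes = Maps.newLinkedHashMap();
long maxTimestamp = JodaUtils.MIN_INSTANT;

long olderSegmentEnd = new DateTime("2013-07-01").getMillis();  // interval end of an older segment
if (!columnTypes.containsKey("userId") || olderSegmentEnd >= maxTimestamp) {
  columnTypes.put("userId", ValueType.STRING);   // older segment reports STRING
  maxTimestamp = olderSegmentEnd;
}

long newerSegmentEnd = new DateTime("2014-01-01").getMillis();  // interval end of a newer segment
if (!columnTypes.containsKey("userId") || newerSegmentEnd >= maxTimestamp) {
  columnTypes.put("userId", ValueType.LONG);     // newer segment reports LONG and wins
  maxTimestamp = newerSegmentEnd;
}
// columnTypes now maps "userId" to LONG, the type from the latest interval end.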
Use of io.druid.query.metadata.metadata.ColumnAnalysis in project hive by apache.
From the class DruidSerDe, method initFromMetaDataQuery:
private void initFromMetaDataQuery(final Configuration configuration, final Properties properties) throws SerDeException {
  final List<String> columnNames = new ArrayList<>();
  final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
  final List<ObjectInspector> inspectors = new ArrayList<>();
  String dataSource = properties.getProperty(Constants.DRUID_DATA_SOURCE);
  if (dataSource == null) {
    throw new SerDeException("Druid data source not specified; use " + Constants.DRUID_DATA_SOURCE + " in table properties");
  }
  SegmentMetadataQueryBuilder builder = new Druids.SegmentMetadataQueryBuilder();
  builder.dataSource(dataSource);
  builder.merge(true);
  builder.analysisTypes();
  SegmentMetadataQuery query = builder.build();
  // Execute query in Druid
  String address = HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
  if (org.apache.commons.lang3.StringUtils.isEmpty(address)) {
    throw new SerDeException("Druid broker address not specified in configuration");
  }
  // Infer schema
  SegmentAnalysis schemaInfo;
  try {
    schemaInfo = submitMetadataRequest(address, query);
  } catch (IOException e) {
    throw new SerDeException(e);
  }
  for (Entry<String, ColumnAnalysis> columnInfo : schemaInfo.getColumns().entrySet()) {
    if (columnInfo.getKey().equals(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN)) {
      // Special handling for timestamp column
      // field name
      columnNames.add(columnInfo.getKey());
      // field type
      PrimitiveTypeInfo type = tsTZTypeInfo;
      columnTypes.add(type);
      inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
      continue;
    }
    // field name
    columnNames.add(columnInfo.getKey());
    // field type
    PrimitiveTypeInfo type = DruidSerDeUtils.convertDruidToHiveType(columnInfo.getValue().getType());
    columnTypes.add(type instanceof TimestampLocalTZTypeInfo ? tsTZTypeInfo : type);
    inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(type));
  }
  columns = columnNames.toArray(new String[columnNames.size()]);
  types = columnTypes.toArray(new PrimitiveTypeInfo[columnTypes.size()]);
  inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
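The Druid-to-Hive type conversion is delegated to DruidSerDeUtils.convertDruidToHiveType above. The stand-in below is only a hypothetical sketch of such a mapping; the helper name toHiveType and the exact type table are illustrative assumptions, not the project's actual code.

// Hypothetical stand-in for DruidSerDeUtils.convertDruidToHiveType; the real mapping may differ.
private static PrimitiveTypeInfo toHiveType(String druidType) {
  switch (druidType.toUpperCase()) {
    case "LONG":
      return TypeInfoFactory.longTypeInfo;     // Druid LONG mapped to Hive bigint
    case "FLOAT":
      return TypeInfoFactory.floatTypeInfo;    // Druid FLOAT mapped to Hive float
    case "STRING":
    default:
      return TypeInfoFactory.stringTypeInfo;   // strings and unrecognized/complex types fall back to string
  }
}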
Use of io.druid.query.metadata.metadata.ColumnAnalysis in project druid by druid-io.
From the class SegmentAnalyzer, method analyzeStringColumn:
private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities, final StorageAdapter storageAdapter, final String columnName) {
  int cardinality = 0;
  long size = 0;
  Comparable min = null;
  Comparable max = null;
  if (analyzingCardinality()) {
    cardinality = storageAdapter.getDimensionCardinality(columnName);
  }
  if (analyzingSize()) {
    final long start = storageAdapter.getMinTime().getMillis();
    final long end = storageAdapter.getMaxTime().getMillis();
    final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, new Interval(start, end), VirtualColumns.EMPTY, Granularities.ALL, false);
    size = cursors.accumulate(0L, new Accumulator<Long, Cursor>() {
      @Override
      public Long accumulate(Long accumulated, Cursor cursor) {
        DimensionSelector selector = cursor.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
        if (selector == null) {
          return accumulated;
        }
        long current = accumulated;
        while (!cursor.isDone()) {
          final IndexedInts vals = selector.getRow();
          for (int i = 0; i < vals.size(); ++i) {
            final String dimVal = selector.lookupName(vals.get(i));
            if (dimVal != null && !dimVal.isEmpty()) {
              current += StringUtils.estimatedBinaryLengthAsUTF8(dimVal);
            }
          }
          cursor.advance();
        }
        return current;
      }
    });
  }
  if (analyzingMinMax()) {
    min = storageAdapter.getMinValue(columnName);
    max = storageAdapter.getMaxValue(columnName);
  }
  return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size, cardinality, min, max, null);
}
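The size estimate above sums an approximate UTF-8 byte length for every non-empty value the cursor yields. Below is a minimal, standalone sketch of the same accumulation over an in-memory list; the sample values are made up and the snippet reuses only the calls already present in the method.

// Standalone sketch of the size accumulation, detached from Cursor and DimensionSelector.
long size = 0;
for (String dimVal : Arrays.asList("preferred", "", null, "mezzanine")) {
  if (dimVal != null && !dimVal.isEmpty()) {
    size += StringUtils.estimatedBinaryLengthAsUTF8(dimVal);
  }
}
// size now approximates the UTF-8 footprint of the two non-empty values.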