Use of org.apache.druid.data.input.impl.ParseSpec in the druid-io/druid project — class DataSchema, method getParser:
/**
 * Lazily builds and caches the {@link InputRowParser} described by {@code parserMap}.
 *
 * @return the configured parser, or {@code null} if no parser map was provided
 */
@Nullable
public InputRowParser getParser() {
  if (inputRowParser == null) {
    if (parserMap == null) {
      return null;
    }
    // Build the parser completely in a local variable and publish it to the field
    // only once fully configured. The original assigned the field immediately after
    // convertValue(), so an exception in any of the withParseSpec/withTimestampSpec
    // steps would leave a half-configured parser cached for later calls.
    // noinspection unchecked
    InputRowParser parser = transformSpec.decorate(objectMapper.convertValue(this.parserMap, InputRowParser.class));
    ParseSpec parseSpec = parser.getParseSpec();
    parseSpec = parseSpec.withDimensionsSpec(
        computeDimensionsSpec(parseSpec.getTimestampSpec(), parseSpec.getDimensionsSpec(), aggregators)
    );
    // Explicit specs configured on this schema override whatever the serialized parser carried.
    if (timestampSpec != null) {
      parseSpec = parseSpec.withTimestampSpec(timestampSpec);
    }
    if (dimensionsSpec != null) {
      parseSpec = parseSpec.withDimensionsSpec(dimensionsSpec);
    }
    inputRowParser = parser.withParseSpec(parseSpec);
  }
  return inputRowParser;
}
Use of org.apache.druid.data.input.impl.ParseSpec in the druid-io/druid project — class DruidParquetAvroReadSupport, method getPartialReadSchema:
/**
 * Select the columns from the parquet schema that are used in the schema of the ingestion job.
 *
 * @param context The context of the file to be read
 *
 * @return the partial schema that only contains the columns that are being used in the schema
 */
private MessageType getPartialReadSchema(InitContext context) {
  MessageType fullSchema = context.getFileSchema();
  String name = fullSchema.getName();
  HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  ParseSpec parseSpec = config.getParser().getParseSpec();
  // A flatten spec may reference nested fields anywhere in the file, so we cannot prune columns.
  if (parseSpec instanceof AvroParseSpec) {
    if (((AvroParseSpec) parseSpec).getFlattenSpec() != null) {
      return fullSchema;
    }
  }
  // Reuse the parseSpec local instead of re-running config.getParser().getParseSpec()
  // for each lookup (the original rebuilt the parser twice here; the sibling
  // DruidParquetReadSupport implementation already reuses the local).
  String tsField = parseSpec.getTimestampSpec().getTimestampColumn();
  List<DimensionSchema> dimensionSchema = parseSpec.getDimensionsSpec().getDimensions();
  Set<String> dimensions = new HashSet<>();
  for (DimensionSchema dim : dimensionSchema) {
    dimensions.add(dim.getName());
  }
  Set<String> metricsFields = new HashSet<>();
  for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
    metricsFields.addAll(agg.requiredFields());
  }
  List<Type> partialFields = new ArrayList<>();
  for (Type type : fullSchema.getFields()) {
    // Keep the timestamp column, any aggregator input, and either every column
    // (when no dimensions are declared, i.e. schemaless) or the declared dimensions.
    if (tsField.equals(type.getName())
        || metricsFields.contains(type.getName())
        || dimensions.isEmpty()
        || dimensions.contains(type.getName())) {
      partialFields.add(type);
    }
  }
  return new MessageType(name, partialFields);
}
Use of org.apache.druid.data.input.impl.ParseSpec in the druid-io/druid project — class DruidParquetReadSupport, method getPartialReadSchema:
/**
 * Select the columns from the parquet schema that are used in the schema of the ingestion job.
 *
 * @param context The context of the file to be read
 *
 * @return the partial schema that only contains the columns that are being used in the schema
 */
private MessageType getPartialReadSchema(InitContext context) {
  MessageType fileSchema = context.getFileSchema();
  HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  ParseSpec parseSpec = config.getParser().getParseSpec();

  // A flatten spec may auto-discover or reference nested fields, so pruning is unsafe:
  // hand back the full file schema untouched.
  if (parseSpec instanceof ParquetParseSpec && ((ParquetParseSpec) parseSpec).getFlattenSpec() != null) {
    return fileSchema;
  }

  String timestampColumn = parseSpec.getTimestampSpec().getTimestampColumn();

  // Names of all declared dimensions; empty means schemaless (keep everything).
  Set<String> dimensionNames = new HashSet<>();
  for (DimensionSchema dimension : parseSpec.getDimensionsSpec().getDimensions()) {
    dimensionNames.add(dimension.getName());
  }

  // Every input column any aggregator reads.
  Set<String> aggregatorInputs = new HashSet<>();
  for (AggregatorFactory aggregator : config.getSchema().getDataSchema().getAggregators()) {
    aggregatorInputs.addAll(aggregator.requiredFields());
  }

  List<Type> keptFields = new ArrayList<>();
  for (Type field : fileSchema.getFields()) {
    String fieldName = field.getName();
    boolean wanted = timestampColumn.equals(fieldName)
        || aggregatorInputs.contains(fieldName)
        || dimensionNames.isEmpty()
        || dimensionNames.contains(fieldName);
    if (wanted) {
      keptFields.add(field);
    }
  }
  return new MessageType(fileSchema.getName(), keptFields);
}
Aggregations