Use of org.apache.hadoop.hive.druid.json.KafkaSupervisorSpec in project hive by apache.
From the class DruidStorageHandler, the method updateKafkaIngestion:
private void updateKafkaIngestion(Table table) {
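  // Resolve the Overlord address and the table properties that are mandatory for Kafka ingestion.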
  final String overlordAddress =
      HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_OVERLORD_DEFAULT_ADDRESS);
  final String dataSourceName =
      Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, Constants.DRUID_DATA_SOURCE),
          "Druid datasource name is null");
  final String kafkaTopic =
      Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.KAFKA_TOPIC),
          "kafka topic is null");
  final String kafkaServers =
      Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.KAFKA_BOOTSTRAP_SERVERS),
          "kafka connect string is null");
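  // Copy the Hive table parameters into a Properties object and derive the Druid granularity spec from them.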
  Properties tableProperties = new Properties();
  tableProperties.putAll(table.getParameters());
  final GranularitySpec granularitySpec = DruidStorageHandlerUtils.getGranularitySpec(getConf(), tableProperties);
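  // Collect the column names and Hive type infos from the table's storage descriptor.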
  List<FieldSchema> columns = table.getSd().getCols();
  List<String> columnNames = new ArrayList<>(columns.size());
  List<TypeInfo> columnTypes = new ArrayList<>(columns.size());
  for (FieldSchema schema : columns) {
    columnNames.add(schema.getName());
    columnTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(schema.getType()));
  }
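  // Split the columns into Druid dimensions and aggregator factories; the default timestamp column must be present.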
  Pair<List<DimensionSchema>, AggregatorFactory[]> dimensionsAndAggregates =
      DruidStorageHandlerUtils.getDimensionsAndAggregates(columnNames, columnTypes);
  if (!columnNames.contains(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
    throw new IllegalStateException("Timestamp column ('"
        + DruidConstants.DEFAULT_TIMESTAMP_COLUMN
        + "') not specified in create table; list of columns is: "
        + columnNames);
  }
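  // Build the dimension and timestamp specs; fall back to the default timestamp column when none is configured.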
  DimensionsSpec dimensionsSpec = new DimensionsSpec(dimensionsAndAggregates.lhs, null, null);
  String timestampFormat = DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.DRUID_TIMESTAMP_FORMAT);
  String timestampColumnName = DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.DRUID_TIMESTAMP_COLUMN);
  if (timestampColumnName == null) {
    timestampColumnName = DruidConstants.DEFAULT_TIMESTAMP_COLUMN;
  }
  final TimestampSpec timestampSpec = new TimestampSpec(timestampColumnName, timestampFormat, null);
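  // Serialize the input row parser to a generic map, which is the form the DataSchema constructor takes here.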
  final InputRowParser inputRowParser = DruidKafkaUtils.getInputRowParser(table, timestampSpec, dimensionsSpec);
  final Map<String, Object> inputParser =
      JSON_MAPPER.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
      });
  final DataSchema dataSchema =
      new DataSchema(dataSourceName, inputParser, dimensionsAndAggregates.rhs, granularitySpec, null,
          DruidStorageHandlerUtils.JSON_MAPPER);
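  // Assemble the desired supervisor spec from the data schema, index spec, and Kafka connection details.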
  IndexSpec indexSpec = DruidStorageHandlerUtils.getIndexSpec(getConf());
  KafkaSupervisorSpec spec =
      DruidKafkaUtils.createKafkaSupervisorSpec(table, kafkaTopic, kafkaServers, dataSchema, indexSpec);
  // Fetch the existing ingestion spec from Druid, if any.
  KafkaSupervisorSpec existingSpec = fetchKafkaIngestionSpec(table);
  String targetState = DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.DRUID_KAFKA_INGESTION);
  if (targetState == null) {
    // The user has not specified an ingestion state in the current command:
    // if a Kafka supervisor is already running, keep it running (START); otherwise leave ingestion stopped (STOP).
    targetState = existingSpec == null ? "STOP" : "START";
  }
  if ("STOP".equalsIgnoreCase(targetState)) {
    if (existingSpec != null) {
      stopKafkaIngestion(overlordAddress, dataSourceName);
    }
  } else if ("START".equalsIgnoreCase(targetState)) {
    if (existingSpec == null || !existingSpec.equals(spec)) {
      DruidKafkaUtils.updateKafkaIngestionSpec(overlordAddress, spec);
    }
} else if ("RESET".equalsIgnoreCase(targetState)) {
// Case when there are changes in multiple table properties.
if (existingSpec != null && !existingSpec.equals(spec)) {
DruidKafkaUtils.updateKafkaIngestionSpec(overlordAddress, spec);
}
resetKafkaIngestion(overlordAddress, dataSourceName);
  } else {
    throw new IllegalArgumentException(String.format(
        "Invalid value for property [%s]. Valid values are [START, STOP, RESET]",
        DruidConstants.DRUID_KAFKA_INGESTION));
  }
  // We do not want to keep state in two separate places, so remove it from the Hive table properties.
  table.getParameters().remove(DruidConstants.DRUID_KAFKA_INGESTION);
}
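The three target states correspond to the Druid Overlord's supervisor operations: START submits or updates the supervisor spec, STOP shuts the supervisor down, and RESET clears its stored Kafka offsets. In Hive this is typically driven by setting the DruidConstants.DRUID_KAFKA_INGESTION table property on a Druid-backed table, e.g. via ALTER TABLE ... SET TBLPROPERTIES.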