
Example 1 with KafkaSupervisorSpec

Use of org.apache.hadoop.hive.druid.json.KafkaSupervisorSpec in project hive by apache.

In the class DruidStorageHandler, the method updateKafkaIngestion builds a KafkaSupervisorSpec from the Hive table definition and reconciles it with any supervisor already running on the Druid Overlord.

private void updateKafkaIngestion(Table table) {
    final String overlordAddress = HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_OVERLORD_DEFAULT_ADDRESS);
    final String dataSourceName = Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, Constants.DRUID_DATA_SOURCE), "Druid datasource name is null");
    final String kafkaTopic = Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.KAFKA_TOPIC), "kafka topic is null");
    final String kafkaServers = Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.KAFKA_BOOTSTRAP_SERVERS), "kafka connect string is null");
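    // Copy the Hive table parameters into a Properties object so the segment/query granularity can be derived from them.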
    Properties tableProperties = new Properties();
    tableProperties.putAll(table.getParameters());
    final GranularitySpec granularitySpec = DruidStorageHandlerUtils.getGranularitySpec(getConf(), tableProperties);
    List<FieldSchema> columns = table.getSd().getCols();
    List<String> columnNames = new ArrayList<>(columns.size());
    List<TypeInfo> columnTypes = new ArrayList<>(columns.size());
    for (FieldSchema schema : columns) {
        columnNames.add(schema.getName());
        columnTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(schema.getType()));
    }
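    // Derive Druid dimensions and aggregators from the Hive columns and ensure the mandatory timestamp column is present.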
    Pair<List<DimensionSchema>, AggregatorFactory[]> dimensionsAndAggregates = DruidStorageHandlerUtils.getDimensionsAndAggregates(columnNames, columnTypes);
    if (!columnNames.contains(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
        throw new IllegalStateException("Timestamp column ('" + DruidConstants.DEFAULT_TIMESTAMP_COLUMN + "') not specified in create table; list of columns is: " + columnNames);
    }
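    // Build the dimensions, timestamp and row parser specs that tell Druid how to parse incoming Kafka records.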
    DimensionsSpec dimensionsSpec = new DimensionsSpec(dimensionsAndAggregates.lhs, null, null);
    String timestampFormat = DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.DRUID_TIMESTAMP_FORMAT);
    String timestampColumnName = DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.DRUID_TIMESTAMP_COLUMN);
    if (timestampColumnName == null) {
        timestampColumnName = DruidConstants.DEFAULT_TIMESTAMP_COLUMN;
    }
    final TimestampSpec timestampSpec = new TimestampSpec(timestampColumnName, timestampFormat, null);
    final InputRowParser inputRowParser = DruidKafkaUtils.getInputRowParser(table, timestampSpec, dimensionsSpec);
    final Map<String, Object> inputParser = JSON_MAPPER.convertValue(inputRowParser, new TypeReference<Map<String, Object>>() {
    });
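    // Assemble the Druid DataSchema and IndexSpec and wrap them into the Kafka supervisor spec.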
    final DataSchema dataSchema = new DataSchema(dataSourceName, inputParser, dimensionsAndAggregates.rhs, granularitySpec, null, DruidStorageHandlerUtils.JSON_MAPPER);
    IndexSpec indexSpec = DruidStorageHandlerUtils.getIndexSpec(getConf());
    KafkaSupervisorSpec spec = DruidKafkaUtils.createKafkaSupervisorSpec(table, kafkaTopic, kafkaServers, dataSchema, indexSpec);
    // Fetch existing Ingestion Spec from Druid, if any
    KafkaSupervisorSpec existingSpec = fetchKafkaIngestionSpec(table);
    String targetState = DruidStorageHandlerUtils.getTableProperty(table, DruidConstants.DRUID_KAFKA_INGESTION);
    if (targetState == null) {
        // Case when the user has not specified any ingestion state in the current command:
        // if a Kafka supervisor is already running, keep its last known state (START); otherwise default to STOP.
        targetState = existingSpec == null ? "STOP" : "START";
    }
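    // Reconcile the requested target state with whatever supervisor is currently running on the Overlord.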
    if ("STOP".equalsIgnoreCase(targetState)) {
        if (existingSpec != null) {
            stopKafkaIngestion(overlordAddress, dataSourceName);
        }
    } else if ("START".equalsIgnoreCase(targetState)) {
        if (existingSpec == null || !existingSpec.equals(spec)) {
            DruidKafkaUtils.updateKafkaIngestionSpec(overlordAddress, spec);
        }
    } else if ("RESET".equalsIgnoreCase(targetState)) {
        // RESET may arrive together with other table property changes: push the updated spec first, then reset the stored offsets.
        if (existingSpec != null && !existingSpec.equals(spec)) {
            DruidKafkaUtils.updateKafkaIngestionSpec(overlordAddress, spec);
        }
        resetKafkaIngestion(overlordAddress, dataSourceName);
    } else {
        throw new IllegalArgumentException(String.format("Invalid value for property [%s]; valid values are [START, STOP, RESET]", DruidConstants.DRUID_KAFKA_INGESTION));
    }
    // We do not want to keep state in two separate places, so remove it from the Hive table properties.
    table.getParameters().remove(DruidConstants.DRUID_KAFKA_INGESTION);
}
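
The helpers called above (fetchKafkaIngestionSpec, stopKafkaIngestion, resetKafkaIngestion and DruidKafkaUtils.updateKafkaIngestionSpec) are not shown in this example. As a rough, hedged illustration of what such a call amounts to, the sketch below posts an already-serialized supervisor spec to the Druid Overlord's supervisor API (POST /druid/indexer/v1/supervisor). The class and method names here are hypothetical, and the HTTP handling is deliberately simplified compared to the real Hive/Druid utility code.

import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

// Hypothetical helper: submits a Kafka supervisor spec (as JSON) to the Druid Overlord.
// This only illustrates the shape of the HTTP call; the actual Hive code goes through
// DruidKafkaUtils and shared HTTP client utilities instead.
public final class OverlordClientSketch {

    public static void submitSupervisorSpec(String overlordAddress, String specJson) throws IOException {
        // The Overlord exposes the supervisor API under /druid/indexer/v1/supervisor.
        URL url = new URL("http://" + overlordAddress + "/druid/indexer/v1/supervisor");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");
        conn.setRequestProperty("Content-Type", "application/json");
        conn.setDoOutput(true);
        try (OutputStream out = conn.getOutputStream()) {
            out.write(specJson.getBytes(StandardCharsets.UTF_8));
        }
        int status = conn.getResponseCode();
        if (status / 100 != 2) {
            throw new IOException("Overlord rejected supervisor spec, HTTP status " + status);
        }
        conn.disconnect();
    }
}

In practice this code path is typically driven from Hive DDL, e.g. ALTER TABLE ... SET TBLPROPERTIES ('druid.kafka.ingestion' = 'START'), which is what populates the DruidConstants.DRUID_KAFKA_INGESTION property read above.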
Also used :
IndexSpec (org.apache.druid.segment.IndexSpec)
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)
ArrayList (java.util.ArrayList)
Properties (java.util.Properties)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
DataSchema (org.apache.druid.segment.indexing.DataSchema)
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)
List (java.util.List)
ImmutableList (com.google.common.collect.ImmutableList)
InputRowParser (org.apache.druid.data.input.impl.InputRowParser)
Map (java.util.Map)
KafkaSupervisorSpec (org.apache.hadoop.hive.druid.json.KafkaSupervisorSpec)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList) 1
ArrayList (java.util.ArrayList) 1
List (java.util.List) 1
Map (java.util.Map) 1
Properties (java.util.Properties) 1
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) 1
InputRowParser (org.apache.druid.data.input.impl.InputRowParser) 1
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 1
IndexSpec (org.apache.druid.segment.IndexSpec) 1
DataSchema (org.apache.druid.segment.indexing.DataSchema) 1
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec) 1
KafkaSupervisorSpec (org.apache.hadoop.hive.druid.json.KafkaSupervisorSpec) 1
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 1
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) 1