Search in sources :

Example 1 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

From the class GenerateDataCommand, method buildDataGeneratorSpec.

/**
 * Fills the supplied collections from every field of {@code schema} and wraps them in a
 * {@link DataGeneratorSpec}.
 *
 * <p>Every column is appended to {@code columns} and recorded in {@code dataTypes} and
 * {@code fieldTypes}. Entries that the caller already placed in {@code cardinality} or
 * {@code range} are left untouched; only missing ones receive a default (cardinality 1000 for
 * dimensions, range [1, 1000] for metrics and time columns). Time columns additionally get their
 * incoming time unit recorded in {@code timeUnits}.
 *
 * @param schema      schema whose field specs are enumerated
 * @param columns     receives each column name, in the order the schema returns them
 * @param dataTypes   receives column name to data type
 * @param fieldTypes  receives column name to field type
 * @param timeUnits   receives column name to incoming time unit (time columns only)
 * @param cardinality per-column cardinality; defaults are added for dimensions lacking a value
 * @param range       per-column value range; defaults are added for metric/time columns lacking a key
 * @return a generator spec built from the (now populated) arguments, AVRO format, and this
 *         command's {@code _outDir} / {@code _overwrite} settings
 * @throws RuntimeException if a field has a type other than DIMENSION, METRIC or TIME
 */
private DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List<String> columns, HashMap<String, DataType> dataTypes, HashMap<String, FieldType> fieldTypes, HashMap<String, TimeUnit> timeUnits, HashMap<String, Integer> cardinality, HashMap<String, IntRange> range) {
    for (final FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        final String columnName = fieldSpec.getName();
        final FieldType columnFieldType = fieldSpec.getFieldType();

        columns.add(columnName);
        dataTypes.put(columnName, fieldSpec.getDataType());
        fieldTypes.put(columnName, columnFieldType);

        switch (columnFieldType) {
            case DIMENSION: {
                // A missing key and a key mapped to null are both treated as "no cardinality given".
                final Integer configuredCardinality = cardinality.get(columnName);
                if (configuredCardinality == null) {
                    cardinality.put(columnName, 1000);
                }
                break;
            }
            case METRIC: {
                final boolean rangeConfigured = range.containsKey(columnName);
                if (!rangeConfigured) {
                    range.put(columnName, new IntRange(1, 1000));
                }
                break;
            }
            case TIME: {
                final boolean rangeConfigured = range.containsKey(columnName);
                if (!rangeConfigured) {
                    range.put(columnName, new IntRange(1, 1000));
                }
                // The default range is applied before the cast, matching the original ordering.
                final TimeFieldSpec timeFieldSpec = (TimeFieldSpec) fieldSpec;
                final TimeUnit incomingTimeUnit = timeFieldSpec.getIncomingGranularitySpec().getTimeType();
                timeUnits.put(columnName, incomingTimeUnit);
                break;
            }
            default:
                throw new RuntimeException("Invalid field type.");
        }
    }

    return new DataGeneratorSpec(columns, cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, _outDir, _overwrite);
}
Also used : TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) IntRange(org.apache.commons.lang.math.IntRange) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DataGeneratorSpec(com.linkedin.pinot.tools.data.generator.DataGeneratorSpec)

Example 2 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

From the class DataGenerator, method buildSpec.

/**
 * Creates the {@link FieldSpec} for a single generated column.
 *
 * <p>The concrete spec class is chosen from the column's field type as recorded in
 * {@code genSpec}; the name, data type and single-value flag are then applied to whichever
 * spec was created.
 *
 * @param genSpec generator spec supplying the per-column data type, field type and time unit maps
 * @param column  name of the column to build a spec for
 * @return a single-value field spec named {@code column}
 * @throws RuntimeException if the column's field type is not DIMENSION, METRIC or TIME
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
    final FieldType columnFieldType = genSpec.getFieldTypesMap().get(column);
    final DataType columnDataType = genSpec.getDataTypesMap().get(column);

    final FieldSpec fieldSpec;
    switch (columnFieldType) {
        case DIMENSION:
            fieldSpec = new DimensionFieldSpec();
            break;
        case METRIC:
            fieldSpec = new MetricFieldSpec();
            break;
        case TIME:
            // Only time columns need the time unit map.
            fieldSpec = new TimeFieldSpec(column, columnDataType, genSpec.getTimeUnitMap().get(column));
            break;
        default:
            throw new RuntimeException("Invalid Field type.");
    }

    // Common attributes are applied uniformly, regardless of which subclass was instantiated.
    fieldSpec.setName(column);
    fieldSpec.setDataType(columnDataType);
    fieldSpec.setSingleValueField(true);
    return fieldSpec;
}
Also used : TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) DataType(com.linkedin.pinot.common.data.FieldSpec.DataType) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) FieldType(com.linkedin.pinot.common.data.FieldSpec.FieldType)

Example 3 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

From the class ThirdEyeUtils, method createSchema.

/**
 * Builds a Pinot {@link Schema} from a ThirdEye {@link CollectionSchema}.
 *
 * <p>Each dimension becomes a single-value STRING dimension field, each metric becomes a
 * single-value metric field whose data type is looked up by the metric type's name, and the
 * collection's time spec becomes one time field whose incoming and outgoing granularity specs
 * are built from the same values. The schema name is set to the collection name.
 *
 * @param collectionSchema source collection definition
 * @return the newly created schema
 */
public static Schema createSchema(CollectionSchema collectionSchema) {
    final Schema pinotSchema = new Schema();

    // Dimensions: always STRING, single-value.
    for (DimensionSpec dimension : collectionSchema.getDimensions()) {
        final String name = dimension.getName();
        final FieldSpec dimensionField = new DimensionFieldSpec();
        dimensionField.setName(name);
        dimensionField.setDataType(DataType.STRING);
        dimensionField.setSingleValueField(true);
        pinotSchema.addField(name, dimensionField);
    }

    // Metrics: data type resolved from the string form of the metric's type, single-value.
    for (MetricSpec metric : collectionSchema.getMetrics()) {
        final String name = metric.getName();
        final FieldSpec metricField = new MetricFieldSpec();
        metricField.setName(name);
        metricField.setDataType(DataType.valueOf(metric.getType().toString()));
        metricField.setSingleValueField(true);
        pinotSchema.addField(name, metricField);
    }

    // Time column: "sinceEpoch" maps to the EPOCH format; anything else is passed through as a
    // SIMPLE_DATE_FORMAT pattern.
    final TimeSpec timeSpec = collectionSchema.getTime();
    final String declaredFormat = timeSpec.getFormat();
    final String timeFormat;
    if (declaredFormat.equals("sinceEpoch")) {
        timeFormat = TimeFormat.EPOCH.toString();
    } else {
        timeFormat = TimeFormat.SIMPLE_DATE_FORMAT.toString() + ":" + declaredFormat;
    }

    final String timeColumnName = timeSpec.getColumnName();
    // Two distinct instances are created even though their contents are identical.
    final TimeGranularitySpec incomingSpec = new TimeGranularitySpec(DataType.LONG, timeSpec.getDataGranularity().getSize(), timeSpec.getDataGranularity().getUnit(), timeFormat, timeColumnName);
    final TimeGranularitySpec outgoingSpec = new TimeGranularitySpec(DataType.LONG, timeSpec.getDataGranularity().getSize(), timeSpec.getDataGranularity().getUnit(), timeFormat, timeColumnName);
    pinotSchema.addField(timeColumnName, new TimeFieldSpec(incomingSpec, outgoingSpec));

    pinotSchema.setSchemaName(collectionSchema.getCollection());
    return pinotSchema;
}
Also used : DimensionSpec(com.linkedin.thirdeye.api.DimensionSpec) TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) Schema(com.linkedin.pinot.common.data.Schema) CollectionSchema(com.linkedin.thirdeye.api.CollectionSchema) MetricSpec(com.linkedin.thirdeye.api.MetricSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) FieldSpec(com.linkedin.pinot.common.data.FieldSpec) MetricFieldSpec(com.linkedin.pinot.common.data.MetricFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) DimensionFieldSpec(com.linkedin.pinot.common.data.DimensionFieldSpec) TimeSpec(com.linkedin.thirdeye.api.TimeSpec)

Example 4 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

From the class AutoAddInvertedIndex, method runQueryStrategy.

/**
 * Walks every resource in the cluster and, for each qualifying table, chooses inverted index
 * columns based on per-dimension DISTINCTCOUNT query results, then writes the updated indexing
 * config back.
 *
 * <p>Per table, the flow is:
 * <ol>
 *   <li>Skip resources whose name does not end in {@code _OFFLINE} / {@code _REALTIME} or does
 *       not match {@code _tableNamePattern} (when one is set).</li>
 *   <li>If the table already has an auto-generated inverted index: skip it in NEW mode, strip
 *       the index in REMOVE mode, or clear the column list and recompute in REFRESH mode.
 *       Otherwise, skip the table if it has any non-empty, manually configured inverted index
 *       column.</li>
 *   <li>Skip tables with no schema, no dimensions, no time column, or a STRING time column, and
 *       tables whose {@code totalDocs} does not exceed {@code _tableSizeThreshold}.</li>
 *   <li>Query the maximum value of the time column, then query DISTINCTCOUNT of every dimension
 *       restricted to that time value.</li>
 *   <li>Sort the results and, among the first {@code _maxNumInvertedIndexAdded} entries, add
 *       columns until one fails the {@code _cardinalityThreshold} check.</li>
 *   <li>Persist the config if any column was selected; if none was selected but the table
 *       previously had an auto-generated index, persist its removal.</li>
 * </ol>
 *
 * @throws Exception propagated from the table config / schema lookups, the queries, or the
 *                   config update
 */
private void runQueryStrategy() throws Exception {
    // Get all resources in cluster
    List<String> resourcesInCluster = _helixAdmin.getResourcesInCluster(_clusterName);
    for (String tableName : resourcesInCluster) {
        // Skip non-table resources
        if (!tableName.endsWith("_OFFLINE") && !tableName.endsWith("_REALTIME")) {
            continue;
        }
        // Skip tables that do not match the defined name pattern
        if (_tableNamePattern != null && !tableName.matches(_tableNamePattern)) {
            continue;
        }
        LOGGER.info("Table: {} matches the table name pattern: {}", tableName, _tableNamePattern);
        // Get the inverted index config
        AbstractTableConfig tableConfig = getTableConfig(tableName);
        IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
        List<String> invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
        boolean autoGeneratedInvertedIndex = indexingConfig.isAutoGeneratedInvertedIndex();
        // Handle auto-generated inverted index
        if (autoGeneratedInvertedIndex) {
            Preconditions.checkState(!invertedIndexColumns.isEmpty(), "Auto-generated inverted index list is empty");
            // NEW mode, skip
            if (_mode == Mode.NEW) {
                LOGGER.info("Table: {}, skip adding inverted index because it has auto-generated inverted index and under NEW mode", tableName);
                continue;
            }
            // REMOVE mode, remove the inverted index and update
            if (_mode == Mode.REMOVE) {
                invertedIndexColumns.clear();
                indexingConfig.setAutoGeneratedInvertedIndex(false);
                if (updateIndexConfig(tableName, tableConfig)) {
                    LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
                } else {
                    LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
                }
                continue;
            }
            // REFRESH mode, remove auto-generated inverted index
            if (_mode == Mode.REFRESH) {
                invertedIndexColumns.clear();
            }
        } else {
            // Handle null inverted index columns
            if (invertedIndexColumns == null) {
                invertedIndexColumns = new ArrayList<>();
                indexingConfig.setInvertedIndexColumns(invertedIndexColumns);
            }
            // Remove empty strings
            int emptyStringIndex;
            while ((emptyStringIndex = invertedIndexColumns.indexOf("")) != -1) {
                invertedIndexColumns.remove(emptyStringIndex);
            }
            // Skip non-empty non-auto-generated inverted index
            if (!invertedIndexColumns.isEmpty()) {
                LOGGER.info("Table: {}, skip adding inverted index because it has non-auto-generated inverted index", tableName);
                continue;
            }
        }
        // Skip tables without a schema
        Schema tableSchema = getTableSchema(tableName);
        if (tableSchema == null) {
            LOGGER.info("Table: {}, skip adding inverted index because it does not have a schema", tableName);
            continue;
        }
        // Skip tables without dimensions
        List<String> dimensionNames = tableSchema.getDimensionNames();
        if (dimensionNames.isEmpty()) {
            LOGGER.info("Table: {}, skip adding inverted index because it does not have any dimension column", tableName);
            continue;
        }
        // Skip tables without a proper time column
        TimeFieldSpec timeFieldSpec = tableSchema.getTimeFieldSpec();
        if (timeFieldSpec == null || timeFieldSpec.getDataType() == FieldSpec.DataType.STRING) {
            LOGGER.info("Table: {}, skip adding inverted index because it does not have a numeric time column", tableName);
            continue;
        }
        String timeColumnName = timeFieldSpec.getName();
        TimeUnit timeUnit = timeFieldSpec.getOutgoingGranularitySpec().getTimeType();
        if (timeUnit != TimeUnit.DAYS) {
            // Informational only: processing continues for non-DAYS time columns.
            LOGGER.warn("Table: {}, time column {} has non-DAYS time unit: {}", tableName, timeColumnName, timeUnit);
        }
        // Only add inverted index to table larger than a threshold
        JSONObject queryResponse = sendQuery("SELECT COUNT(*) FROM " + tableName);
        long numTotalDocs = queryResponse.getLong("totalDocs");
        LOGGER.info("Table: {}, number of total documents: {}", tableName, numTotalDocs);
        if (numTotalDocs <= _tableSizeThreshold) {
            LOGGER.info("Table: {}, skip adding inverted index because the table is too small", tableName);
            continue;
        }
        // Get each dimension's cardinality on one timestamp's data
        queryResponse = sendQuery("SELECT Max(" + timeColumnName + ") FROM " + tableName);
        // Read as long: time values in fine-grained units (e.g. milliseconds) do not fit in an int.
        long maxTimeStamp = queryResponse.getJSONArray("aggregationResults").getJSONObject(0).getLong("value");
        LOGGER.info("Table: {}, max time column {}: {}", tableName, timeColumnName, maxTimeStamp);
        // Query DISTINCTCOUNT on all dimensions in one query might cause timeout, so query them separately
        List<ResultPair> resultPairs = new ArrayList<>();
        for (String dimensionName : dimensionNames) {
            String query = "SELECT DISTINCTCOUNT(" + dimensionName + ") FROM " + tableName + " WHERE " + timeColumnName + " = " + maxTimeStamp;
            queryResponse = sendQuery(query);
            JSONObject result = queryResponse.getJSONArray("aggregationResults").getJSONObject(0);
            // The column name is recovered from the response's function label by stripping its "distinctCount_" prefix.
            resultPairs.add(new ResultPair(result.getString("function").substring("distinctCount_".length()), result.getLong("value")));
        }
        // Sort the dimensions based on their cardinalities
        Collections.sort(resultPairs);
        // Add the top dimensions into inverted index columns
        int numInvertedIndex = Math.min(_maxNumInvertedIndexAdded, resultPairs.size());
        for (int i = 0; i < numInvertedIndex; i++) {
            ResultPair resultPair = resultPairs.get(i);
            String columnName = resultPair._key;
            long cardinality = resultPair._value;
            if (cardinality > _cardinalityThreshold) {
                // Do not append inverted index if already exists
                if (!invertedIndexColumns.contains(columnName)) {
                    invertedIndexColumns.add(columnName);
                }
                LOGGER.info("Table: {}, add inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
            } else {
                // Stop at the first column at or below the threshold; later entries are not examined.
                LOGGER.info("Table: {}, skip adding inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
                break;
            }
        }
        // Update indexing config
        if (!invertedIndexColumns.isEmpty()) {
            indexingConfig.setAutoGeneratedInvertedIndex(true);
            if (updateIndexConfig(tableName, tableConfig)) {
                LOGGER.info("Table: {}, added inverted index to columns: {}", tableName, invertedIndexColumns);
            } else {
                LOGGER.error("Table: {}, failed to add inverted index to columns: {}", tableName, invertedIndexColumns);
            }
        } else {
            if (autoGeneratedInvertedIndex) {
                // Only REFRESH mode can reach this point with a previously auto-generated index.
                Preconditions.checkState(_mode == Mode.REFRESH);
                // Remove existing auto-generated inverted index because no column matches all the conditions
                indexingConfig.setAutoGeneratedInvertedIndex(false);
                if (updateIndexConfig(tableName, tableConfig)) {
                    LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
                } else {
                    LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
                }
            }
        }
    }
}
Also used : IndexingConfig(com.linkedin.pinot.common.config.IndexingConfig) Schema(com.linkedin.pinot.common.data.Schema) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec) ArrayList(java.util.ArrayList) JSONObject(org.json.JSONObject) TimeUnit(java.util.concurrent.TimeUnit) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig)

Example 5 with TimeFieldSpec

Use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.

From the class PlainFieldExtractor, method initTimeConverters.

/**
 * Initializes the time column fields from the schema's time field spec.
 *
 * <p>Does nothing when the schema has no time field. Otherwise the outgoing time column name is
 * always recorded; the incoming column name and the time converter are set only when the
 * incoming and outgoing granularity specs are not equal.
 */
private void initTimeConverters() {
    final TimeFieldSpec timeFieldSpec = _schema.getTimeFieldSpec();
    if (timeFieldSpec == null) {
        return;
    }

    final TimeGranularitySpec incomingSpec = timeFieldSpec.getIncomingGranularitySpec();
    final TimeGranularitySpec outgoingSpec = timeFieldSpec.getOutgoingGranularitySpec();
    _outgoingTimeColumnName = outgoingSpec.getName();

    // Equal specs: the incoming column name and converter fields are left as they were.
    if (incomingSpec.equals(outgoingSpec)) {
        return;
    }

    _incomingTimeColumnName = incomingSpec.getName();
    _timeConverter = TimeConverterProvider.getTimeConverter(incomingSpec, outgoingSpec);
}
Also used : TimeGranularitySpec(com.linkedin.pinot.common.data.TimeGranularitySpec) TimeFieldSpec(com.linkedin.pinot.common.data.TimeFieldSpec)

Aggregations

TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 17; DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 12; MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 12; Schema (com.linkedin.pinot.common.data.Schema): 11; FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 10; TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 7; File (java.io.File): 4; FieldType (com.linkedin.pinot.common.data.FieldSpec.FieldType): 3; JSONObject (org.json.JSONObject): 3; DataType (com.linkedin.pinot.common.data.FieldSpec.DataType): 2; GenericRow (com.linkedin.pinot.core.data.GenericRow): 2; HashMap (java.util.HashMap): 2; TimeUnit (java.util.concurrent.TimeUnit): 2; Field (org.apache.avro.Schema.Field): 2; Test (org.testng.annotations.Test): 2; AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig): 1; IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig): 1; StarTreeIndexSpec (com.linkedin.pinot.common.data.StarTreeIndexSpec): 1; CSVRecordReaderConfig (com.linkedin.pinot.core.data.readers.CSVRecordReaderConfig): 1; ColumnMetadata (com.linkedin.pinot.core.segment.index.ColumnMetadata): 1