use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.
the class GenerateDataCommand method buildDataGeneratorSpec.
/**
 * Walks every field of the schema, records its name, data type and field type, fills in a default
 * generator setting when the caller supplied none, and bundles everything into a
 * {@link DataGeneratorSpec}.
 *
 * <p>The collections passed in are mutated in place and then handed to the returned spec.
 *
 * @param schema      schema whose field specs drive the generation settings
 * @param columns     receives the name of every field, in schema iteration order
 * @param dataTypes   receives column name to data type
 * @param fieldTypes  receives column name to field type
 * @param timeUnits   receives column name to incoming time unit (TIME fields only)
 * @param cardinality per-dimension cardinality; dimensions with no (or a null) entry get 1000
 * @param range       per-column value range; METRIC and TIME columns with no entry get [1, 1000]
 * @return the generator spec built from the populated collections, targeting AVRO output
 * @throws RuntimeException if a field has a type other than DIMENSION, METRIC or TIME
 */
private DataGeneratorSpec buildDataGeneratorSpec(Schema schema, List<String> columns, HashMap<String, DataType> dataTypes, HashMap<String, FieldType> fieldTypes, HashMap<String, TimeUnit> timeUnits, HashMap<String, Integer> cardinality, HashMap<String, IntRange> range) {
  for (final FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
    final String columnName = fieldSpec.getName();
    columns.add(columnName);
    dataTypes.put(columnName, fieldSpec.getDataType());

    final FieldType fieldType = fieldSpec.getFieldType();
    fieldTypes.put(columnName, fieldType);

    switch (fieldType) {
      case DIMENSION: {
        // A missing key and an explicit null value are both treated as "not configured".
        final Integer configuredCardinality = cardinality.get(columnName);
        if (configuredCardinality == null) {
          cardinality.put(columnName, 1000);
        }
        break;
      }
      case METRIC: {
        final boolean hasRange = range.containsKey(columnName);
        if (!hasRange) {
          range.put(columnName, new IntRange(1, 1000));
        }
        break;
      }
      case TIME: {
        final boolean hasRange = range.containsKey(columnName);
        if (!hasRange) {
          range.put(columnName, new IntRange(1, 1000));
        }
        // The time unit is taken from the incoming granularity of the time field.
        final TimeFieldSpec timeFieldSpec = (TimeFieldSpec) fieldSpec;
        final TimeUnit incomingTimeUnit = timeFieldSpec.getIncomingGranularitySpec().getTimeType();
        timeUnits.put(columnName, incomingTimeUnit);
        break;
      }
      default:
        throw new RuntimeException("Invalid field type.");
    }
  }
  return new DataGeneratorSpec(columns, cardinality, range, dataTypes, fieldTypes, timeUnits, FileFormat.AVRO, _outDir, _overwrite);
}
use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.
the class DataGenerator method buildSpec.
/**
 * Creates the field spec for one column of the generator spec.
 *
 * <p>The concrete spec class is chosen from the column's field type; afterwards the column name,
 * the data type and the single-value flag are set on the result regardless of its class.
 *
 * @param genSpec generator spec supplying the data type, field type and time unit maps
 * @param column  name of the column to build a spec for
 * @return the single-value field spec for {@code column}
 * @throws RuntimeException if the field type is not DIMENSION, METRIC or TIME
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
  final DataType columnDataType = genSpec.getDataTypesMap().get(column);
  final FieldType columnFieldType = genSpec.getFieldTypesMap().get(column);

  final FieldSpec result;
  switch (columnFieldType) {
    case DIMENSION:
      result = new DimensionFieldSpec();
      break;
    case METRIC:
      result = new MetricFieldSpec();
      break;
    case TIME:
      // Time specs additionally need the time unit registered for this column.
      result = new TimeFieldSpec(column, columnDataType, genSpec.getTimeUnitMap().get(column));
      break;
    default:
      throw new RuntimeException("Invalid Field type.");
  }

  result.setName(column);
  result.setDataType(columnDataType);
  result.setSingleValueField(true);
  return result;
}
use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.
the class ThirdEyeUtils method createSchema.
/**
 * Builds a {@link Schema} from a collection schema.
 *
 * <p>Each dimension becomes a single-value STRING dimension field, each metric becomes a
 * single-value metric field whose data type is looked up by the metric type's name, and the time
 * spec becomes a time field whose incoming and outgoing granularities are built from the same
 * values. The schema name is set to the collection name.
 *
 * @param collectionSchema source of the dimensions, metrics, time spec and collection name
 * @return the populated schema
 */
public static Schema createSchema(CollectionSchema collectionSchema) {
  final Schema result = new Schema();

  // Dimensions are always added as single-value STRING columns.
  for (DimensionSpec dimension : collectionSchema.getDimensions()) {
    final FieldSpec dimensionField = new DimensionFieldSpec();
    final String name = dimension.getName();
    dimensionField.setName(name);
    dimensionField.setDataType(DataType.STRING);
    dimensionField.setSingleValueField(true);
    result.addField(name, dimensionField);
  }

  // Metrics take their data type from the metric type's string form.
  for (MetricSpec metric : collectionSchema.getMetrics()) {
    final FieldSpec metricField = new MetricFieldSpec();
    final String name = metric.getName();
    metricField.setName(name);
    final String metricTypeName = metric.getType().toString();
    metricField.setDataType(DataType.valueOf(metricTypeName));
    metricField.setSingleValueField(true);
    result.addField(name, metricField);
  }

  final TimeSpec timeSpec = collectionSchema.getTime();

  // "sinceEpoch" maps to the EPOCH format; anything else is passed through as a date pattern.
  final String timeFormat;
  if (timeSpec.getFormat().equals("sinceEpoch")) {
    timeFormat = TimeFormat.EPOCH.toString();
  } else {
    timeFormat = TimeFormat.SIMPLE_DATE_FORMAT.toString() + ":" + timeSpec.getFormat();
  }

  final TimeGranularitySpec incomingGranularity = new TimeGranularitySpec(
      DataType.LONG,
      timeSpec.getDataGranularity().getSize(),
      timeSpec.getDataGranularity().getUnit(),
      timeFormat,
      timeSpec.getColumnName());
  final TimeGranularitySpec outgoingGranularity = new TimeGranularitySpec(
      DataType.LONG,
      timeSpec.getDataGranularity().getSize(),
      timeSpec.getDataGranularity().getUnit(),
      timeFormat,
      timeSpec.getColumnName());
  result.addField(timeSpec.getColumnName(), new TimeFieldSpec(incomingGranularity, outgoingGranularity));

  result.setSchemaName(collectionSchema.getCollection());
  return result;
}
use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.
the class AutoAddInvertedIndex method runQueryStrategy.
/**
 * Adds, refreshes or removes auto-generated inverted indexes for the tables in the cluster, using
 * query results to decide which dimension columns to index.
 *
 * <p>For every resource whose name ends with {@code _OFFLINE} or {@code _REALTIME} (and matches
 * the table name pattern, when one is set):
 * <ol>
 *   <li>Tables that already carry an auto-generated inverted index are skipped (NEW mode), have it
 *       removed (REMOVE mode) or have it cleared and recomputed (REFRESH mode).</li>
 *   <li>Tables with a non-empty, non-auto-generated inverted index are skipped.</li>
 *   <li>Tables without a schema, without dimensions, or without a non-STRING time column are
 *       skipped, as are tables with no more than {@code _tableSizeThreshold} documents.</li>
 *   <li>The distinct count of every dimension is queried on the rows holding the maximum time
 *       value; the sorted results are walked from the front and, up to
 *       {@code _maxNumInvertedIndexAdded} columns, each column whose cardinality exceeds
 *       {@code _cardinalityThreshold} is added. The walk stops at the first column that does
 *       not.</li>
 *   <li>The indexing config is written back when columns were added, or when a REFRESH left an
 *       auto-generated index with no qualifying column.</li>
 * </ol>
 *
 * @throws Exception propagated from the table config, schema and query helpers
 */
private void runQueryStrategy() throws Exception {
  // Get all resources in cluster
  List<String> resourcesInCluster = _helixAdmin.getResourcesInCluster(_clusterName);
  for (String tableName : resourcesInCluster) {
    // Skip non-table resources
    if (!tableName.endsWith("_OFFLINE") && !tableName.endsWith("_REALTIME")) {
      continue;
    }
    // Skip tables that do not match the defined name pattern
    if (_tableNamePattern != null && !tableName.matches(_tableNamePattern)) {
      continue;
    }
    LOGGER.info("Table: {} matches the table name pattern: {}", tableName, _tableNamePattern);

    // Get the inverted index config
    AbstractTableConfig tableConfig = getTableConfig(tableName);
    IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
    List<String> invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
    boolean autoGeneratedInvertedIndex = indexingConfig.isAutoGeneratedInvertedIndex();

    // Handle auto-generated inverted index
    if (autoGeneratedInvertedIndex) {
      Preconditions.checkState(!invertedIndexColumns.isEmpty(), "Auto-generated inverted index list is empty");
      // NEW mode, skip
      if (_mode == Mode.NEW) {
        LOGGER.info("Table: {}, skip adding inverted index because it has auto-generated inverted index and under NEW mode", tableName);
        continue;
      }
      // REMOVE mode, remove the inverted index and update
      if (_mode == Mode.REMOVE) {
        invertedIndexColumns.clear();
        indexingConfig.setAutoGeneratedInvertedIndex(false);
        if (updateIndexConfig(tableName, tableConfig)) {
          LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
        } else {
          LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
        }
        continue;
      }
      // REFRESH mode, remove auto-generated inverted index
      if (_mode == Mode.REFRESH) {
        invertedIndexColumns.clear();
      }
    } else {
      // Handle null inverted index columns
      if (invertedIndexColumns == null) {
        invertedIndexColumns = new ArrayList<>();
        indexingConfig.setInvertedIndexColumns(invertedIndexColumns);
      }
      // Remove empty strings
      int emptyStringIndex;
      while ((emptyStringIndex = invertedIndexColumns.indexOf("")) != -1) {
        invertedIndexColumns.remove(emptyStringIndex);
      }
      // Skip non-empty non-auto-generated inverted index
      if (!invertedIndexColumns.isEmpty()) {
        LOGGER.info("Table: {}, skip adding inverted index because it has non-auto-generated inverted index", tableName);
        continue;
      }
    }

    // Skip tables without a schema
    Schema tableSchema = getTableSchema(tableName);
    if (tableSchema == null) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have a schema", tableName);
      continue;
    }

    // Skip tables without dimensions
    List<String> dimensionNames = tableSchema.getDimensionNames();
    if (dimensionNames.size() == 0) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have any dimension column", tableName);
      continue;
    }

    // Skip tables without a proper time column
    TimeFieldSpec timeFieldSpec = tableSchema.getTimeFieldSpec();
    if (timeFieldSpec == null || timeFieldSpec.getDataType() == FieldSpec.DataType.STRING) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have a numeric time column", tableName);
      continue;
    }
    String timeColumnName = timeFieldSpec.getName();
    TimeUnit timeUnit = timeFieldSpec.getOutgoingGranularitySpec().getTimeType();
    if (timeUnit != TimeUnit.DAYS) {
      // Warn only; processing continues for non-DAYS tables.
      LOGGER.warn("Table: {}, time column {} has non-DAYS time unit: {}", tableName, timeColumnName, timeUnit);
    }

    // Only add inverted index to table larger than a threshold
    JSONObject queryResponse = sendQuery("SELECT COUNT(*) FROM " + tableName);
    long numTotalDocs = queryResponse.getLong("totalDocs");
    LOGGER.info("Table: {}, number of total documents: {}", tableName, numTotalDocs);
    if (numTotalDocs <= _tableSizeThreshold) {
      LOGGER.info("Table: {}, skip adding inverted index because the table is too small", tableName);
      continue;
    }

    // Get each dimension's cardinality on one timestamp's data.
    // Read the max time value as a long: tables with a non-DAYS time unit are not skipped above,
    // so the value may not fit in an int, and a wrong value would break the filter below.
    queryResponse = sendQuery("SELECT Max(" + timeColumnName + ") FROM " + tableName);
    long maxTimeStamp = queryResponse.getJSONArray("aggregationResults").getJSONObject(0).getLong("value");
    LOGGER.info("Table: {}, max time column {}: {}", tableName, timeColumnName, maxTimeStamp);

    // Query DISTINCTCOUNT on all dimensions in one query might cause timeout, so query them separately
    List<ResultPair> resultPairs = new ArrayList<>();
    for (String dimensionName : dimensionNames) {
      String query = "SELECT DISTINCTCOUNT(" + dimensionName + ") FROM " + tableName + " WHERE " + timeColumnName + " = " + maxTimeStamp;
      queryResponse = sendQuery(query);
      JSONObject result = queryResponse.getJSONArray("aggregationResults").getJSONObject(0);
      // The column name is recovered by dropping the "distinctCount_" prefix of the function name.
      resultPairs.add(new ResultPair(result.getString("function").substring("distinctCount_".length()), result.getLong("value")));
    }

    // Sort the dimensions based on their cardinalities
    Collections.sort(resultPairs);

    // Add the top dimensions into inverted index columns
    int numInvertedIndex = Math.min(_maxNumInvertedIndexAdded, resultPairs.size());
    for (int i = 0; i < numInvertedIndex; i++) {
      ResultPair resultPair = resultPairs.get(i);
      String columnName = resultPair._key;
      long cardinality = resultPair._value;
      if (cardinality > _cardinalityThreshold) {
        // Do not append inverted index if already exists
        if (!invertedIndexColumns.contains(columnName)) {
          invertedIndexColumns.add(columnName);
        }
        LOGGER.info("Table: {}, add inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
      } else {
        LOGGER.info("Table: {}, skip adding inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
        break;
      }
    }

    // Update indexing config
    if (!invertedIndexColumns.isEmpty()) {
      indexingConfig.setAutoGeneratedInvertedIndex(true);
      if (updateIndexConfig(tableName, tableConfig)) {
        LOGGER.info("Table: {}, added inverted index to columns: {}", tableName, invertedIndexColumns);
      } else {
        LOGGER.error("Table: {}, failed to add inverted index to columns: {}", tableName, invertedIndexColumns);
      }
    } else {
      if (autoGeneratedInvertedIndex) {
        // NEW and REMOVE modes have already moved on to the next table, so only REFRESH gets here.
        Preconditions.checkState(_mode == Mode.REFRESH);
        // Remove existing auto-generated inverted index because no column matches all the conditions
        indexingConfig.setAutoGeneratedInvertedIndex(false);
        if (updateIndexConfig(tableName, tableConfig)) {
          LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
        } else {
          LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
        }
      }
    }
  }
}
use of com.linkedin.pinot.common.data.TimeFieldSpec in project pinot by linkedin.
the class PlainFieldExtractor method initTimeConverters.
/**
 * Sets up the time column names and the time converter from the schema's time field spec.
 *
 * <p>Does nothing when the schema has no time field. Otherwise the outgoing time column name is
 * always recorded; the incoming time column name and the converter are only set when the incoming
 * and outgoing granularity specs are not equal.
 */
private void initTimeConverters() {
  final TimeFieldSpec timeSpec = _schema.getTimeFieldSpec();
  if (timeSpec == null) {
    return;
  }

  final TimeGranularitySpec incoming = timeSpec.getIncomingGranularitySpec();
  final TimeGranularitySpec outgoing = timeSpec.getOutgoingGranularitySpec();
  _outgoingTimeColumnName = outgoing.getName();

  // Equal granularities need no conversion, so the incoming name and converter stay untouched.
  if (incoming.equals(outgoing)) {
    return;
  }

  _incomingTimeColumnName = incoming.getName();
  _timeConverter = TimeConverterProvider.getTimeConverter(incoming, outgoing);
}
Aggregations