Use of java.util.concurrent.TimeUnit in project pinot by linkedin.
The class AnomaliesResource, method getFormattedDateTime.
/**
 * Get formatted date time for anomaly chart
 * @param timestamp epoch timestamp (in milliseconds) to format
 * @param datasetConfig dataset config, used to look up the data granularity
 * @param startEndDateFormatterHours formatter applied for MINUTES and HOURS granularity
 * @param startEndDateFormatterDays formatter applied for DAYS granularity
 * @return the formatted date time, or null if the granularity unit is not handled
 */
private String getFormattedDateTime(long timestamp, DatasetConfigDTO datasetConfig, DateTimeFormatter startEndDateFormatterHours, DateTimeFormatter startEndDateFormatterDays) {
  TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
  TimeUnit dataGranularityUnit = timeSpec.getDataGranularity().getUnit();
  String formattedDateTime = null;
  if (dataGranularityUnit.equals(TimeUnit.MINUTES) || dataGranularityUnit.equals(TimeUnit.HOURS)) {
    formattedDateTime = startEndDateFormatterHours.print(timestamp);
  } else if (dataGranularityUnit.equals(TimeUnit.DAYS)) {
    formattedDateTime = startEndDateFormatterDays.print(timestamp);
  }
  return formattedDateTime;
}
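For context, a minimal standalone sketch of the same formatter-selection logic. The Joda-Time patterns here are illustrative assumptions; the real formatters are constructed elsewhere in AnomaliesResource and are not shown in this snippet.

import java.util.concurrent.TimeUnit;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class FormatterSelectionSketch {
  public static void main(String[] args) {
    // Illustrative patterns only; the actual ThirdEye patterns may differ
    DateTimeFormatter startEndDateFormatterHours = DateTimeFormat.forPattern("MMM dd HH:mm");
    DateTimeFormatter startEndDateFormatterDays = DateTimeFormat.forPattern("MMM dd yyyy");
    long timestamp = System.currentTimeMillis();
    for (TimeUnit unit : new TimeUnit[] { TimeUnit.MINUTES, TimeUnit.HOURS, TimeUnit.DAYS }) {
      // MINUTES and HOURS share the finer-grained formatter; DAYS uses the coarser one
      String formatted = (unit == TimeUnit.DAYS)
          ? startEndDateFormatterDays.print(timestamp)
          : startEndDateFormatterHours.print(timestamp);
      System.out.println(unit + " -> " + formatted);
    }
  }
}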
Use of java.util.concurrent.TimeUnit in project pinot by linkedin.
The class DataCompletenessTaskUtils, method getDateTimeFormatterForDataset.
/**
 * Get the date time formatter matching the granularity of the dataset.
 * This is used to store the date in the db in the correct SDF pattern.
 * @param timeSpec time spec that carries the dataset's data granularity
 * @param zone time zone to attach to the formatter
 * @return formatter using the minute, hour, or day pattern as appropriate
 */
public static DateTimeFormatter getDateTimeFormatterForDataset(TimeSpec timeSpec, DateTimeZone zone) {
  String pattern = null;
  TimeUnit unit = timeSpec.getDataGranularity().getUnit();
  switch (unit) {
    case DAYS:
      pattern = DAY_FORMAT;
      break;
    case MINUTES:
      pattern = MINUTE_FORMAT;
      break;
    case HOURS:
    default:
      pattern = HOUR_FORMAT;
      break;
  }
  DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern(pattern).withZone(zone);
  return dateTimeFormatter;
}
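As a hedged usage sketch, the snippet below shows how a formatter built this way turns an epoch timestamp into a storable string. The DAY_FORMAT, HOUR_FORMAT, and MINUTE_FORMAT values are assumptions for illustration; the real constants live in DataCompletenessTaskUtils.

import java.util.concurrent.TimeUnit;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class DatasetFormatterSketch {
  // Assumed pattern values; see DataCompletenessTaskUtils for the real ones
  private static final String DAY_FORMAT = "yyyyMMdd";
  private static final String HOUR_FORMAT = "yyyyMMddHH";
  private static final String MINUTE_FORMAT = "yyyyMMddHHmm";

  static String patternFor(TimeUnit unit) {
    switch (unit) {
      case DAYS:
        return DAY_FORMAT;
      case MINUTES:
        return MINUTE_FORMAT;
      case HOURS:
      default:
        return HOUR_FORMAT; // hours is also the fallback for other units
    }
  }

  public static void main(String[] args) {
    DateTimeFormatter formatter =
        DateTimeFormat.forPattern(patternFor(TimeUnit.DAYS)).withZone(DateTimeZone.UTC);
    System.out.println(formatter.print(1494028800000L)); // prints 20170506
  }
}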
Use of java.util.concurrent.TimeUnit in project pinot by linkedin.
The class MetricTransfer, method rescaleMetric.
/**
 * Use the scaling factors to normalize the input time series
 *
 * @param metricsToModify the MetricTimeSeries to be modified by the scaling factors
 * @param windowStartTime the start timestamp of the current monitoring window; timestamps before it belong to the baseline time series
 * @param scalingFactorList list of scaling factors
 * @param metricName the metric name within the time series to be modified
 * @param properties the properties for scaling the values
 */
public static void rescaleMetric(MetricTimeSeries metricsToModify, long windowStartTime, List<ScalingFactor> scalingFactorList, String metricName, Properties properties) {
  if (CollectionUtils.isEmpty(scalingFactorList)) {
    // no transformation if there are no scaling factors
    return;
  }
  List<ScaledValue> scaledValues = null;
  if (DEBUG) {
    scaledValues = new ArrayList<>();
  }
  int seasonalSize = Integer.parseInt(properties.getProperty(SEASONAL_SIZE, DEFAULT_SEASONAL_SIZE));
  TimeUnit seasonalUnit = TimeUnit.valueOf(properties.getProperty(SEASONAL_UNIT, DEFAULT_SEASONAL_UNIT));
  long seasonalMillis = seasonalUnit.toMillis(seasonalSize);
  int baselineSeasonalPeriod = Integer.parseInt(properties.getProperty(BASELINE_SEASONAL_PERIOD, DEFAULT_BASELINE_SEASONAL_PERIOD));
  // The scaling model works as follows:
  // 1. If the time range of a scaling factor overlaps with the current time series, then the
  //    scaling factor is applied to all corresponding baseline time series.
  // 2. If the time range of a scaling factor overlaps with a baseline time series, then the
  //    baseline value that overlaps with the scaling factor is set to 0. Note that in the
  //    current implementation of ThirdEye the value 0 means missing data, so the baseline
  //    value will be removed from the baseline calculation.
  //
  // The model is implemented as follows:
  // 1. Check if a timestamp is located in any time range of a scaling factor.
  // 2. If it is, then check if it is a current value or a baseline value.
  // 3a. If it is a baseline value, then set its value to 0.
  // 3b. If it is a current value, then apply the scaling factor to its corresponding baseline
  //     values.
  Set<Long> timeWindowSet = metricsToModify.getTimeWindowSet();
  for (long ts : timeWindowSet) {
    for (ScalingFactor sf : scalingFactorList) {
      if (sf.isInTimeWindow(ts)) {
        if (ts < windowStartTime) {
          // the timestamp belongs to a baseline time series
          if (DEBUG) {
            double originalValue = metricsToModify.get(ts, metricName).doubleValue();
            scaledValues.add(new ScaledValue(ts, originalValue, 0.0));
          }
          // zero will be removed in the analyze function
          metricsToModify.set(ts, metricName, 0.0);
        } else {
          // the timestamp belongs to the current time series
          for (int i = 1; i <= baselineSeasonalPeriod; ++i) {
            long baseTs = ts - i * seasonalMillis;
            if (timeWindowSet.contains(baseTs)) {
              double originalValue = metricsToModify.get(baseTs, metricName).doubleValue();
              double scaledValue = originalValue * sf.getScalingFactor();
              metricsToModify.set(baseTs, metricName, scaledValue);
              if (DEBUG) {
                scaledValues.add(new ScaledValue(baseTs, originalValue, scaledValue));
              }
            }
          }
        }
      }
    }
  }
  if (DEBUG) {
    if (CollectionUtils.isNotEmpty(scaledValues)) {
      Collections.sort(scaledValues);
      StringBuilder sb = new StringBuilder();
      String separator = "";
      for (ScaledValue scaledValue : scaledValues) {
        sb.append(separator).append(scaledValue.toString());
        separator = ", ";
      }
      LOG.info("Transformed values: {}", sb.toString());
    }
  }
}
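The baseline lookup above reduces to simple epoch arithmetic: for a current timestamp ts, the candidate baseline timestamps are ts - i * seasonalMillis for i = 1..baselineSeasonalPeriod. A small sketch, with assumed seasonality settings (one week, two seasons back) rather than the actual property defaults:

import java.util.concurrent.TimeUnit;

public class SeasonalBaselineSketch {
  public static void main(String[] args) {
    // Assumed settings for illustration; the real defaults come from the detection properties
    int seasonalSize = 7;
    TimeUnit seasonalUnit = TimeUnit.DAYS;
    int baselineSeasonalPeriod = 2;
    long seasonalMillis = seasonalUnit.toMillis(seasonalSize); // 604800000 ms per week

    long ts = 1494028800000L; // 2017-05-06T00:00:00Z, a "current" timestamp
    for (int i = 1; i <= baselineSeasonalPeriod; i++) {
      // Each baseline timestamp is exactly i seasons before the current one
      long baseTs = ts - i * seasonalMillis;
      System.out.println("baseline " + i + ": " + baseTs);
    }
  }
}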
Use of java.util.concurrent.TimeUnit in project pinot by linkedin.
The class FieldSpecTest, method testTimeFieldSpecConstructor.
/**
* Test {@link TimeFieldSpec} constructors.
*/
@Test
public void testTimeFieldSpecConstructor() {
  String incomingName = "incoming";
  DataType incomingDataType = DataType.LONG;
  TimeUnit incomingTimeUnit = TimeUnit.HOURS;
  int incomingTimeUnitSize = 1;
  TimeGranularitySpec incomingTimeGranularitySpec = new TimeGranularitySpec(incomingDataType, incomingTimeUnitSize, incomingTimeUnit, incomingName);
  String outgoingName = "outgoing";
  DataType outgoingDataType = DataType.INT;
  TimeUnit outgoingTimeUnit = TimeUnit.DAYS;
  int outgoingTimeUnitSize = 1;
  TimeGranularitySpec outgoingTimeGranularitySpec = new TimeGranularitySpec(outgoingDataType, outgoingTimeUnitSize, outgoingTimeUnit, outgoingName);
  int defaultNullValue = 17050;
  TimeFieldSpec timeFieldSpec1 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit);
  TimeFieldSpec timeFieldSpec2 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit, defaultNullValue);
  TimeFieldSpec timeFieldSpec3 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit, outgoingName, outgoingDataType, outgoingTimeUnit);
  TimeFieldSpec timeFieldSpec4 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnit, outgoingName, outgoingDataType, outgoingTimeUnit, defaultNullValue);
  TimeFieldSpec timeFieldSpec5 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit);
  TimeFieldSpec timeFieldSpec6 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit, defaultNullValue);
  TimeFieldSpec timeFieldSpec7 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit, outgoingName, outgoingDataType, outgoingTimeUnitSize, outgoingTimeUnit);
  TimeFieldSpec timeFieldSpec8 = new TimeFieldSpec(incomingName, incomingDataType, incomingTimeUnitSize, incomingTimeUnit, outgoingName, outgoingDataType, outgoingTimeUnitSize, outgoingTimeUnit, defaultNullValue);
  TimeFieldSpec timeFieldSpec9 = new TimeFieldSpec(incomingTimeGranularitySpec);
  TimeFieldSpec timeFieldSpec10 = new TimeFieldSpec(incomingTimeGranularitySpec, defaultNullValue);
  TimeFieldSpec timeFieldSpec11 = new TimeFieldSpec(incomingTimeGranularitySpec, outgoingTimeGranularitySpec);
  TimeFieldSpec timeFieldSpec12 = new TimeFieldSpec(incomingTimeGranularitySpec, outgoingTimeGranularitySpec, defaultNullValue);
  Assert.assertEquals(timeFieldSpec1, timeFieldSpec5);
  Assert.assertEquals(timeFieldSpec1, timeFieldSpec9);
  Assert.assertEquals(timeFieldSpec2, timeFieldSpec6);
  Assert.assertEquals(timeFieldSpec2, timeFieldSpec10);
  Assert.assertEquals(timeFieldSpec3, timeFieldSpec7);
  Assert.assertEquals(timeFieldSpec3, timeFieldSpec11);
  Assert.assertEquals(timeFieldSpec4, timeFieldSpec8);
  Assert.assertEquals(timeFieldSpec4, timeFieldSpec12);
  // Before adding default null value.
  Assert.assertFalse(timeFieldSpec1.equals(timeFieldSpec2));
  Assert.assertFalse(timeFieldSpec3.equals(timeFieldSpec4));
  Assert.assertFalse(timeFieldSpec5.equals(timeFieldSpec6));
  Assert.assertFalse(timeFieldSpec7.equals(timeFieldSpec8));
  Assert.assertFalse(timeFieldSpec9.equals(timeFieldSpec10));
  Assert.assertFalse(timeFieldSpec11.equals(timeFieldSpec12));
  // After adding default null value.
  timeFieldSpec1.setDefaultNullValue(defaultNullValue);
  timeFieldSpec3.setDefaultNullValue(defaultNullValue);
  timeFieldSpec5.setDefaultNullValue(defaultNullValue);
  timeFieldSpec7.setDefaultNullValue(defaultNullValue);
  timeFieldSpec9.setDefaultNullValue(defaultNullValue);
  timeFieldSpec11.setDefaultNullValue(defaultNullValue);
  Assert.assertEquals(timeFieldSpec1, timeFieldSpec2);
  Assert.assertEquals(timeFieldSpec3, timeFieldSpec4);
  Assert.assertEquals(timeFieldSpec5, timeFieldSpec6);
  Assert.assertEquals(timeFieldSpec7, timeFieldSpec8);
  Assert.assertEquals(timeFieldSpec9, timeFieldSpec10);
  Assert.assertEquals(timeFieldSpec11, timeFieldSpec12);
}
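Condensed, the equivalences these assertions establish look like the excerpt-style sketch below, which reuses the same constructors the test exercises: constructors that omit the time-unit size behave as size 1, and a TimeGranularitySpec bundles data type, size, unit, and name into a single argument.

// Excerpt-style sketch; assumes the same imports as FieldSpecTest above
TimeFieldSpec viaUnit = new TimeFieldSpec("incoming", DataType.LONG, TimeUnit.HOURS);
TimeFieldSpec viaSize = new TimeFieldSpec("incoming", DataType.LONG, 1, TimeUnit.HOURS);
TimeFieldSpec viaSpec = new TimeFieldSpec(new TimeGranularitySpec(DataType.LONG, 1, TimeUnit.HOURS, "incoming"));
Assert.assertEquals(viaUnit, viaSize); // omitting the size is the same as size 1
Assert.assertEquals(viaUnit, viaSpec); // the granularity-spec form is equivalent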
Use of java.util.concurrent.TimeUnit in project pinot by linkedin.
The class AutoAddInvertedIndex, method runQueryStrategy.
private void runQueryStrategy() throws Exception {
  // Get all resources in cluster
  List<String> resourcesInCluster = _helixAdmin.getResourcesInCluster(_clusterName);
  for (String tableName : resourcesInCluster) {
    // Skip non-table resources
    if (!tableName.endsWith("_OFFLINE") && !tableName.endsWith("_REALTIME")) {
      continue;
    }
    // Skip tables that do not match the defined name pattern
    if (_tableNamePattern != null && !tableName.matches(_tableNamePattern)) {
      continue;
    }
    LOGGER.info("Table: {} matches the table name pattern: {}", tableName, _tableNamePattern);
    // Get the inverted index config
    AbstractTableConfig tableConfig = getTableConfig(tableName);
    IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
    List<String> invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
    boolean autoGeneratedInvertedIndex = indexingConfig.isAutoGeneratedInvertedIndex();
    // Handle auto-generated inverted index
    if (autoGeneratedInvertedIndex) {
      Preconditions.checkState(!invertedIndexColumns.isEmpty(), "Auto-generated inverted index list is empty");
      // NEW mode, skip
      if (_mode == Mode.NEW) {
        LOGGER.info("Table: {}, skip adding inverted index because it has auto-generated inverted index and under NEW mode", tableName);
        continue;
      }
      // REMOVE mode, remove the inverted index and update
      if (_mode == Mode.REMOVE) {
        invertedIndexColumns.clear();
        indexingConfig.setAutoGeneratedInvertedIndex(false);
        if (updateIndexConfig(tableName, tableConfig)) {
          LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
        } else {
          LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
        }
        continue;
      }
      // REFRESH mode, remove auto-generated inverted index
      if (_mode == Mode.REFRESH) {
        invertedIndexColumns.clear();
      }
    } else {
      // Handle null inverted index columns
      if (invertedIndexColumns == null) {
        invertedIndexColumns = new ArrayList<>();
        indexingConfig.setInvertedIndexColumns(invertedIndexColumns);
      }
      // Remove empty strings
      int emptyStringIndex;
      while ((emptyStringIndex = invertedIndexColumns.indexOf("")) != -1) {
        invertedIndexColumns.remove(emptyStringIndex);
      }
      // Skip non-empty non-auto-generated inverted index
      if (!invertedIndexColumns.isEmpty()) {
        LOGGER.info("Table: {}, skip adding inverted index because it has non-auto-generated inverted index", tableName);
        continue;
      }
    }
    // Skip tables without a schema
    Schema tableSchema = getTableSchema(tableName);
    if (tableSchema == null) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have a schema", tableName);
      continue;
    }
    // Skip tables without dimensions
    List<String> dimensionNames = tableSchema.getDimensionNames();
    if (dimensionNames.size() == 0) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have any dimension column", tableName);
      continue;
    }
    // Skip tables without a proper time column
    TimeFieldSpec timeFieldSpec = tableSchema.getTimeFieldSpec();
    if (timeFieldSpec == null || timeFieldSpec.getDataType() == FieldSpec.DataType.STRING) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have a numeric time column", tableName);
      continue;
    }
    String timeColumnName = timeFieldSpec.getName();
    TimeUnit timeUnit = timeFieldSpec.getOutgoingGranularitySpec().getTimeType();
    if (timeUnit != TimeUnit.DAYS) {
LOGGER.warn("Table: {}, time column {] has non-DAYS time unit: {}", timeColumnName, timeUnit);
    }
    // Only add inverted index to table larger than a threshold
    JSONObject queryResponse = sendQuery("SELECT COUNT(*) FROM " + tableName);
    long numTotalDocs = queryResponse.getLong("totalDocs");
    LOGGER.info("Table: {}, number of total documents: {}", tableName, numTotalDocs);
    if (numTotalDocs <= _tableSizeThreshold) {
      LOGGER.info("Table: {}, skip adding inverted index because the table is too small", tableName);
      continue;
    }
    // Get each dimension's cardinality on one timestamp's data
    queryResponse = sendQuery("SELECT Max(" + timeColumnName + ") FROM " + tableName);
    int maxTimeStamp = queryResponse.getJSONArray("aggregationResults").getJSONObject(0).getInt("value");
    LOGGER.info("Table: {}, max time column {}: {}", tableName, timeColumnName, maxTimeStamp);
    // Query DISTINCTCOUNT on all dimensions in one query might cause timeout, so query them separately
    List<ResultPair> resultPairs = new ArrayList<>();
    for (String dimensionName : dimensionNames) {
      String query = "SELECT DISTINCTCOUNT(" + dimensionName + ") FROM " + tableName + " WHERE " + timeColumnName + " = " + maxTimeStamp;
      queryResponse = sendQuery(query);
      JSONObject result = queryResponse.getJSONArray("aggregationResults").getJSONObject(0);
      resultPairs.add(new ResultPair(result.getString("function").substring("distinctCount_".length()), result.getLong("value")));
    }
    // Sort the dimensions based on their cardinalities
    Collections.sort(resultPairs);
    // Add the top dimensions into inverted index columns
    int numInvertedIndex = Math.min(_maxNumInvertedIndexAdded, resultPairs.size());
    for (int i = 0; i < numInvertedIndex; i++) {
      ResultPair resultPair = resultPairs.get(i);
      String columnName = resultPair._key;
      long cardinality = resultPair._value;
      if (cardinality > _cardinalityThreshold) {
        // Do not append inverted index if already exists
        if (!invertedIndexColumns.contains(columnName)) {
          invertedIndexColumns.add(columnName);
        }
        LOGGER.info("Table: {}, add inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
      } else {
        LOGGER.info("Table: {}, skip adding inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
        break;
      }
    }
    // Update indexing config
    if (!invertedIndexColumns.isEmpty()) {
      indexingConfig.setAutoGeneratedInvertedIndex(true);
      if (updateIndexConfig(tableName, tableConfig)) {
        LOGGER.info("Table: {}, added inverted index to columns: {}", tableName, invertedIndexColumns);
      } else {
        LOGGER.error("Table: {}, failed to add inverted index to columns: {}", tableName, invertedIndexColumns);
      }
    } else {
      if (autoGeneratedInvertedIndex) {
        Preconditions.checkState(_mode == Mode.REFRESH);
        // Remove existing auto-generated inverted index because no column matches all the conditions
        indexingConfig.setAutoGeneratedInvertedIndex(false);
        if (updateIndexConfig(tableName, tableConfig)) {
          LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
        } else {
          LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
        }
      }
    }
  }
}
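The selection step in the middle of this method is worth isolating: sort the (dimension, cardinality) pairs by cardinality, then keep at most _maxNumInvertedIndexAdded columns whose cardinality exceeds _cardinalityThreshold. Below is a standalone sketch with made-up data and thresholds, using an explicit descending comparator standing in for ResultPair's compareTo.

import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class InvertedIndexSelectionSketch {
  public static void main(String[] args) {
    // Made-up (dimension, per-day cardinality) pairs, as the DISTINCTCOUNT queries would return
    List<Map.Entry<String, Long>> resultPairs = new ArrayList<>();
    resultPairs.add(new AbstractMap.SimpleEntry<>("browser", 50L));
    resultPairs.add(new AbstractMap.SimpleEntry<>("memberId", 1000000L));
    resultPairs.add(new AbstractMap.SimpleEntry<>("country", 200L));

    long cardinalityThreshold = 100L; // assumed threshold
    int maxNumInvertedIndexAdded = 2; // assumed cap

    // Highest cardinality first, mirroring the Collections.sort(resultPairs) call above
    resultPairs.sort((a, b) -> Long.compare(b.getValue(), a.getValue()));

    List<String> invertedIndexColumns = new ArrayList<>();
    int numInvertedIndex = Math.min(maxNumInvertedIndexAdded, resultPairs.size());
    for (int i = 0; i < numInvertedIndex; i++) {
      Map.Entry<String, Long> pair = resultPairs.get(i);
      if (pair.getValue() > cardinalityThreshold) {
        invertedIndexColumns.add(pair.getKey());
      } else {
        break; // sorted order: every remaining column is below the threshold too
      }
    }
    System.out.println(invertedIndexColumns); // [memberId, country]
  }
}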