use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.
the class SchemaUtils method main.
/**
 * Example usage of this utility class.
 *
 * <p>Builds a small schema named "testSchema", hands it to {@code postSchema} for host "localhost" and
 * port 8100, reads it back with {@code getSchema}, prints the result of {@code equalsIgnoreVersion} for
 * the two schemas, and finally calls {@code deleteSchema}. Every intermediate result is printed to
 * standard output.
 */
public static void main(String[] args) {
  final String host = "localhost";
  final int port = 8100;
  final String schemaName = "testSchema";

  // One dimension, one metric and one time column are enough for the demonstration.
  Schema localSchema = new Schema.SchemaBuilder()
      .setSchemaName(schemaName)
      .addSingleValueDimension("dimension", FieldSpec.DataType.DOUBLE)
      .addMetric("metric", FieldSpec.DataType.INT)
      .addTime("time", TimeUnit.DAYS, FieldSpec.DataType.INT)
      .build();

  System.out.println(postSchema(host, port, localSchema));

  // The fetch must succeed before the comparison below makes sense.
  Schema remoteSchema = getSchema(host, port, schemaName);
  Preconditions.checkNotNull(remoteSchema);
  System.out.println(remoteSchema);
  System.out.println(equalsIgnoreVersion(localSchema, remoteSchema));

  System.out.println(deleteSchema(host, port, schemaName));
}
use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.
the class PinotSchemaRestletResource method deleteSchema.
/**
 * Deletes the schema with the given name.
 *
 * <p>Responds with 404 when the schema cannot be found, with 409 when a realtime table's validation
 * config references the schema, with 200 when the resource manager reports a successful delete, and with
 * 500 (plus a {@code CONTROLLER_SCHEMA_DELETE_ERROR} meter increment) when the delete fails.
 *
 * @param schemaName name of the schema to delete
 * @return a plain-text message describing the outcome
 */
@HttpVerb("delete")
@Summary("Deletes a schema")
@Tags({ "schema" })
@Paths({ "/schemas/{schemaName}", "/schemas/{schemaName}/" })
@Responses({ @Response(statusCode = "200", description = "The schema was deleted"), @Response(statusCode = "404", description = "The schema does not exist"), @Response(statusCode = "409", description = "The schema could not be deleted due to being in use"), @Response(statusCode = "500", description = "There was an error while deleting the schema") })
StringRepresentation deleteSchema(@Parameter(name = "schemaName", in = "path", description = "The name of the schema to delete", required = true) String schemaName) throws JSONException, IOException {
  Schema schema = _pinotHelixResourceManager.getSchema(schemaName);
  if (schema == null) {
    LOGGER.error("Error: could not find schema {}", schemaName);
    setStatus(Status.CLIENT_ERROR_NOT_FOUND);
    return new StringRepresentation("Error: Could not find schema " + schemaName);
  }

  // If the schema is associated with a table, we should not delete it.
  // NOTE(review): only realtime tables are inspected here; confirm offline tables never reference a schema.
  List<String> tableNames = _pinotHelixResourceManager.getAllRealtimeTables();
  for (String tableName : tableNames) {
    AbstractTableConfig config = _pinotHelixResourceManager.getTableConfig(tableName, CommonConstants.Helix.TableType.REALTIME);
    if (config == null) {
      // Without a config the table cannot be shown to reference this schema; skip it instead of failing with
      // a NullPointerException. (Presumably possible if the table is removed concurrently - TODO confirm.)
      LOGGER.warn("Could not find table config for table {} while checking usage of schema {}", tableName, schemaName);
      continue;
    }
    String tableSchema = config.getValidationConfig().getSchemaName();
    if (schemaName.equals(tableSchema)) {
      LOGGER.error("Cannot delete schema {}, as it is associated with table {}", schemaName, tableName);
      setStatus(Status.CLIENT_ERROR_CONFLICT);
      return new StringRepresentation("Error: Cannot delete schema " + schemaName + " as it is associated with table: " + TableNameBuilder.extractRawTableName(tableName));
    }
  }

  LOGGER.info("Trying to delete schema {}", schemaName);
  if (_pinotHelixResourceManager.deleteSchema(schema)) {
    LOGGER.info("Success: Deleted schema {}", schemaName);
    setStatus(Status.SUCCESS_OK);
    return new StringRepresentation("Success: Deleted schema " + schemaName);
  } else {
    LOGGER.error("Error: could not delete schema {}", schemaName);
    ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_DELETE_ERROR, 1L);
    setStatus(Status.SERVER_ERROR_INTERNAL);
    return new StringRepresentation("Error: Could not delete schema " + schemaName);
  }
}
use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.
the class PinotSchemaRestletResource method uploadSchema.
/**
 * Adds or updates the schema contained in the uploaded file under the given name.
 *
 * <p>The schema is parsed from the uploaded file and stored only when its own name equals
 * {@code schemaName}. A missing upload, a name mismatch, or any exception raised while parsing or storing
 * the schema increments the {@code CONTROLLER_SCHEMA_UPLOAD_ERROR} meter and sets a 500 status.
 *
 * @param schemaName schema name taken from the request path; must match the name inside the uploaded schema
 * @return a plain-text representation describing the outcome
 * @throws Exception if obtaining the upload contents fails
 */
@HttpVerb("put")
@Summary("Updates an existing schema")
@Tags({ "schema" })
@Paths({ "/schemas/{schemaName}", "/schemas/{schemaName}/" })
@Responses({ @Response(statusCode = "200", description = "The schema was updated"), @Response(statusCode = "500", description = "There was an error while updating the schema") })
private Representation uploadSchema(@Parameter(name = "schemaName", in = "path", description = "The name of the schema to update") String schemaName) throws Exception {
  File dataFile = getUploadContents();
  if (dataFile == null) {
    // Some problem occurs, send back a simple line of text.
    LOGGER.warn("No file was uploaded");
    ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_UPLOAD_ERROR, 1L);
    setStatus(Status.SERVER_ERROR_INTERNAL);
    return new StringRepresentation("schema not added", MediaType.TEXT_PLAIN);
  }

  try {
    // Parsing happens inside the try block so that a malformed schema file is metered and reported
    // the same way as a failure while storing the schema.
    Schema schema = Schema.fromFile(dataFile);
    if (schema.getSchemaName().equals(schemaName)) {
      _pinotHelixResourceManager.addOrUpdateSchema(schema);
      return new StringRepresentation(dataFile + " sucessfully added", MediaType.TEXT_PLAIN);
    }

    final String message = "Schema name mismatch for uploaded schema, tried to add schema with name " + schema.getSchemaName() + " as " + schemaName;
    LOGGER.warn(message);
    ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_UPLOAD_ERROR, 1L);
    setStatus(Status.SERVER_ERROR_INTERNAL);
    return new StringRepresentation(message, MediaType.TEXT_PLAIN);
  } catch (Exception e) {
    // Log the failure once; the stack trace is attached to this single entry.
    LOGGER.error("Caught exception in file upload", e);
    ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_UPLOAD_ERROR, 1L);
    setStatus(Status.SERVER_ERROR_INTERNAL);
    return PinotSegmentUploadRestletResource.exceptionToStringRepresentation(e);
  }
}
use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.
the class AutoAddInvertedIndex method runQueryStrategy.
/**
 * Walks over every table resource in the cluster and, based on query results, decides which dimension
 * columns should get an auto-generated inverted index.
 *
 * <p>For each resource whose name ends with "_OFFLINE" or "_REALTIME" and matches the optional table name
 * pattern:
 * <ul>
 *   <li>Tables that already carry an auto-generated inverted index are skipped in NEW mode, have the index
 *       removed in REMOVE mode, and have it recomputed from scratch in REFRESH mode.</li>
 *   <li>Tables with a non-empty, non-auto-generated inverted index are left untouched.</li>
 *   <li>Tables without a schema, without dimensions, without a non-STRING time column, or with no more
 *       than {@code _tableSizeThreshold} documents are skipped.</li>
 *   <li>Otherwise DISTINCTCOUNT is queried per dimension on the rows at the maximum time value, and up to
 *       {@code _maxNumInvertedIndexAdded} dimensions whose cardinality exceeds {@code _cardinalityThreshold}
 *       are added as inverted index columns.</li>
 * </ul>
 *
 * @throws Exception if fetching configs or schemas, sending queries or reading query responses fails
 */
private void runQueryStrategy() throws Exception {
  // Get all resources in cluster
  List<String> resourcesInCluster = _helixAdmin.getResourcesInCluster(_clusterName);
  for (String tableName : resourcesInCluster) {
    // Skip non-table resources
    if (!tableName.endsWith("_OFFLINE") && !tableName.endsWith("_REALTIME")) {
      continue;
    }
    // Skip tables that do not match the defined name pattern
    if (_tableNamePattern != null && !tableName.matches(_tableNamePattern)) {
      continue;
    }
    LOGGER.info("Table: {} matches the table name pattern: {}", tableName, _tableNamePattern);

    // Get the inverted index config
    AbstractTableConfig tableConfig = getTableConfig(tableName);
    IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
    List<String> invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
    boolean autoGeneratedInvertedIndex = indexingConfig.isAutoGeneratedInvertedIndex();

    // Handle auto-generated inverted index
    if (autoGeneratedInvertedIndex) {
      Preconditions.checkState(!invertedIndexColumns.isEmpty(), "Auto-generated inverted index list is empty");
      // NEW mode, skip
      if (_mode == Mode.NEW) {
        LOGGER.info("Table: {}, skip adding inverted index because it has auto-generated inverted index and under NEW mode", tableName);
        continue;
      }
      // REMOVE mode, remove the inverted index and update
      if (_mode == Mode.REMOVE) {
        invertedIndexColumns.clear();
        indexingConfig.setAutoGeneratedInvertedIndex(false);
        if (updateIndexConfig(tableName, tableConfig)) {
          LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
        } else {
          LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
        }
        continue;
      }
      // REFRESH mode, remove auto-generated inverted index
      if (_mode == Mode.REFRESH) {
        invertedIndexColumns.clear();
      }
    } else {
      // Handle null inverted index columns
      if (invertedIndexColumns == null) {
        invertedIndexColumns = new ArrayList<>();
        indexingConfig.setInvertedIndexColumns(invertedIndexColumns);
      }
      // Remove empty strings
      int emptyStringIndex;
      while ((emptyStringIndex = invertedIndexColumns.indexOf("")) != -1) {
        invertedIndexColumns.remove(emptyStringIndex);
      }
      // Skip non-empty non-auto-generated inverted index
      if (!invertedIndexColumns.isEmpty()) {
        LOGGER.info("Table: {}, skip adding inverted index because it has non-auto-generated inverted index", tableName);
        continue;
      }
    }

    // Skip tables without a schema
    Schema tableSchema = getTableSchema(tableName);
    if (tableSchema == null) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have a schema", tableName);
      continue;
    }

    // Skip tables without dimensions
    List<String> dimensionNames = tableSchema.getDimensionNames();
    if (dimensionNames.size() == 0) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have any dimension column", tableName);
      continue;
    }

    // Skip tables without a proper time column
    TimeFieldSpec timeFieldSpec = tableSchema.getTimeFieldSpec();
    if (timeFieldSpec == null || timeFieldSpec.getDataType() == FieldSpec.DataType.STRING) {
      LOGGER.info("Table: {}, skip adding inverted index because it does not have a numeric time column", tableName);
      continue;
    }
    String timeColumnName = timeFieldSpec.getName();
    TimeUnit timeUnit = timeFieldSpec.getOutgoingGranularitySpec().getTimeType();
    if (timeUnit != TimeUnit.DAYS) {
      // A non-DAYS unit is only reported, the table is still processed.
      LOGGER.warn("Table: {}, time column {} has non-DAYS time unit: {}", tableName, timeColumnName, timeUnit);
    }

    // Only add inverted index to table larger than a threshold
    JSONObject queryResponse = sendQuery("SELECT COUNT(*) FROM " + tableName);
    long numTotalDocs = queryResponse.getLong("totalDocs");
    LOGGER.info("Table: {}, number of total documents: {}", tableName, numTotalDocs);
    if (numTotalDocs <= _tableSizeThreshold) {
      LOGGER.info("Table: {}, skip adding inverted index because the table is too small", tableName);
      continue;
    }

    // Get each dimension's cardinality on one timestamp's data
    queryResponse = sendQuery("SELECT Max(" + timeColumnName + ") FROM " + tableName);
    int maxTimeStamp = queryResponse.getJSONArray("aggregationResults").getJSONObject(0).getInt("value");
    LOGGER.info("Table: {}, max time column {}: {}", tableName, timeColumnName, maxTimeStamp);

    // Query DISTINCTCOUNT on all dimensions in one query might cause timeout, so query them separately
    List<ResultPair> resultPairs = new ArrayList<>();
    for (String dimensionName : dimensionNames) {
      String query = "SELECT DISTINCTCOUNT(" + dimensionName + ") FROM " + tableName + " WHERE " + timeColumnName + " = " + maxTimeStamp;
      queryResponse = sendQuery(query);
      JSONObject result = queryResponse.getJSONArray("aggregationResults").getJSONObject(0);
      // The column name is recovered by stripping the "distinctCount_" prefix from the reported function name.
      resultPairs.add(new ResultPair(result.getString("function").substring("distinctCount_".length()), result.getLong("value")));
    }

    // Sort the dimensions based on their cardinalities
    // NOTE(review): the early break below presumes ResultPair orders by descending cardinality - confirm.
    Collections.sort(resultPairs);

    // Add the top dimensions into inverted index columns
    int numInvertedIndex = Math.min(_maxNumInvertedIndexAdded, resultPairs.size());
    for (int i = 0; i < numInvertedIndex; i++) {
      ResultPair resultPair = resultPairs.get(i);
      String columnName = resultPair._key;
      long cardinality = resultPair._value;
      if (cardinality > _cardinalityThreshold) {
        // Do not append inverted index if already exists
        if (!invertedIndexColumns.contains(columnName)) {
          invertedIndexColumns.add(columnName);
        }
        LOGGER.info("Table: {}, add inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
      } else {
        LOGGER.info("Table: {}, skip adding inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
        break;
      }
    }

    // Update indexing config
    if (!invertedIndexColumns.isEmpty()) {
      indexingConfig.setAutoGeneratedInvertedIndex(true);
      if (updateIndexConfig(tableName, tableConfig)) {
        LOGGER.info("Table: {}, added inverted index to columns: {}", tableName, invertedIndexColumns);
      } else {
        LOGGER.error("Table: {}, failed to add inverted index to columns: {}", tableName, invertedIndexColumns);
      }
    } else {
      if (autoGeneratedInvertedIndex) {
        Preconditions.checkState(_mode == Mode.REFRESH);
        // Remove existing auto-generated inverted index because no column matches all the conditions
        indexingConfig.setAutoGeneratedInvertedIndex(false);
        if (updateIndexConfig(tableName, tableConfig)) {
          LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
        } else {
          LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
        }
      }
    }
  }
}
use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.
the class AvroUtils method getPinotSchemaFromAvroSchema.
/**
 * Given an avro schema object along with column field types and time unit, return the equivalent
 * pinot schema object.
 *
 * <p>Each avro field becomes a dimension, metric or time field spec according to its entry in
 * {@code fieldTypes}. When the avro column type is not supported, a warning is logged and the column is
 * treated as {@code STRING}.
 *
 * @param avroSchema Avro schema for which to get the Pinot schema.
 * @param fieldTypes Map containing fieldTypes for each column; must contain an entry for every avro field.
 * @param timeUnit Time unit to be used for the time column.
 * @return Return the equivalent pinot schema for the given avro schema.
 * @throws NullPointerException if {@code fieldTypes} has no entry for one of the avro fields
 * @throws IllegalStateException if a metric or time field is multi-valued
 * @throws UnsupportedOperationException if a field type is not DIMENSION, METRIC or TIME
 */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema, Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
  Schema pinotSchema = new Schema();
  for (final Field field : avroSchema.getFields()) {
    String fieldName = field.name();

    FieldSpec.DataType dataType;
    try {
      dataType = AvroRecordReader.getColumnType(field);
    } catch (UnsupportedOperationException e) {
      LOGGER.warn("Unsupported field type for field {} schema {}, using String instead.", fieldName, field.schema());
      dataType = FieldSpec.DataType.STRING;
    }

    // Switching on a null enum throws a NullPointerException without any message; fail with the
    // offending field name instead so the missing mapping is easy to spot.
    FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
    Preconditions.checkNotNull(fieldType, "Missing field type for field: %s", fieldName);

    boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
    switch (fieldType) {
      case DIMENSION:
        pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
        break;
      case METRIC:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
        pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
        break;
      case TIME:
        Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
        pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
        break;
      default:
        throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
    }
  }
  return pinotSchema;
}
Aggregations