
Example 1 with Schema

Use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.

The class SchemaUtils, method main.

/**
   * An example of how to use this utility class.
   */
public static void main(String[] args) {
    Schema schema = new Schema.SchemaBuilder()
        .setSchemaName("testSchema")
        .addSingleValueDimension("dimension", FieldSpec.DataType.DOUBLE)
        .addMetric("metric", FieldSpec.DataType.INT)
        .addTime("time", TimeUnit.DAYS, FieldSpec.DataType.INT)
        .build();
    System.out.println(postSchema("localhost", 8100, schema));
    Schema fetchedSchema = getSchema("localhost", 8100, "testSchema");
    Preconditions.checkNotNull(fetchedSchema);
    System.out.println(fetchedSchema);
    System.out.println(equalsIgnoreVersion(schema, fetchedSchema));
    System.out.println(deleteSchema("localhost", 8100, "testSchema"));
}
Also used: Schema (com.linkedin.pinot.common.data.Schema)
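
For reference, the same builder pattern also covers multi-value dimensions. A minimal sketch, assuming this version of SchemaBuilder exposes addMultiValueDimension alongside the methods used above (the schema and column names are placeholders):

import java.util.concurrent.TimeUnit;

import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;

public class MultiValueSchemaExample {
    public static void main(String[] args) {
        // Hypothetical schema with a multi-value dimension; addMultiValueDimension
        // is assumed to exist on this version of SchemaBuilder.
        Schema schema = new Schema.SchemaBuilder()
            .setSchemaName("testSchemaMv")
            .addSingleValueDimension("country", FieldSpec.DataType.STRING)
            .addMultiValueDimension("tags", FieldSpec.DataType.STRING)
            .addMetric("clicks", FieldSpec.DataType.LONG)
            .addTime("daysSinceEpoch", TimeUnit.DAYS, FieldSpec.DataType.INT)
            .build();
        System.out.println(schema);
    }
}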

Example 2 with Schema

Use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.

The class PinotSchemaRestletResource, method deleteSchema.

@HttpVerb("delete")
@Summary("Deletes a schema")
@Tags({ "schema" })
@Paths({ "/schemas/{schemaName}", "/schemas/{schemaName}/" })
@Responses({
    @Response(statusCode = "200", description = "The schema was deleted"),
    @Response(statusCode = "404", description = "The schema does not exist"),
    @Response(statusCode = "409", description = "The schema could not be deleted due to being in use"),
    @Response(statusCode = "500", description = "There was an error while deleting the schema")
})
StringRepresentation deleteSchema(
        @Parameter(name = "schemaName", in = "path", description = "The name of the schema to delete", required = true)
        String schemaName) throws JSONException, IOException {
    Schema schema = _pinotHelixResourceManager.getSchema(schemaName);
    if (schema == null) {
        LOGGER.error("Error: could not find schema {}", schemaName);
        setStatus(Status.CLIENT_ERROR_NOT_FOUND);
        return new StringRepresentation("Error: Could not find schema " + schemaName);
    }
    // If the schema is associated with a realtime table, we should not delete it.
    List<String> tableNames = _pinotHelixResourceManager.getAllRealtimeTables();
    for (String tableName : tableNames) {
        AbstractTableConfig config = _pinotHelixResourceManager.getTableConfig(tableName, CommonConstants.Helix.TableType.REALTIME);
        String tableSchema = config.getValidationConfig().getSchemaName();
        if (schemaName.equals(tableSchema)) {
            LOGGER.error("Cannot delete schema {}, as it is associated with table {}", schemaName, tableName);
            setStatus(Status.CLIENT_ERROR_CONFLICT);
            return new StringRepresentation("Error: Cannot delete schema " + schemaName + " as it is associated with table: " + TableNameBuilder.extractRawTableName(tableName));
        }
    }
    LOGGER.info("Trying to delete schema {}", schemaName);
    if (_pinotHelixResourceManager.deleteSchema(schema)) {
        LOGGER.info("Success: Deleted schema {}", schemaName);
        setStatus(Status.SUCCESS_OK);
        return new StringRepresentation("Success: Deleted schema " + schemaName);
    } else {
        LOGGER.error("Error: could not delete schema {}", schemaName);
        ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_DELETE_ERROR, 1L);
        setStatus(Status.SERVER_ERROR_INTERNAL);
        return new StringRepresentation("Error: Could not delete schema " + schemaName);
    }
}
Also used: StringRepresentation (org.restlet.representation.StringRepresentation), Schema (com.linkedin.pinot.common.data.Schema), AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig), Summary (com.linkedin.pinot.common.restlet.swagger.Summary), HttpVerb (com.linkedin.pinot.common.restlet.swagger.HttpVerb), Paths (com.linkedin.pinot.common.restlet.swagger.Paths), Tags (com.linkedin.pinot.common.restlet.swagger.Tags), Responses (com.linkedin.pinot.common.restlet.swagger.Responses)
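
The endpoint above can be exercised with any HTTP client. A minimal sketch using only the JDK, with placeholder controller host, port, and schema name (the port matches the one used in Example 1):

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;

public class DeleteSchemaExample {
    public static void main(String[] args) throws IOException {
        // Placeholder controller address and schema name.
        URL url = new URL("http://localhost:8100/schemas/testSchema");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("DELETE");
        // Expected codes per the @Responses annotation above:
        // 200 deleted, 404 not found, 409 in use, 500 internal error.
        System.out.println("Response code: " + connection.getResponseCode());
        connection.disconnect();
    }
}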

Example 3 with Schema

Use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.

The class PinotSchemaRestletResource, method uploadSchema.

@HttpVerb("put")
@Summary("Updates an existing schema")
@Tags({ "schema" })
@Paths({ "/schemas/{schemaName}", "/schemas/{schemaName}/" })
@Responses({
    @Response(statusCode = "200", description = "The schema was updated"),
    @Response(statusCode = "500", description = "There was an error while updating the schema")
})
private Representation uploadSchema(
        @Parameter(name = "schemaName", in = "path", description = "The name of the schema to update")
        String schemaName) throws Exception {
    File dataFile = getUploadContents();
    if (dataFile != null) {
        Schema schema = Schema.fromFile(dataFile);
        try {
            if (schema.getSchemaName().equals(schemaName)) {
                _pinotHelixResourceManager.addOrUpdateSchema(schema);
                return new StringRepresentation(dataFile + " successfully added", MediaType.TEXT_PLAIN);
            } else {
                final String message = "Schema name mismatch for uploaded schema, tried to add schema with name " + schema.getSchemaName() + " as " + schemaName;
                LOGGER.warn(message);
                ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_UPLOAD_ERROR, 1L);
                setStatus(Status.SERVER_ERROR_INTERNAL);
                return new StringRepresentation(message, MediaType.TEXT_PLAIN);
            }
        } catch (Exception e) {
            LOGGER.error("Caught exception while adding schema {}", schemaName, e);
            ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_UPLOAD_ERROR, 1L);
            setStatus(Status.SERVER_ERROR_INTERNAL);
            return PinotSegmentUploadRestletResource.exceptionToStringRepresentation(e);
        }
    } else {
        // Something went wrong; send back a simple line of text.
        LOGGER.warn("No file was uploaded");
        ControllerRestApplication.getControllerMetrics().addMeteredGlobalValue(ControllerMeter.CONTROLLER_SCHEMA_UPLOAD_ERROR, 1L);
        setStatus(Status.SERVER_ERROR_INTERNAL);
        return new StringRepresentation("schema not added", MediaType.TEXT_PLAIN);
    }
}
Also used: StringRepresentation (org.restlet.representation.StringRepresentation), Schema (com.linkedin.pinot.common.data.Schema), File (java.io.File), JSONException (org.json.JSONException), IOException (java.io.IOException), Summary (com.linkedin.pinot.common.restlet.swagger.Summary), HttpVerb (com.linkedin.pinot.common.restlet.swagger.HttpVerb), Paths (com.linkedin.pinot.common.restlet.swagger.Paths), Tags (com.linkedin.pinot.common.restlet.swagger.Tags), Responses (com.linkedin.pinot.common.restlet.swagger.Responses)
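
A schema file can be sent to this endpoint with an HTTP PUT. A minimal JDK-only sketch follows; the file path and form field name are placeholders, and the multipart encoding is an assumption based on getUploadContents() reading an uploaded file rather than a raw body:

import java.io.IOException;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class UploadSchemaExample {
    public static void main(String[] args) throws IOException {
        // Placeholder path to a schema JSON file whose schemaName is "testSchema".
        byte[] schemaBytes = Files.readAllBytes(Paths.get("/tmp/testSchema.json"));
        String boundary = "----pinot-schema-upload";
        URL url = new URL("http://localhost:8100/schemas/testSchema");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("PUT");
        connection.setDoOutput(true);
        connection.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
        try (OutputStream out = connection.getOutputStream()) {
            // Single multipart part carrying the schema file; the field name
            // "file" is a guess, not taken from the resource above.
            out.write(("--" + boundary + "\r\n"
                    + "Content-Disposition: form-data; name=\"file\"; filename=\"testSchema.json\"\r\n"
                    + "Content-Type: application/json\r\n\r\n").getBytes(StandardCharsets.UTF_8));
            out.write(schemaBytes);
            out.write(("\r\n--" + boundary + "--\r\n").getBytes(StandardCharsets.UTF_8));
        }
        System.out.println("Response code: " + connection.getResponseCode());
        connection.disconnect();
    }
}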

Example 4 with Schema

Use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.

The class AutoAddInvertedIndex, method runQueryStrategy.

private void runQueryStrategy() throws Exception {
    // Get all resources in cluster
    List<String> resourcesInCluster = _helixAdmin.getResourcesInCluster(_clusterName);
    for (String tableName : resourcesInCluster) {
        // Skip non-table resources
        if (!tableName.endsWith("_OFFLINE") && !tableName.endsWith("_REALTIME")) {
            continue;
        }
        // Skip tables that do not match the defined name pattern
        if (_tableNamePattern != null && !tableName.matches(_tableNamePattern)) {
            continue;
        }
        LOGGER.info("Table: {} matches the table name pattern: {}", tableName, _tableNamePattern);
        // Get the inverted index config
        AbstractTableConfig tableConfig = getTableConfig(tableName);
        IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
        List<String> invertedIndexColumns = indexingConfig.getInvertedIndexColumns();
        boolean autoGeneratedInvertedIndex = indexingConfig.isAutoGeneratedInvertedIndex();
        // Handle auto-generated inverted index
        if (autoGeneratedInvertedIndex) {
            Preconditions.checkState(!invertedIndexColumns.isEmpty(), "Auto-generated inverted index list is empty");
            // NEW mode, skip
            if (_mode == Mode.NEW) {
                LOGGER.info("Table: {}, skip adding inverted index because it has auto-generated inverted index and under NEW mode", tableName);
                continue;
            }
            // REMOVE mode, remove the inverted index and update
            if (_mode == Mode.REMOVE) {
                invertedIndexColumns.clear();
                indexingConfig.setAutoGeneratedInvertedIndex(false);
                if (updateIndexConfig(tableName, tableConfig)) {
                    LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
                } else {
                    LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
                }
                continue;
            }
            // REFRESH mode, remove auto-generated inverted index
            if (_mode == Mode.REFRESH) {
                invertedIndexColumns.clear();
            }
        } else {
            // Handle null inverted index columns
            if (invertedIndexColumns == null) {
                invertedIndexColumns = new ArrayList<>();
                indexingConfig.setInvertedIndexColumns(invertedIndexColumns);
            }
            // Remove empty strings
            int emptyStringIndex;
            while ((emptyStringIndex = invertedIndexColumns.indexOf("")) != -1) {
                invertedIndexColumns.remove(emptyStringIndex);
            }
            // Skip non-empty non-auto-generated inverted index
            if (!invertedIndexColumns.isEmpty()) {
                LOGGER.info("Table: {}, skip adding inverted index because it has non-auto-generated inverted index", tableName);
                continue;
            }
        }
        // Skip tables without a schema
        Schema tableSchema = getTableSchema(tableName);
        if (tableSchema == null) {
            LOGGER.info("Table: {}, skip adding inverted index because it does not have a schema", tableName);
            continue;
        }
        // Skip tables without dimensions
        List<String> dimensionNames = tableSchema.getDimensionNames();
        if (dimensionNames.isEmpty()) {
            LOGGER.info("Table: {}, skip adding inverted index because it does not have any dimension column", tableName);
            continue;
        }
        // Skip tables without a proper time column
        TimeFieldSpec timeFieldSpec = tableSchema.getTimeFieldSpec();
        if (timeFieldSpec == null || timeFieldSpec.getDataType() == FieldSpec.DataType.STRING) {
            LOGGER.info("Table: {}, skip adding inverted index because it does not have a numeric time column", tableName);
            continue;
        }
        String timeColumnName = timeFieldSpec.getName();
        TimeUnit timeUnit = timeFieldSpec.getOutgoingGranularitySpec().getTimeType();
        if (timeUnit != TimeUnit.DAYS) {
            LOGGER.warn("Table: {}, time column {} has non-DAYS time unit: {}", tableName, timeColumnName, timeUnit);
        }
        // Only add inverted index to table larger than a threshold
        JSONObject queryResponse = sendQuery("SELECT COUNT(*) FROM " + tableName);
        long numTotalDocs = queryResponse.getLong("totalDocs");
        LOGGER.info("Table: {}, number of total documents: {}", tableName, numTotalDocs);
        if (numTotalDocs <= _tableSizeThreshold) {
            LOGGER.info("Table: {}, skip adding inverted index because the table is too small", tableName);
            continue;
        }
        // Get each dimension's cardinality on one timestamp's data
        queryResponse = sendQuery("SELECT Max(" + timeColumnName + ") FROM " + tableName);
        int maxTimeStamp = queryResponse.getJSONArray("aggregationResults").getJSONObject(0).getInt("value");
        LOGGER.info("Table: {}, max time column {}: {}", tableName, timeColumnName, maxTimeStamp);
        // Query DISTINCTCOUNT on all dimensions in one query might cause timeout, so query them separately
        List<ResultPair> resultPairs = new ArrayList<>();
        for (String dimensionName : dimensionNames) {
            String query = "SELECT DISTINCTCOUNT(" + dimensionName + ") FROM " + tableName + " WHERE " + timeColumnName + " = " + maxTimeStamp;
            queryResponse = sendQuery(query);
            JSONObject result = queryResponse.getJSONArray("aggregationResults").getJSONObject(0);
            resultPairs.add(new ResultPair(result.getString("function").substring("distinctCount_".length()), result.getLong("value")));
        }
        // Sort the dimensions based on their cardinalities
        Collections.sort(resultPairs);
        // Add the top dimensions into inverted index columns
        int numInvertedIndex = Math.min(_maxNumInvertedIndexAdded, resultPairs.size());
        for (int i = 0; i < numInvertedIndex; i++) {
            ResultPair resultPair = resultPairs.get(i);
            String columnName = resultPair._key;
            long cardinality = resultPair._value;
            if (cardinality > _cardinalityThreshold) {
                // Do not append inverted index if already exists
                if (!invertedIndexColumns.contains(columnName)) {
                    invertedIndexColumns.add(columnName);
                }
                LOGGER.info("Table: {}, add inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
            } else {
                LOGGER.info("Table: {}, skip adding inverted index to column {} with cardinality: {}", tableName, columnName, cardinality);
                break;
            }
        }
        // Update indexing config
        if (!invertedIndexColumns.isEmpty()) {
            indexingConfig.setAutoGeneratedInvertedIndex(true);
            if (updateIndexConfig(tableName, tableConfig)) {
                LOGGER.info("Table: {}, added inverted index to columns: {}", tableName, invertedIndexColumns);
            } else {
                LOGGER.error("Table: {}, failed to add inverted index to columns: {}", tableName, invertedIndexColumns);
            }
        } else {
            if (autoGeneratedInvertedIndex) {
                Preconditions.checkState(_mode == Mode.REFRESH);
                // Remove existing auto-generated inverted index because no column matches all the conditions
                indexingConfig.setAutoGeneratedInvertedIndex(false);
                if (updateIndexConfig(tableName, tableConfig)) {
                    LOGGER.info("Table: {}, removed auto-generated inverted index", tableName);
                } else {
                    LOGGER.error("Table: {}, failed to remove auto-generated inverted index", tableName);
                }
            }
        }
    }
}
Also used: IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig), Schema (com.linkedin.pinot.common.data.Schema), TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec), ArrayList (java.util.ArrayList), JSONObject (org.json.JSONObject), TimeUnit (java.util.concurrent.TimeUnit), AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig)
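
The ResultPair type used for the cardinality sort is not shown in this snippet. A minimal sketch consistent with its usage above, where the loop treats the first entries after Collections.sort as the top dimensions, so descending order by cardinality is assumed:

// Hypothetical reconstruction of ResultPair: a column name paired with its
// cardinality, ordered so that higher cardinalities sort first.
private static class ResultPair implements Comparable<ResultPair> {
    final String _key;
    final long _value;

    ResultPair(String key, long value) {
        _key = key;
        _value = value;
    }

    @Override
    public int compareTo(ResultPair other) {
        // Descending by cardinality.
        return Long.compare(other._value, _value);
    }
}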

Example 5 with Schema

Use of com.linkedin.pinot.common.data.Schema in project pinot by linkedin.

The class AvroUtils, method getPinotSchemaFromAvroSchema.

/**
   * Given an Avro schema object along with column field types and a time unit, returns the
   * equivalent Pinot schema object.
   *
   * @param avroSchema Avro schema for which to get the Pinot schema.
   * @param fieldTypes Map containing the field type for each column.
   * @param timeUnit Time unit to be used for the time column.
   * @return The equivalent Pinot schema for the given Avro schema.
   */
private static Schema getPinotSchemaFromAvroSchema(org.apache.avro.Schema avroSchema, Map<String, FieldSpec.FieldType> fieldTypes, TimeUnit timeUnit) {
    Schema pinotSchema = new Schema();
    for (final Field field : avroSchema.getFields()) {
        String fieldName = field.name();
        FieldSpec.DataType dataType;
        try {
            dataType = AvroRecordReader.getColumnType(field);
        } catch (UnsupportedOperationException e) {
            LOGGER.warn("Unsupported field type for field {} schema {}, using String instead.", fieldName, field.schema());
            dataType = FieldSpec.DataType.STRING;
        }
        FieldSpec.FieldType fieldType = fieldTypes.get(fieldName);
        boolean isSingleValueField = AvroRecordReader.isSingleValueField(field);
        if (fieldType == null) {
            throw new UnsupportedOperationException("No field type specified for field: " + fieldName);
        }
        switch (fieldType) {
            case DIMENSION:
                pinotSchema.addField(new DimensionFieldSpec(fieldName, dataType, isSingleValueField));
                break;
            case METRIC:
                Preconditions.checkState(isSingleValueField, "Unsupported multi-value for metric field.");
                pinotSchema.addField(new MetricFieldSpec(fieldName, dataType));
                break;
            case TIME:
                Preconditions.checkState(isSingleValueField, "Unsupported multi-value for time field.");
                pinotSchema.addField(new TimeFieldSpec(fieldName, dataType, timeUnit));
                break;
            default:
                throw new UnsupportedOperationException("Unsupported field type: " + fieldType + " for field: " + fieldName);
        }
    }
    return pinotSchema;
}
Also used: Field (org.apache.avro.Schema.Field), Schema (com.linkedin.pinot.common.data.Schema), TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec), MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec), DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec), FieldSpec (com.linkedin.pinot.common.data.FieldSpec)
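
To make the inputs concrete, here is a hedged sketch of how the Avro schema and field-type map consumed above might be assembled. The method itself is private, so real callers would go through one of AvroUtils' public entry points; the record and column names here are placeholders:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import org.apache.avro.SchemaBuilder;

import com.linkedin.pinot.common.data.FieldSpec;

public class AvroToPinotSchemaExample {
    public static void main(String[] args) {
        // Placeholder Avro record with one dimension, one metric and one time column.
        org.apache.avro.Schema avroSchema = SchemaBuilder.record("exampleRecord").fields()
                .name("country").type().stringType().noDefault()
                .name("clicks").type().longType().noDefault()
                .name("daysSinceEpoch").type().intType().noDefault()
                .endRecord();
        // Field-type map in the shape expected by getPinotSchemaFromAvroSchema.
        Map<String, FieldSpec.FieldType> fieldTypes = new HashMap<>();
        fieldTypes.put("country", FieldSpec.FieldType.DIMENSION);
        fieldTypes.put("clicks", FieldSpec.FieldType.METRIC);
        fieldTypes.put("daysSinceEpoch", FieldSpec.FieldType.TIME);
        System.out.println(avroSchema.toString(true));
        System.out.println("Time unit for the time column: " + TimeUnit.DAYS);
    }
}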

Aggregations

Schema (com.linkedin.pinot.common.data.Schema): 68
File (java.io.File): 23
DimensionFieldSpec (com.linkedin.pinot.common.data.DimensionFieldSpec): 19
MetricFieldSpec (com.linkedin.pinot.common.data.MetricFieldSpec): 14
GenericRow (com.linkedin.pinot.core.data.GenericRow): 14
TimeFieldSpec (com.linkedin.pinot.common.data.TimeFieldSpec): 13
HashMap (java.util.HashMap): 13
SegmentGeneratorConfig (com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig): 12
FieldSpec (com.linkedin.pinot.common.data.FieldSpec): 11
Test (org.testng.annotations.Test): 9
IOException (java.io.IOException): 8
SegmentIndexCreationDriverImpl (com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl): 7
BeforeClass (org.testng.annotations.BeforeClass): 7
AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig): 6
ArrayList (java.util.ArrayList): 6
TimeGranularitySpec (com.linkedin.pinot.common.data.TimeGranularitySpec): 5
HttpVerb (com.linkedin.pinot.common.restlet.swagger.HttpVerb): 5
Paths (com.linkedin.pinot.common.restlet.swagger.Paths): 5
Summary (com.linkedin.pinot.common.restlet.swagger.Summary): 5
Tags (com.linkedin.pinot.common.restlet.swagger.Tags): 5