Search in sources :

Example 61 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonDataProcessorUtil method getLocalDataFolderLocation.

/**
   * This method will form the local data folder store location
   *
   * @param databaseName
   * @param tableName
   * @param taskId
   * @param partitionId
   * @param segmentId
   * @return
   */
public static String getLocalDataFolderLocation(String databaseName, String tableName, String taskId, String partitionId, String segmentId, boolean isCompactionFlow) {
    String tempLocationKey = getTempStoreLocationKey(databaseName, tableName, taskId, isCompactionFlow);
    String baseStorePath = CarbonProperties.getInstance().getProperty(tempLocationKey, CarbonCommonConstants.STORE_LOCATION_DEFAULT_VAL);
    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(databaseName + CarbonCommonConstants.UNDERSCORE + tableName);
    CarbonTablePath carbonTablePath = CarbonStorePath.getCarbonTablePath(baseStorePath, carbonTable.getCarbonTableIdentifier());
    String carbonDataDirectoryPath = carbonTablePath.getCarbonDataDirectoryPath(partitionId, segmentId + "");
    return carbonDataDirectoryPath + File.separator + taskId;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath)

Example 62 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.

/**
   * Create the model using @{@link CarbonDataLoadConfiguration}
   */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId, int taskExtension) {
    CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    boolean[] isUseInvertedIndex = CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());
    int[] dimLensWithComplex = configuration.getCardinalityFinder().getCardinality();
    if (!configuration.isSortTable()) {
        for (int i = 0; i < dimLensWithComplex.length; i++) {
            if (dimLensWithComplex[i] != 0) {
                dimLensWithComplex[i] = Integer.MAX_VALUE;
            }
        }
    }
    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(identifier.getDatabaseName() + CarbonCommonConstants.UNDERSCORE + identifier.getTableName());
    List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(identifier.getTableName()), carbonTable.getMeasureByTableName(identifier.getTableName()));
    int[] colCardinality = CarbonUtil.getFormattedCardinality(dimLensWithComplex, wrapperColumnSchema);
    SegmentProperties segmentProperties = new SegmentProperties(wrapperColumnSchema, colCardinality);
    int[] dimLens = configuration.calcDimensionLengths();
    int dimensionCount = configuration.getDimensionCount();
    int noDictionaryCount = configuration.getNoDictionaryCount();
    int complexDimensionCount = configuration.getComplexColumnCount();
    int measureCount = configuration.getMeasureCount();
    int simpleDimsCount = dimensionCount - noDictionaryCount - complexDimensionCount;
    int[] simpleDimsLen = new int[simpleDimsCount];
    for (int i = 0; i < simpleDimsCount; i++) {
        simpleDimsLen[i] = dimLens[i];
    }
    //To Set MDKey Index of each primitive type in complex type
    int surrIndex = simpleDimsCount;
    Iterator<Map.Entry<String, GenericDataType>> complexMap = CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields()).entrySet().iterator();
    Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
    while (complexMap.hasNext()) {
        Map.Entry<String, GenericDataType> complexDataType = complexMap.next();
        complexDataType.getValue().setOutputArrayIndex(0);
        complexIndexMap.put(simpleDimsCount, complexDataType.getValue());
        simpleDimsCount++;
        List<GenericDataType> primitiveTypes = new ArrayList<GenericDataType>();
        complexDataType.getValue().getAllPrimitiveChildren(primitiveTypes);
        for (GenericDataType eachPrimitive : primitiveTypes) {
            eachPrimitive.setSurrogateIndex(surrIndex++);
        }
    }
    CarbonDataFileAttributes carbonDataFileAttributes = new CarbonDataFileAttributes(Integer.parseInt(configuration.getTaskNo()), (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
    String carbonDataDirectoryPath = getCarbonDataFolderLocation(configuration);
    CarbonFactDataHandlerModel carbonFactDataHandlerModel = new CarbonFactDataHandlerModel();
    carbonFactDataHandlerModel.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
    carbonFactDataHandlerModel.setDatabaseName(identifier.getDatabaseName());
    carbonFactDataHandlerModel.setTableName(identifier.getTableName());
    carbonFactDataHandlerModel.setMeasureCount(measureCount);
    carbonFactDataHandlerModel.setStoreLocation(storeLocation);
    carbonFactDataHandlerModel.setDimLens(dimLens);
    carbonFactDataHandlerModel.setNoDictionaryCount(noDictionaryCount);
    carbonFactDataHandlerModel.setDimensionCount(configuration.getDimensionCount() - noDictionaryCount);
    carbonFactDataHandlerModel.setComplexIndexMap(complexIndexMap);
    carbonFactDataHandlerModel.setSegmentProperties(segmentProperties);
    carbonFactDataHandlerModel.setColCardinality(colCardinality);
    carbonFactDataHandlerModel.setMeasureDataType(configuration.getMeasureDataType());
    carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema);
    carbonFactDataHandlerModel.setPrimitiveDimLens(simpleDimsLen);
    carbonFactDataHandlerModel.setCarbonDataFileAttributes(carbonDataFileAttributes);
    carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
    carbonFactDataHandlerModel.setIsUseInvertedIndex(isUseInvertedIndex);
    carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    carbonFactDataHandlerModel.setComplexDimensionKeyGenerator(configuration.createKeyGeneratorForComplexDimension());
    carbonFactDataHandlerModel.bucketId = bucketId;
    carbonFactDataHandlerModel.segmentId = configuration.getSegmentId();
    carbonFactDataHandlerModel.taskExtension = taskExtension;
    return carbonFactDataHandlerModel;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) HashMap(java.util.HashMap) Map(java.util.Map)

Example 63 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class DataLoadProcessBuilder method createConfiguration.

private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    if (!new File(storeLocation).mkdirs()) {
        LOGGER.error("Error while creating the temp store path: " + storeLocation);
    }
    CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = CarbonDataProcessorUtil.getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    configuration.setTableIdentifier(identifier);
    configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    configuration.setHeader(loadModel.getCsvHeaderColumns());
    configuration.setPartitionId(loadModel.getPartitionId());
    configuration.setSegmentId(loadModel.getSegmentId());
    configuration.setTaskNo(loadModel.getTaskNo());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
    configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
    CarbonMetadata.getInstance().addCarbonTable(carbonTable);
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
    Map<String, String> dateFormatMap = CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
    List<DataField> dataFields = new ArrayList<>();
    List<DataField> complexDataFields = new ArrayList<>();
    // And then add complex data types and measures.
    for (CarbonColumn column : dimensions) {
        DataField dataField = new DataField(column);
        dataField.setDateFormat(dateFormatMap.get(column.getColName()));
        if (column.isComplex()) {
            complexDataFields.add(dataField);
        } else {
            dataFields.add(dataField);
        }
    }
    dataFields.addAll(complexDataFields);
    for (CarbonColumn column : measures) {
        // This dummy measure is added when no measure was present. We no need to load it.
        if (!(column.getColName().equals("default_dummy_measure"))) {
            dataFields.add(new DataField(column));
        }
    }
    configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
    configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
    // configuration for one pass load: dictionary server info
    configuration.setUseOnePass(loadModel.getUseOnePass());
    configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
    configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
    configuration.setPreFetch(loadModel.isPreFetch());
    configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
    configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());
    return configuration;
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) File(java.io.File)

Example 64 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonInputFormat method populateCarbonTable.

/**
   * this method will read the schema from the physical file and populate into CARBON_TABLE
   * @param configuration
   * @throws IOException
   */
private static void populateCarbonTable(Configuration configuration) throws IOException {
    String dirs = configuration.get(INPUT_DIR, "");
    String[] inputPaths = StringUtils.split(dirs);
    if (inputPaths.length == 0) {
        throw new InvalidPathException("No input paths specified in job");
    }
    AbsoluteTableIdentifier absoluteTableIdentifier = AbsoluteTableIdentifier.fromTablePath(inputPaths[0]);
    // read the schema file to get the absoluteTableIdentifier having the correct table id
    // persisted in the schema
    CarbonTable carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier);
    setCarbonTable(configuration, carbonTable);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) InvalidPathException(org.apache.hadoop.fs.InvalidPathException)

Example 65 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.

/**
 * Create the model using @{@link CarbonDataLoadConfiguration}
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String[] storeLocation, int bucketId, int taskExtension) {
    CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
    boolean[] isUseInvertedIndex = CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());
    int[] dimLensWithComplex = configuration.getCardinalityFinder().getCardinality();
    if (!configuration.isSortTable()) {
        for (int i = 0; i < dimLensWithComplex.length; i++) {
            if (dimLensWithComplex[i] != 0) {
                dimLensWithComplex[i] = Integer.MAX_VALUE;
            }
        }
    }
    CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(identifier.getDatabaseName(), identifier.getTableName());
    List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(identifier.getTableName()), carbonTable.getMeasureByTableName(identifier.getTableName()));
    int[] colCardinality = CarbonUtil.getFormattedCardinality(dimLensWithComplex, wrapperColumnSchema);
    SegmentProperties segmentProperties = new SegmentProperties(wrapperColumnSchema, colCardinality);
    int[] dimLens = configuration.calcDimensionLengths();
    int dimensionCount = configuration.getDimensionCount();
    int noDictionaryCount = configuration.getNoDictionaryCount();
    int complexDimensionCount = configuration.getComplexColumnCount();
    int measureCount = configuration.getMeasureCount();
    int simpleDimsCount = dimensionCount - noDictionaryCount - complexDimensionCount;
    int[] simpleDimsLen = new int[simpleDimsCount];
    for (int i = 0; i < simpleDimsCount; i++) {
        simpleDimsLen[i] = dimLens[i];
    }
    // To Set MDKey Index of each primitive type in complex type
    int surrIndex = simpleDimsCount;
    Iterator<Map.Entry<String, GenericDataType>> complexMap = CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields()).entrySet().iterator();
    Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
    while (complexMap.hasNext()) {
        Map.Entry<String, GenericDataType> complexDataType = complexMap.next();
        complexDataType.getValue().setOutputArrayIndex(0);
        complexIndexMap.put(simpleDimsCount, complexDataType.getValue());
        simpleDimsCount++;
        List<GenericDataType> primitiveTypes = new ArrayList<GenericDataType>();
        complexDataType.getValue().getAllPrimitiveChildren(primitiveTypes);
        for (GenericDataType eachPrimitive : primitiveTypes) {
            eachPrimitive.setSurrogateIndex(surrIndex++);
        }
    }
    CarbonDataFileAttributes carbonDataFileAttributes = new CarbonDataFileAttributes(Long.parseLong(configuration.getTaskNo()), (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
    String carbonDataDirectoryPath = getCarbonDataFolderLocation(configuration);
    CarbonFactDataHandlerModel carbonFactDataHandlerModel = new CarbonFactDataHandlerModel();
    carbonFactDataHandlerModel.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
    carbonFactDataHandlerModel.setDatabaseName(identifier.getDatabaseName());
    carbonFactDataHandlerModel.setTableName(identifier.getTableName());
    carbonFactDataHandlerModel.setMeasureCount(measureCount);
    carbonFactDataHandlerModel.setStoreLocation(storeLocation);
    carbonFactDataHandlerModel.setDimLens(dimLens);
    carbonFactDataHandlerModel.setNoDictionaryCount(noDictionaryCount);
    carbonFactDataHandlerModel.setDimensionCount(configuration.getDimensionCount() - noDictionaryCount);
    carbonFactDataHandlerModel.setComplexIndexMap(complexIndexMap);
    carbonFactDataHandlerModel.setSegmentProperties(segmentProperties);
    carbonFactDataHandlerModel.setColCardinality(colCardinality);
    carbonFactDataHandlerModel.setMeasureDataType(configuration.getMeasureDataType());
    carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema);
    carbonFactDataHandlerModel.setPrimitiveDimLens(simpleDimsLen);
    carbonFactDataHandlerModel.setCarbonDataFileAttributes(carbonDataFileAttributes);
    carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
    carbonFactDataHandlerModel.setIsUseInvertedIndex(isUseInvertedIndex);
    carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    carbonFactDataHandlerModel.setComplexDimensionKeyGenerator(configuration.createKeyGeneratorForComplexDimension());
    carbonFactDataHandlerModel.bucketId = bucketId;
    carbonFactDataHandlerModel.segmentId = configuration.getSegmentId();
    carbonFactDataHandlerModel.taskExtension = taskExtension;
    carbonFactDataHandlerModel.tableSpec = configuration.getTableSpec();
    carbonFactDataHandlerModel.sortScope = CarbonDataProcessorUtil.getSortScope(configuration);
    DataMapWriterListener listener = new DataMapWriterListener();
    listener.registerAllWriter(configuration.getTableSpec().getCarbonTable(), configuration.getSegmentId(), storeLocation[new Random().nextInt(storeLocation.length)]);
    carbonFactDataHandlerModel.dataMapWriterlistener = listener;
    carbonFactDataHandlerModel.writingCoresCount = configuration.getWritingCoresCount();
    return carbonFactDataHandlerModel;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) DataMapWriterListener(org.apache.carbondata.processing.datamap.DataMapWriterListener) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) Random(java.util.Random) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)101 ArrayList (java.util.ArrayList)36 IOException (java.io.IOException)31 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)19 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)18 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)16 Configuration (org.apache.hadoop.conf.Configuration)15 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)14 Map (java.util.Map)13 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)13 List (java.util.List)12 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)12 HashMap (java.util.HashMap)11 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)11 File (java.io.File)9 Expression (org.apache.carbondata.core.scan.expression.Expression)9 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)8 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)8 InputSplit (org.apache.hadoop.mapreduce.InputSplit)8 Test (org.junit.Test)8