use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class DataLoadProcessBuilder method createConfiguration.
/**
 * Builds the {@link CarbonDataLoadConfiguration} for a load from the given load model:
 * table identity, segment details, load options, data fields and writer settings.
 *
 * @param loadModel load model describing the target table and the options of this load
 * @return the populated configuration
 */
public static CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel) {
  CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  CarbonDataLoadConfiguration config = new CarbonDataLoadConfiguration();

  // Table identity and basic load attributes.
  config.setTableIdentifier(table.getAbsoluteTableIdentifier());
  config.setCarbonTransactionalTable(loadModel.isCarbonTransactionalTable());
  config.setSchemaUpdatedTimeStamp(table.getTableLastUpdatedTime());
  config.setHeader(loadModel.getCsvHeaderColumns());
  config.setSegmentId(loadModel.getSegmentId());
  config.setNonSchemaColumnsPresent(loadModel.isNonSchemaColumnsPresent());

  // Use the path recorded for the segment being loaded, when one is present.
  List<LoadMetadataDetails> segmentDetails = loadModel.getLoadMetadataDetails();
  if (segmentDetails != null) {
    for (LoadMetadataDetails segment : segmentDetails) {
      boolean isCurrentSegment = segment.getLoadName().equals(loadModel.getSegmentId());
      if (isCurrentSegment && StringUtils.isNotEmpty(segment.getPath())) {
        config.setSegmentPath(segment.getPath());
      }
    }
  }

  config.setSkipParsers(loadModel.isSkipParsers());
  config.setTaskNo(loadModel.getTaskNo());
  config.setMetrics(loadModel.getMetrics());

  // Load options; several model values are comma separated and only the second token is used.
  String[] delimiters = loadModel.getComplexDelimiters().toArray(new String[0]);
  config.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, delimiters);
  config.setDataLoadProperty(
      DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT,
      loadModel.getSerializationNullFormat().split(",")[1]);
  config.setDataLoadProperty(
      DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
  config.setDataLoadProperty(
      DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE,
      loadModel.getBadRecordsLoggerEnable().split(",")[1]);
  config.setDataLoadProperty(
      DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION,
      loadModel.getBadRecordsAction().split(",")[1]);
  config.setDataLoadProperty(
      DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD,
      loadModel.getIsEmptyDataBadRecord().split(",")[1]);
  config.setDataLoadProperty(
      DataLoadProcessorConstants.SKIP_EMPTY_LINE, loadModel.getSkipEmptyLine());
  config.setDataLoadProperty(
      DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
  config.setDataLoadProperty(
      CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS, loadModel.getGlobalSortPartitions());
  config.setDataLoadProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH, loadModel.getBadRecordsLocation());
  config.setDataLoadProperty(
      CarbonLoadOptionConstants.CARBON_OPTIONS_BINARY_DECODER, loadModel.getBinaryDecoder());

  boolean withoutReArrange = loadModel.isLoadWithoutConverterWithoutReArrangeStep();
  if (withoutReArrange) {
    config.setDataLoadProperty(DataLoadProcessorConstants.NO_REARRANGE_OF_ROWS, withoutReArrange);
  }

  // Data fields of the visible columns.
  List<CarbonDimension> visibleDimensions = table.getVisibleDimensions();
  List<CarbonMeasure> visibleMeasures = table.getVisibleMeasures();
  List<DataField> fields = new ArrayList<>();
  List<DataField> complexFields = new ArrayList<>();
  List<DataField> partitionFields = new ArrayList<>();
  config.setNumberOfSortColumns(table.getNumberOfSortColumns());
  if (withoutReArrange) {
    // Re-arrange the schema once instead of re-arranging the data of every row.
    getReArrangedDataFields(
        loadModel, table, visibleDimensions, visibleMeasures, complexFields, partitionFields,
        fields);
  } else {
    getDataFields(loadModel, visibleDimensions, visibleMeasures, complexFields, fields);
    boolean sortedLoad = config.isSortTable()
        && !SortScopeOptions.getSortScope(loadModel.getSortScope())
            .equals(SortScopeOptions.SortScope.NO_SORT);
    if (sortedLoad) {
      fields = updateDataFieldsBasedOnSortColumns(fields);
    }
  }
  config.setDataFields(fields.toArray(new DataField[0]));

  config.setBucketingInfo(table.getBucketingInfo());
  config.setBucketHashMethod(table.getBucketHashMethod());
  config.setPreFetch(loadModel.isPreFetch());
  config.setNumberOfNoDictSortColumns(table.getNumberOfNoDictSortColumns());
  config.setDataWritePath(loadModel.getDataWritePath());
  setSortColumnInfo(table, loadModel, config);

  // threads per partition
  if (table.isHivePartitionTable()) {
    config.setWritingCoresCount((short) 1);
  }
  config.setTableSpec(new TableSpec(table, false));
  // A positive SDK writer core count overrides the value chosen above.
  if (loadModel.getSdkWriterCores() > 0) {
    config.setWritingCoresCount(loadModel.getSdkWriterCores());
  }
  config.setNumberOfLoadingCores(CarbonProperties.getInstance().getNumberOfLoadingCores());
  config.setColumnCompressor(loadModel.getColumnCompressor());
  return config;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class AbstractFactDataWriter method writeIndexFile.
/**
 * Writes the index file for the blocks collected in {@code blockIndexInfoList}.
 *
 * <p>Nothing is written when the list is empty. When data is not written directly to the
 * store path, the index file is first created in a randomly chosen temp location, then copied
 * to the carbon data directory and the temp file is deleted.
 *
 * @throws IOException throws io exception if any problem while writing
 * @throws CarbonDataWriterException data writing
 */
protected void writeIndexFile() throws IOException, CarbonDataWriterException {
  if (blockIndexInfoList.isEmpty()) {
    // no need to write index file, if data file is not there.
    return;
  }
  // get the header
  IndexHeader indexHeader = CarbonMetadataUtil.getIndexHeader(
      thriftColumnSchemaList, model.getBucketId(), model.getSchemaUpdatedTimeStamp());
  indexHeader.setIs_sort(model.getSortScope() != null && model.getSortScope() != NO_SORT);
  // get the block index info thrift
  List<BlockIndex> blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList);
  // get all block minmax and add to segmentMinMaxMap
  CarbonTable carbonTable = model.getTableSpec().getCarbonTable();
  if (null != model.getSegmentId() && !carbonTable.isHivePartitionTable()
      && !carbonTable.isIndexTable()) {
    for (BlockIndexInfo blockIndex : blockIndexInfoList) {
      byte[][] min = blockIndex.getBlockletIndex().getMinMaxIndex().getMinValues();
      byte[][] max = blockIndex.getBlockletIndex().getMinMaxIndex().getMaxValues();
      BlockColumnMetaDataInfo blockColumnMetaDataInfo =
          new BlockColumnMetaDataInfo(thriftColumnSchemaList, min, max);
      SegmentMetaDataInfoStats.getInstance().setBlockMetaDataInfo(
          model.getTableName(), model.getSegmentId(), blockColumnMetaDataInfo,
          this.model.getWrapperColumnSchema());
    }
  }
  String indexFileName;
  if (enableDirectlyWriteDataToStorePath) {
    String rawFileName = model.getCarbonDataDirectoryPath() + CarbonCommonConstants.FILE_SEPARATOR + CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp(), model.getSegmentId());
    indexFileName = FileFactory.getUpdatedFilePath(rawFileName);
  } else {
    // randomly choose a temp location for index file
    String[] tempLocations = model.getStoreLocation();
    String chosenTempLocation = tempLocations[new Random().nextInt(tempLocations.length)];
    LOGGER.info("Randomly choose index file location: " + chosenTempLocation);
    indexFileName = chosenTempLocation + File.separator + CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp(), model.getSegmentId());
  }
  CarbonIndexFileWriter writer = new CarbonIndexFileWriter();
  // open file
  writer.openThriftWriter(indexFileName);
  try {
    // write the header first
    writer.writeThrift(indexHeader);
    // write the indexes
    for (BlockIndex blockIndex : blockIndexThrift) {
      writer.writeThrift(blockIndex);
    }
  } finally {
    // close the opened writer even when a write fails, so it is not leaked
    writer.close();
  }
  if (!enableDirectlyWriteDataToStorePath) {
    CarbonUtil.copyCarbonDataFileToCarbonStorePath(
        indexFileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes, metrics);
    FileFactory.deleteFile(indexFileName);
  } else if (carbonTable.isHivePartitionTable()
      && model.getCarbonDataDirectoryPath().endsWith(".tmp")) {
    if (metrics != null) {
      addOutputFilesInfoToMetrics(indexFileName);
    }
  }
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class SortParameters method changeDataFieldForSortAndPartition.
/**
 * Returns the configured data fields in a new array, reordered so that fields whose column
 * name matches an entry of the table's "sort_columns" property come first (in property
 * order), followed by the fields whose column schema is not a sort column.
 *
 * @param configuration load configuration supplying the data fields and the table
 * @return the reordered array, of the same length as the configured data fields
 */
private static DataField[] changeDataFieldForSortAndPartition(CarbonDataLoadConfiguration configuration) {
  DataField[] fields = configuration.getDataFields();
  CarbonTable table = configuration.getTableSpec().getCarbonTable();
  String sortColumnProperty =
      table.getTableInfo().getFactTable().getTableProperties().getOrDefault("sort_columns", "");
  String[] sortColumnNames = sortColumnProperty.split(",");
  DataField[] reordered = new DataField[fields.length];
  int next = 0;

  // First pass: fields matching the sort column names, in property order.
  for (int s = 0; s < sortColumnNames.length; s++) {
    String sortColumnName = sortColumnNames[s];
    for (int f = 0; f < fields.length; f++) {
      DataField candidate = fields[f];
      if (candidate.getColumn().getColName().equalsIgnoreCase(sortColumnName)) {
        reordered[next] = candidate;
        next++;
      }
    }
  }

  // Second pass: the remaining non-sort fields, in their original order.
  for (int f = 0; f < fields.length; f++) {
    DataField candidate = fields[f];
    if (candidate.getColumn().getColumnSchema().isSortColumn()) {
      continue;
    }
    reordered[next] = candidate;
    next++;
  }
  return reordered;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.
/**
 * Builds a {@link CarbonFactDataHandlerModel} from the given
 * {@link CarbonDataLoadConfiguration}.
 *
 * @param configuration load configuration to read table, segment and load properties from
 * @param storeLocation store locations set on the model
 * @param bucketId bucket id set on the model and used for the shard name
 * @param taskExtension task extension set on the model and used for the shard name
 * @param listener index writer listener to use; when {@code null} a new one is created and
 *     all writers are registered on it
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String[] storeLocation, int bucketId, int taskExtension, IndexWriterListener listener) {
  CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
  CarbonTable carbonTable = configuration.getTableSpec().getCarbonTable();
  List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(
      carbonTable.getVisibleDimensions(), carbonTable.getVisibleMeasures());
  SegmentProperties segmentProperties = new SegmentProperties(wrapperColumnSchema);
  int complexDimensionCount = segmentProperties.getNumberOfComplexDimensions();

  // Complex columns are indexed after the primitive dimensions; surrogate indexes of their
  // dictionary children start from the same offset.
  int nextComplexIndex = segmentProperties.getNumberOfPrimitiveDimensions();
  int nextSurrogateIndex = nextComplexIndex;
  Map<String, GenericDataType> complexTypes = CarbonDataProcessorUtil.getComplexTypesMap(
      configuration.getDataFields(),
      configuration.getDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT)
          .toString());
  Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
  for (GenericDataType complexType : complexTypes.values()) {
    complexType.setOutputArrayIndex(0);
    complexIndexMap.put(nextComplexIndex, complexType);
    nextComplexIndex++;
    List<GenericDataType> primitiveChildren = new ArrayList<>();
    complexType.getAllPrimitiveChildren(primitiveChildren);
    for (GenericDataType child : primitiveChildren) {
      if (child.getIsColumnDictionary()) {
        child.setSurrogateIndex(nextSurrogateIndex);
        nextSurrogateIndex++;
      }
    }
  }

  // Data types of the dimension fields that are not of date type.
  List<DataType> noDictDataTypesList = new ArrayList<>();
  for (DataField field : configuration.getDataFields()) {
    if (field.isDateDataType() || !field.getColumn().isDimension()) {
      continue;
    }
    noDictDataTypesList.add(field.getColumn().getDataType());
  }

  CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(
      configuration.getTaskNo(),
      (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
  String dataDirectoryPath = getCarbonDataFolderLocation(configuration);

  CarbonFactDataHandlerModel handlerModel = new CarbonFactDataHandlerModel();
  handlerModel.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
  handlerModel.setDatabaseName(identifier.getDatabaseName());
  handlerModel.setTableName(identifier.getTableName());
  handlerModel.setStoreLocation(storeLocation);
  handlerModel.setNoDictDataTypesList(noDictDataTypesList);
  handlerModel.setComplexIndexMap(complexIndexMap);
  handlerModel.setSegmentProperties(segmentProperties);
  handlerModel.setMeasureDataType(configuration.getMeasureDataType());
  handlerModel.setNoDictAndComplexColumns(configuration.getNoDictAndComplexDimensions());
  handlerModel.setWrapperColumnSchema(wrapperColumnSchema);
  handlerModel.setCarbonDataFileAttributes(fileAttributes);
  handlerModel.setCarbonDataDirectoryPath(dataDirectoryPath);
  handlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
  handlerModel.bucketId = bucketId;
  handlerModel.segmentId = configuration.getSegmentId();
  handlerModel.taskExtension = taskExtension;
  handlerModel.tableSpec = configuration.getTableSpec();
  handlerModel.sortScope = CarbonDataProcessorUtil.getSortScope(configuration);
  handlerModel.columnCompressor = configuration.getColumnCompressor();

  if (listener == null) {
    // No listener supplied: create one and register all writers for this shard.
    String shardName = CarbonTablePath.getShardName(
        fileAttributes.getTaskId(), bucketId, taskExtension,
        String.valueOf(fileAttributes.getFactTimeStamp()), configuration.getSegmentId());
    listener = new IndexWriterListener();
    listener.registerAllWriter(
        carbonTable, configuration.getSegmentId(), shardName, segmentProperties);
  }
  handlerModel.indexWriterlistener = listener;
  handlerModel.writingCoresCount = configuration.getWritingCoresCount();
  handlerModel.initNumberOfCores();
  handlerModel.setMetrics(configuration.getMetrics());
  return handlerModel;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonDataMergerUtil method updateLoadMetadataWithMergeStatus.
/**
 * Updates the table status after a compaction: under the table status lock, marks the merged
 * segments as {@code COMPACTED} and appends a {@code SUCCESS} entry for the merged segment.
 *
 * <p>The update is abandoned (returning {@code false}) when one of the segments to merge is
 * found to be {@code MARKED_FOR_DELETE}, or when the table status lock cannot be obtained.
 *
 * @param loadsToMerge segments that were merged; matching entries are marked as compacted
 * @param metaDataFilepath path the current load metadata is read from
 * @param mergedLoadNumber load name of the merged segment
 * @param carbonLoadModel load model supplying the table, its names and the fact timestamp
 * @param compactionType compaction type; {@code MAJOR} flags the new entry as major compacted
 * @param segmentFile segment file recorded on the new entry
 * @param viewManager used to look up the MV schema when the table is an MV table
 * @return {@code true} only when the table status file was written successfully
 * @throws IOException on I/O failures other than the final table status write, which is
 *     caught, logged and reported through the return value
 * @throws NoSuchMVException when the table is an MV table but no schema is found for it
 */
public static boolean updateLoadMetadataWithMergeStatus(List<LoadMetadataDetails> loadsToMerge, String metaDataFilepath, String mergedLoadNumber, CarbonLoadModel carbonLoadModel, CompactionType compactionType, String segmentFile, MVManager viewManager) throws IOException, NoSuchMVException {
  boolean tableStatusUpdationStatus = false;
  AbsoluteTableIdentifier identifier = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier();
  SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
  ICarbonLock carbonLock = segmentStatusManager.getTableStatusLock();
  try {
    int retryCount = CarbonLockUtil.getLockProperty(CarbonCommonConstants.NUMBER_OF_TRIES_FOR_CONCURRENT_LOCK, CarbonCommonConstants.NUMBER_OF_TRIES_FOR_CONCURRENT_LOCK_DEFAULT);
    int maxTimeout = CarbonLockUtil.getLockProperty(CarbonCommonConstants.MAX_TIMEOUT_FOR_CONCURRENT_LOCK, CarbonCommonConstants.MAX_TIMEOUT_FOR_CONCURRENT_LOCK_DEFAULT);
    if (carbonLock.lockWithRetries(retryCount, maxTimeout)) {
      LOGGER.info("Acquired lock for the table " + carbonLoadModel.getDatabaseName() + "." + carbonLoadModel.getTableName() + " for table status updation ");
      String statusFilePath = CarbonTablePath.getTableStatusFilePath(identifier.getTablePath());
      LoadMetadataDetails[] loadDetails = SegmentStatusManager.readLoadMetadata(metaDataFilepath);
      long modificationOrDeletionTimeStamp = CarbonUpdateUtil.readCurrentTime();
      for (LoadMetadataDetails loadDetail : loadDetails) {
        // check if this segment is merged.
        if (loadsToMerge.contains(loadDetail)) {
          // if the segment was deleted after compaction started, discard the compaction
          // and treat it as failed.
          if (loadDetail.getSegmentStatus() == SegmentStatus.MARKED_FOR_DELETE) {
            LOGGER.error("Compaction is aborted as the segment " + loadDetail.getLoadName() + " is deleted after the compaction is started.");
            return false;
          }
          loadDetail.setSegmentStatus(SegmentStatus.COMPACTED);
          loadDetail.setModificationOrDeletionTimestamp(modificationOrDeletionTimeStamp);
          loadDetail.setMergedLoadName(mergedLoadNumber);
        }
      }
      // create entry for merged one.
      LoadMetadataDetails loadMetadataDetails = new LoadMetadataDetails();
      loadMetadataDetails.setSegmentStatus(SegmentStatus.SUCCESS);
      long loadEnddate = CarbonUpdateUtil.readCurrentTime();
      loadMetadataDetails.setLoadEndTime(loadEnddate);
      CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
      loadMetadataDetails.setLoadName(mergedLoadNumber);
      loadMetadataDetails.setSegmentFile(segmentFile);
      CarbonLoaderUtil.addDataIndexSizeIntoMetaEntry(loadMetadataDetails, mergedLoadNumber, carbonTable);
      loadMetadataDetails.setLoadStartTime(carbonLoadModel.getFactTimeStamp());
      // if this is a major compaction then set the segment as major compaction.
      if (CompactionType.MAJOR == compactionType) {
        loadMetadataDetails.setMajorCompacted("true");
      }
      if (carbonTable.isMV()) {
        // If table is mv table, then get segment mapping and set to extraInfo
        MVSchema viewSchema = viewManager.getSchema(carbonTable.getDatabaseName(), carbonTable.getTableName());
        if (null != viewSchema) {
          String segmentMap = MVManager.getUpdatedSegmentMap(mergedLoadNumber, viewSchema, loadDetails);
          loadMetadataDetails.setExtraInfo(segmentMap);
        } else {
          throw new NoSuchMVException(carbonTable.getDatabaseName(), carbonTable.getTableName());
        }
      }
      List<LoadMetadataDetails> updatedDetailsList = new ArrayList<>(Arrays.asList(loadDetails));
      // put the merged folder entry
      updatedDetailsList.add(loadMetadataDetails);
      try {
        SegmentStatusManager.writeLoadDetailsIntoFile(statusFilePath, updatedDetailsList.toArray(new LoadMetadataDetails[updatedDetailsList.size()]));
        tableStatusUpdationStatus = true;
      } catch (IOException e) {
        // keep the cause in the log; the failure itself is reported via the return value
        LOGGER.error("Error while writing metadata", e);
        tableStatusUpdationStatus = false;
      }
    } else {
      LOGGER.error("Could not able to obtain lock for table" + carbonLoadModel.getDatabaseName() + "." + carbonLoadModel.getTableName() + "for table status updation");
    }
  } finally {
    // unlock is attempted on every path, including when the lock was not obtained
    if (carbonLock.unlock()) {
      LOGGER.info("Table unlocked successfully after table status updation" + carbonLoadModel.getDatabaseName() + "." + carbonLoadModel.getTableName());
    } else {
      LOGGER.error("Unable to unlock Table lock for table" + carbonLoadModel.getDatabaseName() + "." + carbonLoadModel.getTableName() + " during table status updation");
    }
  }
  return tableStatusUpdationStatus;
}
Aggregations