Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
Class CarbonTableReader, method parseCarbonMetadata.
/**
 * Read the metadata of the given table
 * and cache it in this.carbonCache (CarbonTableReader cache).
 *
 * @param table the schema and table name of the given table.
 * @param tablePath the store path of the table.
 * @param config the Hadoop configuration used to access the store.
 * @return the CarbonTableCacheModel instance which contains all the needed metadata for a table.
 */
private CarbonTableCacheModel parseCarbonMetadata(SchemaTableName table, String tablePath, Configuration config) {
  try {
    CarbonTableCacheModel cache = getValidCacheBySchemaTableName(table);
    if (cache != null) {
      return cache;
    }
    // Multiple tasks can be launched in a worker concurrently, so this block must be synchronized.
    synchronized (this) {
      // The cache might have been filled by another thread in the meantime; if so, use it.
      CarbonTableCacheModel cacheModel = getValidCacheBySchemaTableName(table);
      if (cacheModel != null) {
        return cacheModel;
      }
      // Step 1: get the store path of the table and cache it.
      String schemaFilePath = CarbonTablePath.getSchemaFilePath(tablePath, config);
      // If the metadata folder exists, it is a transactional table.
      CarbonFile schemaFile = FileFactory.getCarbonFile(schemaFilePath, config);
      boolean isTransactionalTable = schemaFile.exists();
      org.apache.carbondata.format.TableInfo tableInfo;
      long modifiedTime = System.currentTimeMillis();
      if (isTransactionalTable) {
        // Step 2: read the metadata (tableInfo) of the table.
        ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {

          // TBase is used to read and write thrift objects.
          // TableInfo is a kind of TBase used to read and write table information.
          // TableInfo is generated by thrift;
          // see schema.thrift under format/src/main/thrift for details.
          public TBase create() {
            return new org.apache.carbondata.format.TableInfo();
          }
        };
        ThriftReader thriftReader = new ThriftReader(schemaFilePath, createTBase, config);
        thriftReader.open();
        tableInfo = (org.apache.carbondata.format.TableInfo) thriftReader.read();
        thriftReader.close();
        modifiedTime = schemaFile.getLastModifiedTime();
      } else {
        tableInfo = CarbonUtil.inferSchema(tablePath, table.getTableName(), false, config);
      }
      // Step 3: convert the format-level TableInfo to the code-level TableInfo.
      SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
      // wrapperTableInfo is the code-level representation of a table in carbondata core,
      // different from the Thrift TableInfo.
      TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(tableInfo, table.getSchemaName(), table.getTableName(), tablePath);
      wrapperTableInfo.setTransactionalTable(isTransactionalTable);
      CarbonMetadata.getInstance().removeTable(wrapperTableInfo.getTableUniqueName());
      // Step 4: load the metadata info into CarbonMetadata.
      CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
      CarbonTable carbonTable = Objects.requireNonNull(CarbonMetadata.getInstance().getCarbonTable(table.getSchemaName(), table.getTableName()), "carbontable is null");
      refreshIndexInfo(carbonTable, config);
      cache = new CarbonTableCacheModel(modifiedTime, carbonTable);
      // cache the table
      carbonCache.get().put(table, cache);
      cache.setCarbonTable(carbonTable);
    }
    return cache;
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  }
}
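The caching logic above follows a check, synchronize, re-check pattern so that only one thread parses the schema while concurrent readers reuse the cached result. The standalone sketch below illustrates just that pattern; MetadataCache and its loader function are illustrative placeholders, not CarbonData classes.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

// Minimal sketch of the check/lock/re-check caching pattern used in parseCarbonMetadata.
// MetadataCache and the loader function are hypothetical placeholders, not CarbonData APIs.
class MetadataCache<K, V> {

  private final Map<K, V> cache = new ConcurrentHashMap<>();
  private final Function<K, V> loader;

  MetadataCache(Function<K, V> loader) {
    this.loader = loader;
  }

  V get(K key) {
    V value = cache.get(key);            // fast, lock-free lookup first
    if (value != null) {
      return value;
    }
    synchronized (this) {                // serialize the expensive parse
      value = cache.get(key);            // re-check: another thread may have filled the cache
      if (value == null) {
        value = loader.apply(key);       // e.g. read and convert the schema file
        cache.put(key, value);
      }
      return value;
    }
  }
}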
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
Class CarbonInternalLoaderUtil, method recordLoadMetadata.
/**
 * This API writes the load-level metadata for the load management module so that
 * load and query execution can be managed smoothly.
 *
 * @return boolean indicating whether the status update was done or not.
 */
public static boolean recordLoadMetadata(List<LoadMetadataDetails> newLoadMetadataDetails, List<String> validSegments, CarbonTable carbonTable, List<CarbonTable> indexCarbonTables, String databaseName, String tableName) {
  boolean status = false;
  String metaDataFilepath = carbonTable.getMetadataPath();
  AbsoluteTableIdentifier absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier();
  SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
  ICarbonLock carbonLock = segmentStatusManager.getTableStatusLock();
  try {
    int retryCount = CarbonLockUtil.getLockProperty(CarbonCommonConstants.NUMBER_OF_TRIES_FOR_CONCURRENT_LOCK, CarbonCommonConstants.NUMBER_OF_TRIES_FOR_CONCURRENT_LOCK_DEFAULT);
    int maxTimeout = CarbonLockUtil.getLockProperty(CarbonCommonConstants.MAX_TIMEOUT_FOR_CONCURRENT_LOCK, CarbonCommonConstants.MAX_TIMEOUT_FOR_CONCURRENT_LOCK_DEFAULT);
    if (carbonLock.lockWithRetries(retryCount, maxTimeout)) {
      LOGGER.info("Acquired lock for table " + databaseName + "." + tableName + " for table status update");
      if (isSegmentsAlreadyCompactedForNewMetaDataDetails(indexCarbonTables, tableName, newLoadMetadataDetails)) {
        return false;
      }
      LoadMetadataDetails[] currentLoadMetadataDetails = SegmentStatusManager.readLoadMetadata(metaDataFilepath);
      List<LoadMetadataDetails> updatedLoadMetadataDetails = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
      // Check which of the in-progress loads need to be overwritten with the new details.
      boolean found = false;
      for (int i = 0; i < currentLoadMetadataDetails.length; i++) {
        for (LoadMetadataDetails newLoadMetadataDetail : newLoadMetadataDetails) {
          if (currentLoadMetadataDetails[i].getLoadName().equals(newLoadMetadataDetail.getLoadName())) {
            currentLoadMetadataDetails[i] = newLoadMetadataDetail;
            found = true;
            break;
          }
        }
        updatedLoadMetadataDetails.add(currentLoadMetadataDetails[i]);
      }
      // Check if newLoadMetadataDetails has segments which are not in currentLoadMetadataDetails
      // and add them to updatedLoadMetadataDetails.
      boolean foundNext = false;
      for (int i = 0; i < newLoadMetadataDetails.size(); i++) {
        foundNext = false;
        for (int j = 0; j < currentLoadMetadataDetails.length; j++) {
          if (newLoadMetadataDetails.get(i).getLoadName().equals(currentLoadMetadataDetails[j].getLoadName())) {
            foundNext = true;
            break;
          }
          if (j == currentLoadMetadataDetails.length - 1 && !foundNext) {
            // If the segment is not found in the current list, add it.
            updatedLoadMetadataDetails.add(newLoadMetadataDetails.get(i));
            found = true;
          }
        }
      }
      // When data is loaded for the first time, add all the details.
      if (currentLoadMetadataDetails.length == 0 || !found) {
        updatedLoadMetadataDetails.addAll(newLoadMetadataDetails);
      }
      List<String> indexTables = CarbonIndexUtil.getSecondaryIndexes(carbonTable);
      if (!indexTables.isEmpty()) {
        List<LoadMetadataDetails> newSegmentDetailsListForIndexTable = new ArrayList<>(validSegments.size());
        for (String segmentId : validSegments) {
          LoadMetadataDetails newSegmentDetailsObject = new LoadMetadataDetails();
          newSegmentDetailsObject.setLoadName(segmentId);
          newSegmentDetailsListForIndexTable.add(newSegmentDetailsObject);
        }
        for (CarbonTable indexTable : indexCarbonTables) {
          List<LoadMetadataDetails> indexTableDetailsList = CarbonIndexUtil.getTableStatusDetailsForIndexTable(updatedLoadMetadataDetails, indexTable, newSegmentDetailsListForIndexTable);
          SegmentStatusManager.writeLoadDetailsIntoFile(CarbonTablePath.getTableStatusFilePath(indexTable.getTablePath()), indexTableDetailsList.toArray(new LoadMetadataDetails[0]));
        }
      } else if (carbonTable.isIndexTable()) {
        SegmentStatusManager.writeLoadDetailsIntoFile(metaDataFilepath + CarbonCommonConstants.FILE_SEPARATOR + CarbonTablePath.TABLE_STATUS_FILE, updatedLoadMetadataDetails.toArray(new LoadMetadataDetails[0]));
      }
      status = true;
    } else {
      LOGGER.error("Not able to acquire the lock for table status update for table " + databaseName + "." + tableName);
      throw new RuntimeException("Not able to acquire the lock for table status update for table " + databaseName + "." + tableName);
    }
  } catch (IOException e) {
    LOGGER.error("Not able to acquire the lock for table status update for table " + databaseName + "." + tableName);
  } finally {
    if (carbonLock.unlock()) {
      LOGGER.info("Table unlocked successfully after table status update: " + databaseName + "." + tableName);
    } else {
      LOGGER.error("Unable to unlock the table lock for table " + databaseName + "." + tableName + " during table status update");
    }
  }
  return status;
}
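The part of this method most worth reusing is the lock handling: acquire with bounded retries, write the status file, and unlock unconditionally in a finally block. A reduced sketch of just that structure is shown below; TableStatusLock and the writeStatus action are illustrative stand-ins, and only lockWithRetries/unlock mirror the ICarbonLock calls above.

// Reduced sketch of the acquire-with-retries / write / always-unlock structure above.
// TableStatusLock is an illustrative stand-in for ICarbonLock, not a CarbonData class.
interface TableStatusLock {
  boolean lockWithRetries(int retryCount, int maxTimeoutSeconds);
  boolean unlock();
}

final class TableStatusWriter {

  static boolean writeWithLock(TableStatusLock lock, Runnable writeStatus) {
    boolean status = false;
    try {
      if (lock.lockWithRetries(3, 5)) {   // bounded retries and wait, like the CarbonLockUtil properties above
        writeStatus.run();                // e.g. rewrite the table status file
        status = true;
      }
    } finally {
      if (!lock.unlock()) {               // always release the lock, even on failure
        System.err.println("Unable to unlock the table status lock");
      }
    }
    return status;
  }
}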
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
Class CarbondataPageSourceProvider, method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, TupleDomain<ColumnHandle> dynamicFilter) {
  HiveSplit carbonSplit = checkType(split, HiveSplit.class, "split is not class HiveSplit");
  this.queryId = carbonSplit.getSchema().getProperty("queryId");
  if (this.queryId == null) {
    // Fall back to the Hive page source.
    return super.createPageSource(transaction, session, split, table, columns, dynamicFilter);
  }
  // TODO: check and use dynamicFilter in CarbondataPageSource
  Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
  configuration = carbonTableReader.updateS3Properties(configuration);
  for (Map.Entry<Object, Object> entry : carbonSplit.getSchema().entrySet()) {
    configuration.set(entry.getKey().toString(), entry.getValue().toString());
  }
  CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);
  boolean isDirectVectorFill = carbonTableReader.config.getPushRowFilter() == null || carbonTableReader.config.getPushRowFilter().equalsIgnoreCase("false");
  return new CarbondataPageSource(carbonTable, queryId, carbonSplit, columns, table, configuration, isDirectVectorFill);
}
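One small but reusable detail above is the loop that mirrors every entry of the split's schema Properties into the Hadoop Configuration handed to the reader. A standalone version of that step might look like the sketch below; SchemaToConf is an illustrative helper, not part of the connector.

import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;

// Standalone sketch of the property-copy step in createPageSource above.
// SchemaToConf is an illustrative helper class, not part of the connector code.
public final class SchemaToConf {

  public static Configuration copy(Properties schema, Configuration base) {
    Configuration conf = new Configuration(base);    // copy so the shared configuration is not mutated
    for (Map.Entry<Object, Object> entry : schema.entrySet()) {
      conf.set(entry.getKey().toString(), entry.getValue().toString());
    }
    return conf;
  }
}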
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
Class StoreCreator, method createCarbonStore.
/**
* Create store without any restructure
*/
public static void createCarbonStore() {
  try {
    String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
    File storeDir = new File(absoluteTableIdentifier.getStorePath());
    CarbonUtil.deleteFoldersAndFiles(storeDir);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, absoluteTableIdentifier.getStorePath());
    CarbonTable table = createTable();
    writeDictionary(factFilePath, table);
    CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
    CarbonLoadModel loadModel = new CarbonLoadModel();
    loadModel.setCarbonDataLoadSchema(schema);
    loadModel.setDatabaseName(absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName());
    loadModel.setTableName(absoluteTableIdentifier.getCarbonTableIdentifier().getTableName());
    loadModel.setFactFilePath(factFilePath);
    loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
    loadModel.setStorePath(absoluteTableIdentifier.getStorePath());
    loadModel.setDateFormat(null);
    loadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
    loadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
    loadModel.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," + "\\N");
    loadModel.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + "false");
    loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + "FORCE");
    loadModel.setIsEmptyDataBadRecord(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + "false");
    loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
    loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
    loadModel.setTaskNo("0");
    loadModel.setSegmentId("0");
    loadModel.setPartitionId("0");
    loadModel.setFactTimeStamp(System.currentTimeMillis());
    loadModel.setMaxColumns("10");
    executeGraph(loadModel, absoluteTableIdentifier.getStorePath());
  } catch (Exception e) {
    e.printStackTrace();
  }
}
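Several of the setters above encode their option as a single "optionName,value" string, and the CSV header is turned into the column list with a plain String.split. The snippet below shows just those two conventions in isolation; LoadOptionFormat and its option helper are illustrative only, not part of StoreCreator.

// Illustrative sketch of two conventions used in createCarbonStore above:
// load options encoded as "optionName,value" strings, and the CSV header split into columns.
// LoadOptionFormat is not a CarbonData class.
public final class LoadOptionFormat {

  static String option(String optionName, String value) {
    return optionName + "," + value;                  // e.g. "bad_records_action,FORCE"
  }

  public static void main(String[] args) {
    String csvHeader = "ID,date,country,name,phonetype,serialname,salary";
    String[] csvHeaderColumns = csvHeader.split(","); // same split used for setCsvHeaderColumns
    System.out.println(csvHeaderColumns.length + " columns");
    System.out.println(option("bad_records_logger_enable", "false"));
  }
}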
Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
Class MapredCarbonInputFormat, method getQueryModel.
public QueryModel getQueryModel(Configuration configuration) throws IOException {
  CarbonTable carbonTable = getCarbonTable(configuration);
  // getting the table absoluteTableIdentifier from the carbonTable
  // to avoid unnecessary deserialization
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  // query plan includes projection column
  String projection = getColumnProjection(configuration);
  if (projection == null) {
    projection = configuration.get("hive.io.file.readcolumn.names");
  }
  CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
  QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
  // set the filter to the query model in order to filter blocklet before scan
  Expression filter = getFilterPredicates(configuration);
  CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
  FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
  queryModel.setFilterExpressionResolverTree(filterIntf);
  return queryModel;
}
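The projection handling above prefers an explicitly configured projection and only falls back to Hive's hive.io.file.readcolumn.names property when none is set. A minimal sketch of that fallback is shown below; the "carbon.projection" key and the ProjectionResolver class are assumptions for illustration, not CarbonData APIs.

import org.apache.hadoop.conf.Configuration;

// Minimal sketch of the projection fallback in getQueryModel above.
// The "carbon.projection" key and ProjectionResolver are illustrative assumptions only.
public final class ProjectionResolver {

  static String resolveProjection(Configuration conf) {
    String projection = conf.get("carbon.projection");
    if (projection == null) {
      projection = conf.get("hive.io.file.readcolumn.names");
    }
    return projection;
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("hive.io.file.readcolumn.names", "id,name,salary");
    System.out.println(resolveProjection(conf));      // prints: id,name,salary
  }
}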