Example 56 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonTableReader method parseCarbonMetadata.

/**
 * Read the metadata of the given table
 * and cache it in this.carbonCache (CarbonTableReader cache).
 *
 * @param table     the schema and table name of the given table
 * @param tablePath the store path of the table
 * @param config    the hadoop Configuration used to access the file system
 * @return the CarbonTableCacheModel instance which contains all the needed metadata for the table
 */
private CarbonTableCacheModel parseCarbonMetadata(SchemaTableName table, String tablePath, Configuration config) {
    try {
        CarbonTableCacheModel cache = getValidCacheBySchemaTableName(table);
        if (cache != null) {
            return cache;
        }
        // multiple tasks can run concurrently in a worker, so this block must be synchronized.
        synchronized (this) {
            // the cache may have been filled by another thread while this one waited for the lock, so re-check it.
            CarbonTableCacheModel cacheModel = getValidCacheBySchemaTableName(table);
            if (cacheModel != null) {
                return cacheModel;
            }
            // Step 1: get the schema file path of the table.
            String schemaFilePath = CarbonTablePath.getSchemaFilePath(tablePath, config);
            // If the schema file exists, it is a transactional table
            CarbonFile schemaFile = FileFactory.getCarbonFile(schemaFilePath, config);
            boolean isTransactionalTable = schemaFile.exists();
            org.apache.carbondata.format.TableInfo tableInfo;
            long modifiedTime = System.currentTimeMillis();
            if (isTransactionalTable) {
                // Step 2: read the metadata (tableInfo) of the table.
                ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {

                    // TBase is used to read and write thrift objects.
                    // TableInfo is a kind of TBase used to read and write table information.
                    // TableInfo is generated by thrift,
                    // see schema.thrift under format/src/main/thrift for details.
                    public TBase create() {
                        return new org.apache.carbondata.format.TableInfo();
                    }
                };
                ThriftReader thriftReader = new ThriftReader(schemaFilePath, createTBase, config);
                thriftReader.open();
                tableInfo = (org.apache.carbondata.format.TableInfo) thriftReader.read();
                thriftReader.close();
                modifiedTime = schemaFile.getLastModifiedTime();
            } else {
                tableInfo = CarbonUtil.inferSchema(tablePath, table.getTableName(), false, config);
            }
            // Step 3: convert format level TableInfo to code level TableInfo
            SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
            // wrapperTableInfo is the code level information of a table in carbondata core,
            // different from the Thrift TableInfo.
            TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(tableInfo, table.getSchemaName(), table.getTableName(), tablePath);
            wrapperTableInfo.setTransactionalTable(isTransactionalTable);
            CarbonMetadata.getInstance().removeTable(wrapperTableInfo.getTableUniqueName());
            // Step 4: Load metadata info into CarbonMetadata
            CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
            CarbonTable carbonTable = Objects.requireNonNull(CarbonMetadata.getInstance().getCarbonTable(table.getSchemaName(), table.getTableName()), "carbontable is null");
            refreshIndexInfo(carbonTable, config);
            cache = new CarbonTableCacheModel(modifiedTime, carbonTable);
            // cache the table
            carbonCache.get().put(table, cache);
            cache.setCarbonTable(carbonTable);
        }
        return cache;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) IndexTableInfo(org.apache.carbondata.core.metadata.schema.indextable.IndexTableInfo) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl)
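
The cache lookup above follows the double-checked pattern: a lock-free read first, then a second read under the lock before the expensive schema parse. Below is a minimal standalone sketch of the same pattern, assuming a ConcurrentHashMap-backed cache; DoubleCheckedCache, TableModel and loadModel are hypothetical names, not CarbonData API.

import java.util.concurrent.ConcurrentHashMap;

public class DoubleCheckedCache {

    private final ConcurrentHashMap<String, TableModel> cache = new ConcurrentHashMap<>();

    public TableModel get(String tableName) {
        // Fast path: no lock when the entry is already cached.
        TableModel model = cache.get(tableName);
        if (model != null) {
            return model;
        }
        synchronized (this) {
            // Re-check inside the lock: another thread may have filled
            // the cache while this thread was waiting.
            model = cache.get(tableName);
            if (model == null) {
                // Stands in for the expensive metadata read and conversion.
                model = loadModel(tableName);
                cache.put(tableName, model);
            }
            return model;
        }
    }

    // Hypothetical placeholder for the real parsing work.
    private TableModel loadModel(String tableName) {
        return new TableModel(tableName);
    }

    // Hypothetical stand-in for CarbonTableCacheModel.
    static class TableModel {
        final String name;
        TableModel(String name) {
            this.name = name;
        }
    }
}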

Example 57 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonInternalLoaderUtil method recordLoadMetadata.

/**
 * This API writes the load-level metadata for the load management module
 * in order to manage load and query execution smoothly.
 *
 * @return boolean indicating whether the status update succeeded.
 */
public static boolean recordLoadMetadata(List<LoadMetadataDetails> newLoadMetadataDetails, List<String> validSegments, CarbonTable carbonTable, List<CarbonTable> indexCarbonTables, String databaseName, String tableName) {
    boolean status = false;
    String metaDataFilepath = carbonTable.getMetadataPath();
    AbsoluteTableIdentifier absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier();
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
    ICarbonLock carbonLock = segmentStatusManager.getTableStatusLock();
    try {
        int retryCount = CarbonLockUtil.getLockProperty(CarbonCommonConstants.NUMBER_OF_TRIES_FOR_CONCURRENT_LOCK, CarbonCommonConstants.NUMBER_OF_TRIES_FOR_CONCURRENT_LOCK_DEFAULT);
        int maxTimeout = CarbonLockUtil.getLockProperty(CarbonCommonConstants.MAX_TIMEOUT_FOR_CONCURRENT_LOCK, CarbonCommonConstants.MAX_TIMEOUT_FOR_CONCURRENT_LOCK_DEFAULT);
        if (carbonLock.lockWithRetries(retryCount, maxTimeout)) {
            LOGGER.info("Acquired lock for table" + databaseName + "." + tableName + " for table status update");
            if (isSegmentsAlreadyCompactedForNewMetaDataDetails(indexCarbonTables, tableName, newLoadMetadataDetails)) {
                return false;
            }
            LoadMetadataDetails[] currentLoadMetadataDetails = SegmentStatusManager.readLoadMetadata(metaDataFilepath);
            List<LoadMetadataDetails> updatedLoadMetadataDetails = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
            // check which loads need to be overwritten (those in the in-progress state)
            boolean found = false;
            for (int i = 0; i < currentLoadMetadataDetails.length; i++) {
                for (LoadMetadataDetails newLoadMetadataDetail : newLoadMetadataDetails) {
                    if (currentLoadMetadataDetails[i].getLoadName().equals(newLoadMetadataDetail.getLoadName())) {
                        currentLoadMetadataDetails[i] = newLoadMetadataDetail;
                        found = true;
                        break;
                    }
                }
                updatedLoadMetadataDetails.add(currentLoadMetadataDetails[i]);
            }
            // check if newLoadMetadataDetails has segments which are not in currentLoadMetadataDetails
            // and add them to the updatedLoadMetadataDetails
            boolean foundNext = false;
            for (int i = 0; i < newLoadMetadataDetails.size(); i++) {
                foundNext = false;
                for (int j = 0; j < currentLoadMetadataDetails.length; j++) {
                    if (newLoadMetadataDetails.get(i).getLoadName().equals(currentLoadMetadataDetails[j].getLoadName())) {
                        foundNext = true;
                        break;
                    }
                    if (j == currentLoadMetadataDetails.length - 1 && !foundNext) {
                        // if not found in the list then add it
                        updatedLoadMetadataDetails.add(newLoadMetadataDetails.get(i));
                        found = true;
                    }
                }
            }
            // when data is loaded for the first time, add all the details
            if (currentLoadMetadataDetails.length == 0 || !found) {
                updatedLoadMetadataDetails.addAll(newLoadMetadataDetails);
            }
            List<String> indexTables = CarbonIndexUtil.getSecondaryIndexes(carbonTable);
            if (!indexTables.isEmpty()) {
                List<LoadMetadataDetails> newSegmentDetailsListForIndexTable = new ArrayList<>(validSegments.size());
                for (String segmentId : validSegments) {
                    LoadMetadataDetails newSegmentDetailsObject = new LoadMetadataDetails();
                    newSegmentDetailsObject.setLoadName(segmentId);
                    newSegmentDetailsListForIndexTable.add(newSegmentDetailsObject);
                }
                for (CarbonTable indexTable : indexCarbonTables) {
                    List<LoadMetadataDetails> indexTableDetailsList = CarbonIndexUtil.getTableStatusDetailsForIndexTable(updatedLoadMetadataDetails, indexTable, newSegmentDetailsListForIndexTable);
                    SegmentStatusManager.writeLoadDetailsIntoFile(CarbonTablePath.getTableStatusFilePath(indexTable.getTablePath()), indexTableDetailsList.toArray(new LoadMetadataDetails[0]));
                }
            } else if (carbonTable.isIndexTable()) {
                SegmentStatusManager.writeLoadDetailsIntoFile(metaDataFilepath + CarbonCommonConstants.FILE_SEPARATOR + CarbonTablePath.TABLE_STATUS_FILE, updatedLoadMetadataDetails.toArray(new LoadMetadataDetails[0]));
            }
            status = true;
        } else {
            LOGGER.error("Not able to acquire the lock for Table status update for table " + databaseName + "." + tableName);
            throw new RuntimeException("Not able to acquire the lock for Table status updation for table " + databaseName + "." + tableName);
        }
    } catch (IOException e) {
        LOGGER.error("Exception occurred while updating table status for table " + databaseName + "." + tableName, e);
    } finally {
        if (carbonLock.unlock()) {
            LOGGER.info("Table unlocked successfully after table status update" + databaseName + "." + tableName);
        } else {
            LOGGER.error("Unable to unlock Table lock for table" + databaseName + "." + tableName + " during table status update");
        }
    }
    return status;
}
Also used : ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)
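
The two nested loops above merge the new load details into the current ones, keyed on load name: a matching name overwrites the current entry, and unseen names are appended. Assuming load names are unique, the same merge can be sketched with a LinkedHashMap; LoadDetail is a hypothetical stand-in for LoadMetadataDetails.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class LoadDetailMerge {

    public static List<LoadDetail> merge(List<LoadDetail> current, List<LoadDetail> incoming) {
        // LinkedHashMap preserves the order of the current details;
        // put() overwrites matching names and appends new ones.
        Map<String, LoadDetail> merged = new LinkedHashMap<>();
        for (LoadDetail detail : current) {
            merged.put(detail.loadName, detail);
        }
        for (LoadDetail detail : incoming) {
            merged.put(detail.loadName, detail);
        }
        return new ArrayList<>(merged.values());
    }

    // Hypothetical stand-in for LoadMetadataDetails.
    static class LoadDetail {
        final String loadName;
        LoadDetail(String loadName) {
            this.loadName = loadName;
        }
    }
}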

Example 58 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbondataPageSourceProvider method createPageSource.

@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, TupleDomain<ColumnHandle> dynamicFilter) {
    HiveSplit carbonSplit = checkType(split, HiveSplit.class, "split is not class HiveSplit");
    this.queryId = carbonSplit.getSchema().getProperty("queryId");
    if (this.queryId == null) {
        // Fall back to hive pagesource.
        return super.createPageSource(transaction, session, split, table, columns, dynamicFilter);
    }
    // TODO: check and use dynamicFilter in CarbondataPageSource
    Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
    configuration = carbonTableReader.updateS3Properties(configuration);
    for (Map.Entry<Object, Object> entry : carbonSplit.getSchema().entrySet()) {
        configuration.set(entry.getKey().toString(), entry.getValue().toString());
    }
    CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);
    boolean isDirectVectorFill = carbonTableReader.config.getPushRowFilter() == null || carbonTableReader.config.getPushRowFilter().equalsIgnoreCase("false");
    return new CarbondataPageSource(carbonTable, queryId, carbonSplit, columns, table, configuration, isDirectVectorFill);
}
Also used : Path(org.apache.hadoop.fs.Path) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) HiveSplit(io.prestosql.plugin.hive.HiveSplit) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment)
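
Copying every entry of the split's schema into the Hadoop Configuration, as done above, can be isolated into a small helper. Here is a sketch under the assumption that the schema is a plain java.util.Properties; SchemaToConfiguration and copy are hypothetical names.

import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;

public class SchemaToConfiguration {

    // Mirrors every schema entry into a copy of the base Configuration
    // so downstream readers see the same settings.
    public static Configuration copy(Properties schema, Configuration base) {
        Configuration conf = new Configuration(base);
        for (Map.Entry<Object, Object> entry : schema.entrySet()) {
            conf.set(entry.getKey().toString(), entry.getValue().toString());
        }
        return conf;
    }
}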

Example 59 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class StoreCreator method createCarbonStore.

/**
 * Create store without any restructure.
 */
public static void createCarbonStore() {
    try {
        String factFilePath = new File("../hadoop/src/test/resources/data.csv").getCanonicalPath();
        File storeDir = new File(absoluteTableIdentifier.getStorePath());
        CarbonUtil.deleteFoldersAndFiles(storeDir);
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, absoluteTableIdentifier.getStorePath());
        CarbonTable table = createTable();
        writeDictionary(factFilePath, table);
        CarbonDataLoadSchema schema = new CarbonDataLoadSchema(table);
        CarbonLoadModel loadModel = new CarbonLoadModel();
        String partitionId = "0";
        loadModel.setCarbonDataLoadSchema(schema);
        loadModel.setDatabaseName(absoluteTableIdentifier.getCarbonTableIdentifier().getDatabaseName());
        loadModel.setTableName(absoluteTableIdentifier.getCarbonTableIdentifier().getTableName());
        loadModel.setFactFilePath(factFilePath);
        loadModel.setLoadMetadataDetails(new ArrayList<LoadMetadataDetails>());
        loadModel.setStorePath(absoluteTableIdentifier.getStorePath());
        loadModel.setDateFormat(null);
        loadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
        loadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
        loadModel.setSerializationNullFormat(TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," + "\\N");
        loadModel.setBadRecordsLoggerEnable(TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + "false");
        loadModel.setBadRecordsAction(TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + "FORCE");
        loadModel.setIsEmptyDataBadRecord(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," + "false");
        loadModel.setCsvHeader("ID,date,country,name,phonetype,serialname,salary");
        loadModel.setCsvHeaderColumns(loadModel.getCsvHeader().split(","));
        loadModel.setTaskNo("0");
        loadModel.setSegmentId("0");
        loadModel.setPartitionId(partitionId);
        loadModel.setFactTimeStamp(System.currentTimeMillis());
        loadModel.setMaxColumns("10");
        executeGraph(loadModel, absoluteTableIdentifier.getStorePath());
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) CarbonLoadModel(org.apache.carbondata.processing.model.CarbonLoadModel) CarbonDataLoadSchema(org.apache.carbondata.processing.model.CarbonDataLoadSchema) File(java.io.File) IOException(java.io.IOException)
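
Several setters above encode a table option and its value as a single "name,value" string (for example bad_records_action,FORCE). Below is a tiny hypothetical helper for that encoding; TableOptionEncoder and optionValue are not CarbonData API.

public class TableOptionEncoder {

    // Joins an option name and its value in the "name,value" form
    // expected by the CarbonLoadModel setters above.
    public static String optionValue(String optionName, String value) {
        return optionName + "," + value;
    }

    public static void main(String[] args) {
        // e.g. loadModel.setBadRecordsAction(optionValue("bad_records_action", "FORCE"));
        System.out.println(optionValue("bad_records_action", "FORCE"));
    }
}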

Example 60 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class MapredCarbonInputFormat method getQueryModel.

public QueryModel getQueryModel(Configuration configuration) throws IOException {
    CarbonTable carbonTable = getCarbonTable(configuration);
    // getting the table absoluteTableIdentifier from the carbonTable
    // to avoid unnecessary deserialization
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    if (projection == null) {
        projection = configuration.get("hive.io.file.readcolumn.names");
    }
    CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
    QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
    // set the filter to the query model in order to filter blocklet before scan
    Expression filter = getFilterPredicates(configuration);
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    queryModel.setFilterExpressionResolverTree(filterIntf);
    return queryModel;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
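
The projection fallback above (use the explicit projection when set, otherwise Hive's read-column list) is easy to isolate. Here is a sketch where CARBON_PROJECTION is a hypothetical key name, while hive.io.file.readcolumn.names is the real Hive property the code reads.

import org.apache.hadoop.conf.Configuration;

public class ProjectionResolver {

    // Hypothetical key; getColumnProjection above reads the real one.
    private static final String CARBON_PROJECTION = "carbon.projection";
    private static final String HIVE_READ_COLUMNS = "hive.io.file.readcolumn.names";

    public static String resolveProjection(Configuration conf) {
        String projection = conf.get(CARBON_PROJECTION);
        if (projection == null) {
            // Fall back to the column list Hive sets for readers.
            projection = conf.get(HIVE_READ_COLUMNS);
        }
        return projection;
    }
}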

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable) 101
ArrayList (java.util.ArrayList) 36
IOException (java.io.IOException) 31
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails) 19
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) 18
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) 16
Configuration (org.apache.hadoop.conf.Configuration) 15
TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo) 14
Map (java.util.Map) 13
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile) 13
List (java.util.List) 12
CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) 12
HashMap (java.util.HashMap) 11
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath) 11
File (java.io.File) 9
Expression (org.apache.carbondata.core.scan.expression.Expression) 9
PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec) 8
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit) 8
InputSplit (org.apache.hadoop.mapreduce.InputSplit) 8
Test (org.junit.Test) 8