Search in sources :

Example 86 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class CarbonTableReader method getInputSplits.

/**
 * Builds the Carbon multi-block input splits for a table.
 *
 * <p>The supplied {@code config} is modified in place: the query ids, an empty
 * segment-number list, the table path, the database name, the table name, the
 * transactional flag and the table info are all written into it before a
 * {@link JobConf} copy is handed to the input format.
 *
 * <p>Every split returned by the input format is converted to a
 * {@code CarbonLocalInputSplit}. These are then grouped: {@code ROW_V1} splits
 * by segment id + path + start offset, all other splits by segment id + path.
 * Each group becomes one {@code CarbonLocalMultiBlockSplit} carrying the
 * distinct locations of its members.
 *
 * @param tableCacheModel cached table
 * @param filters carbonData filters
 * @param filteredPartitions matched partitionSpec for the filter
 * @param config hadoop conf; mutated as described above
 * @return list of multi-block splits; empty when the input format yields no splits
 * @throws IOException declared by the signature; note that an {@code IOException}
 *     raised inside the split-generation {@code try} block is rethrown wrapped in a
 *     {@link RuntimeException} instead
 */
public List<CarbonLocalMultiBlockSplit> getInputSplits(CarbonTableCacheModel tableCacheModel, Expression filters, List<PartitionSpec> filteredPartitions, Configuration config) throws IOException {
    List<CarbonLocalInputSplit> result = new ArrayList<>();
    List<CarbonLocalMultiBlockSplit> multiBlockSplitList = new ArrayList<>();
    CarbonTable carbonTable = tableCacheModel.getCarbonTable();
    TableInfo tableInfo = carbonTable.getTableInfo();
    config.set("presto.cli.query.id", prestoQueryId);
    config.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
    config.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    config.set(CarbonTableInputFormat.DATABASE_NAME, carbonTable.getDatabaseName());
    config.set(CarbonTableInputFormat.TABLE_NAME, carbonTable.getTableName());
    config.set("query.id", queryId);
    CarbonInputFormat.setTransactionalTable(config, carbonTable.isTransactionalTable());
    CarbonInputFormat.setTableInfo(config, tableInfo);
    if (CarbonProperties.getInstance().isCoarseGrainSecondaryIndex(tableInfo.getDatabaseName(), tableInfo.getFactTable().getTableName(), "true")) {
        CarbonInputFormat.checkAndSetSecondaryIndexPruning(tableInfo, filters, config);
    }
    JobConf jobConf = new JobConf(config);
    try {
        CarbonTableInputFormat.setTableInfo(config, tableInfo);
        CarbonTableInputFormat<Object> carbonTableInputFormat = createInputFormat(jobConf, carbonTable.getAbsoluteTableIdentifier(), new IndexFilter(carbonTable, filters, true), filteredPartitions);
        Job job = Job.getInstance(jobConf);
        List<InputSplit> splits = carbonTableInputFormat.getSplits(job);
        if (splits != null && !splits.isEmpty()) {
            // Detail info is shipped as JSON inside each local split.
            Gson gson = new Gson();
            for (InputSplit inputSplit : splits) {
                CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
                result.add(new CarbonLocalInputSplit(carbonInputSplit.getSegmentId(), carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(), carbonInputSplit.getLength(), Arrays.asList(carbonInputSplit.getLocations()), carbonInputSplit.getNumberOfBlocklets(), carbonInputSplit.getVersion().number(), carbonInputSplit.getDeleteDeltaFiles(), carbonInputSplit.getBlockletId(), gson.toJson(carbonInputSplit.getDetailInfo()), carbonInputSplit.getFileFormat().ordinal()));
            }
            // Use block distribution: ROW_V1 splits are additionally keyed by their start offset.
            List<List<CarbonLocalInputSplit>> inputSplits = new ArrayList<>(result.stream().collect(Collectors.groupingBy(carbonInput -> {
                if (FileFormat.ROW_V1.equals(carbonInput.getFileFormat())) {
                    return carbonInput.getSegmentId().concat(carbonInput.getPath()).concat(carbonInput.getStart() + "");
                }
                return carbonInput.getSegmentId().concat(carbonInput.getPath());
            })).values());
            // TODO : try to optimize the below logic as it may slow down for huge splits
            for (List<CarbonLocalInputSplit> groupedSplits : inputSplits) {
                String[] distinctLocations = groupedSplits.stream().flatMap(f -> Arrays.stream(getLocations(f))).distinct().toArray(String[]::new);
                multiBlockSplitList.add(new CarbonLocalMultiBlockSplit(groupedSplits, distinctLocations));
            }
            // Informational only: this is not an error condition, so it is logged at INFO.
            LOGGER.info("Size fo MultiblockList   " + multiBlockSplitList.size());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return multiBlockSplitList;
}
Also used : CarbonMetadata(org.apache.carbondata.core.metadata.CarbonMetadata) Arrays(java.util.Arrays) Inject(com.google.inject.Inject) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) Logger(org.apache.log4j.Logger) Gson(com.google.gson.Gson) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) TBase(org.apache.thrift.TBase) ACCESS_KEY(org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY) IndexMetadata(org.apache.carbondata.core.metadata.schema.indextable.IndexMetadata) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Expression(org.apache.carbondata.core.scan.expression.Expression) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl) PrestoFilterUtil(org.apache.carbondata.presto.PrestoFilterUtil) CarbonInputFormat(org.apache.carbondata.hadoop.api.CarbonInputFormat) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) Collectors(java.util.stream.Collectors) IndexStatus(org.apache.carbondata.core.index.status.IndexStatus) IndexType(org.apache.carbondata.core.metadata.index.IndexType) Objects(java.util.Objects) List(java.util.List) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) Job(org.apache.hadoop.mapreduce.Job) CarbonProperties(org.apache.carbondata.core.util.CarbonProperties) CarbonUtil(org.apache.carbondata.core.util.CarbonUtil) IndexTableInfo(org.apache.carbondata.core.metadata.schema.indextable.IndexTableInfo) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) CollectionUtils(org.apache.commons.collections.CollectionUtils) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) IndexStoreManager(org.apache.carbondata.core.index.IndexStoreManager) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) TupleDomain(io.prestosql.spi.predicate.TupleDomain) IOException(java.io.IOException) JobConf(org.apache.hadoop.mapred.JobConf) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FileFormat(org.apache.carbondata.core.statusmanager.FileFormat) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) SECRET_KEY(org.apache.hadoop.fs.s3a.Constants.SECRET_KEY) IndexFilter(org.apache.carbondata.core.index.IndexFilter) ENDPOINT(org.apache.hadoop.fs.s3a.Constants.ENDPOINT) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) IndexTableInfo(org.apache.carbondata.core.metadata.schema.indextable.IndexTableInfo) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) List(java.util.List) ArrayList(java.util.ArrayList) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapreduce.InputSplit) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit)

Example 87 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class IndexProvider method createIndexFactory.

/**
 * Creates the index factory for the main table and index schema of this provider.
 *
 * <p>The schema's provider name is first treated as a class name: the class is loaded
 * and its first public constructor is invoked with the main table and the index schema.
 * If no class with that name exists, the provider name is resolved as a short name
 * through {@code IndexRegistry}. Any other failure while loading or instantiating the
 * class is rethrown as a {@code MetadataProcessException}.
 *
 * @return the created index factory
 * @throws MalformedIndexCommandException declared by the signature; presumably raised by
 *     the short-name lookup (not visible here, verify against IndexRegistry)
 */
private IndexFactory<? extends Index> createIndexFactory() throws MalformedIndexCommandException {
    final CarbonTable table = getMainTable();
    final IndexSchema schema = getIndexSchema();
    try {
        // first attempt: the provider name is a class name
        Class<?> providerClass = Class.forName(schema.getProviderName());
        Object factory = providerClass.getConstructors()[0].newInstance(table, schema);
        return (IndexFactory<? extends Index>) factory;
    } catch (ClassNotFoundException e) {
        // second attempt: the provider name is a registered short name
        return IndexRegistry.getIndexFactoryByShortName(table, schema);
    } catch (Throwable e) {
        throw new MetadataProcessException("failed to create IndexClassProvider '" + schema.getProviderName() + "'", e);
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) MetadataProcessException(org.apache.carbondata.common.exceptions.MetadataProcessException) IndexSchema(org.apache.carbondata.core.metadata.schema.table.IndexSchema)

Example 88 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class PrestoFilterUtil method getPartitionFilters.

/**
 * Returns partition filters built from the domain constraints.
 *
 * <p>For every column present in the constraint's domains, the column name is compared
 * against the column names of the table's partition columns. Partition filters are
 * created (via {@code createPartitionFilters}) only for columns that match.
 *
 * <p>NOTE(review): {@code originalConstraint.getDomains().get()} is called without a
 * presence check, and {@code carbonTable.getPartitionInfo()} is dereferenced directly.
 * Callers presumably guarantee that both are present; confirm at the call sites.
 *
 * @param carbonTable table whose partition columns are consulted
 * @param originalConstraint constraint whose per-column domains are inspected
 * @return the partition filters for all constrained partition columns; empty if none match
 */
public static List<String> getPartitionFilters(CarbonTable carbonTable, TupleDomain<HiveColumnHandle> originalConstraint) {
    List<ColumnSchema> columnSchemas = carbonTable.getPartitionInfo().getColumnSchemaList();
    List<String> filter = new ArrayList<>();
    for (HiveColumnHandle columnHandle : originalConstraint.getDomains().get().keySet()) {
        // Only existence matters, so stop at the first matching partition column
        // instead of materializing an intermediate list.
        boolean isPartitionColumn = columnSchemas.stream().anyMatch(columnSchema -> columnHandle.getName().equals(columnSchema.getColumnName()));
        if (isPartitionColumn) {
            filter.addAll(createPartitionFilters(originalConstraint, columnHandle));
        }
    }
    return filter;
}
Also used : LessThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression) Slice(io.airlift.slice.Slice) Date(java.util.Date) DataTypes(org.apache.carbondata.core.metadata.datatype.DataTypes) HiveType(com.facebook.presto.hive.HiveType) HashMap(java.util.HashMap) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) PrestoException(com.facebook.presto.spi.PrestoException) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) ArrayList(java.util.ArrayList) BigDecimal(java.math.BigDecimal) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Calendar(java.util.Calendar) Type(com.facebook.presto.spi.type.Type) Map(java.util.Map) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) GreaterThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression) BigInteger(java.math.BigInteger) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Expression(org.apache.carbondata.core.scan.expression.Expression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) Decimals(com.facebook.presto.spi.type.Decimals) DataType(org.apache.carbondata.core.metadata.datatype.DataType) ListExpression(org.apache.carbondata.core.scan.expression.conditional.ListExpression) Timestamp(java.sql.Timestamp) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) Range(com.facebook.presto.spi.predicate.Range) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) String.format(java.lang.String.format) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) 
Domain(com.facebook.presto.spi.predicate.Domain) InExpression(org.apache.carbondata.core.scan.expression.conditional.InExpression) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) LessThanExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanExpression) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle)

Example 89 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class MetaCachedCarbonStore method getTable.

/**
 * Returns the {@code CarbonTable} for the given table path, using {@code cache} when the
 * path is already present there.
 *
 * <p>On a cache miss the schema file path is resolved for {@code path}. If that file does
 * not exist, the schema is inferred via {@code SchemaReader.inferSchema}. Otherwise the
 * schema file is read, converted to a wrapper {@code TableInfo}, and its table path is set
 * to {@code path}. The resulting table is stored in {@code cache} before being returned.
 *
 * @param path table path, also used as the cache key
 * @return the cached or newly built table
 * @throws IOException declared by the signature; presumably raised by the file-existence
 *     check or the schema read (verify against those helpers)
 */
CarbonTable getTable(String path) throws IOException {
    if (cache.containsKey(path)) {
        return cache.get(path);
    }
    String schemaPath = CarbonTablePath.getSchemaFilePath(path);
    TableInfo tableInfo;
    if (FileFactory.isFileExist(schemaPath)) {
        // Read exactly the file whose existence was just checked, rather than
        // resolving the schema path a second time.
        org.apache.carbondata.format.TableInfo tableInfoFormat = CarbonUtil.readSchemaFile(schemaPath);
        SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
        tableInfo = schemaConverter.fromExternalToWrapperTableInfo(tableInfoFormat, "", "", "");
        tableInfo.setTablePath(path);
    } else {
        tableInfo = SchemaReader.inferSchema(AbsoluteTableIdentifier.from(path), false);
    }
    CarbonTable table = CarbonTable.buildFromTableInfo(tableInfo);
    cache.put(path, table);
    return table;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl)

Example 90 with CarbonTable

use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

the class SegmentFileStore method writeSegmentFileForOthers.

/**
 * Writes a segment file listing the data files of the given segment.
 *
 * <p>When {@code partitionDataFiles} is empty, the data files are taken from the segment's
 * own folder, skipping {@code _SUCCESS} and {@code .crc} files, and are recorded under the
 * segment path. Otherwise the given partition files are used and recorded under the
 * partition location together with the partition values from {@code partitionSpec}.
 * The segment-files folder under the table path is created if it does not exist.
 *
 * @param carbonTable table that supplies the table path
 * @param segment segment whose path, options and segment file name are used
 * @param partitionSpec partition description; only read when {@code partitionDataFiles}
 *     is non-empty
 * @param partitionDataFiles data files of the partition, or an empty list to list the
 *     segment folder instead
 * @return {@code true} if a segment file was written, {@code false} if there were no data files
 * @throws IOException declared by the signature; presumably raised by the file operations
 */
public static boolean writeSegmentFileForOthers(CarbonTable carbonTable, Segment segment, PartitionSpec partitionSpec, List<FileStatus> partitionDataFiles) throws IOException {
    String tablePath = carbonTable.getTablePath();
    final boolean fromPartition = !partitionDataFiles.isEmpty();
    CarbonFile[] dataFiles;
    if (fromPartition) {
        dataFiles = partitionDataFiles.stream().map(status -> FileFactory.getCarbonFile(status.getPath().toString())).toArray(CarbonFile[]::new);
    } else {
        CarbonFile segmentDir = FileFactory.getCarbonFile(segment.getSegmentPath());
        dataFiles = segmentDir.listFiles(candidate -> !(candidate.getName().equals("_SUCCESS") || candidate.getName().endsWith(".crc")));
    }
    // nothing to describe: no segment file is written
    if (dataFiles == null || dataFiles.length == 0) {
        return false;
    }
    SegmentFile segmentFile = new SegmentFile();
    segmentFile.setOptions(segment.getOptions());
    FolderDetails details = new FolderDetails();
    details.setStatus(SegmentStatus.SUCCESS.getMessage());
    details.setRelative(false);
    if (fromPartition) {
        details.setPartitions(partitionSpec.getPartitions());
        segmentFile.addPath(partitionSpec.getLocation().toString(), details);
    } else {
        segmentFile.addPath(segment.getSegmentPath(), details);
    }
    for (CarbonFile dataFile : dataFiles) {
        details.getFiles().add(dataFile.getName());
    }
    String segmentFilesDir = CarbonTablePath.getSegmentFilesLocation(tablePath);
    CarbonFile segmentFilesFolder = FileFactory.getCarbonFile(segmentFilesDir);
    if (!segmentFilesFolder.exists()) {
        segmentFilesFolder.mkdirs();
    }
    // write segment info to new file.
    String targetPath = segmentFilesDir + File.separator + segment.getSegmentFileName();
    writeSegmentFile(segmentFile, targetPath);
    return true;
}
Also used : Arrays(java.util.Arrays) AtomicFileOperationFactory(org.apache.carbondata.core.fileoperations.AtomicFileOperationFactory) FileStatus(org.apache.hadoop.fs.FileStatus) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) Logger(org.apache.log4j.Logger) DataOutputStream(java.io.DataOutputStream) SegmentColumnMetaDataInfo(org.apache.carbondata.core.segmentmeta.SegmentColumnMetaDataInfo) Gson(com.google.gson.Gson) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) Set(java.util.Set) SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) Collectors(java.util.stream.Collectors) CarbonLockUtil(org.apache.carbondata.core.locks.CarbonLockUtil) Serializable(java.io.Serializable) Objects(java.util.Objects) List(java.util.List) CarbonFileFilter(org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter) CarbonUtil(org.apache.carbondata.core.util.CarbonUtil) DataInputStream(java.io.DataInputStream) TrashUtil(org.apache.carbondata.core.util.TrashUtil) Segment(org.apache.carbondata.core.index.Segment) HashMap(java.util.HashMap) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) SegmentStatus(org.apache.carbondata.core.statusmanager.SegmentStatus) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) ObjectSerializationUtil(org.apache.carbondata.core.util.ObjectSerializationUtil) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SegmentMetaDataInfo(org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfo) Charset(java.nio.charset.Charset) FileWriteOperation(org.apache.carbondata.core.fileoperations.FileWriteOperation) 
OutputStreamWriter(java.io.OutputStreamWriter) CarbonUpdateUtil(org.apache.carbondata.core.mutate.CarbonUpdateUtil) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) IndexStoreManager(org.apache.carbondata.core.index.IndexStoreManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) LinkedHashSet(java.util.LinkedHashSet) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) BufferedWriter(java.io.BufferedWriter) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) IOException(java.io.IOException) InputStreamReader(java.io.InputStreamReader) File(java.io.File) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PartitionSpec(org.apache.carbondata.core.indexstore.PartitionSpec) DataFileFooterConverter(org.apache.carbondata.core.util.DataFileFooterConverter) SegmentMetaDataInfoStats(org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfoStats) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) AtomicFileOperations(org.apache.carbondata.core.fileoperations.AtomicFileOperations) TableIndex(org.apache.carbondata.core.index.TableIndex) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile)

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)101 ArrayList (java.util.ArrayList)36 IOException (java.io.IOException)31 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)19 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)18 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)16 Configuration (org.apache.hadoop.conf.Configuration)15 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)14 Map (java.util.Map)13 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)13 List (java.util.List)12 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)12 HashMap (java.util.HashMap)11 CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath)11 File (java.io.File)9 Expression (org.apache.carbondata.core.scan.expression.Expression)9 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)8 CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit)8 InputSplit (org.apache.hadoop.mapreduce.InputSplit)8 Test (org.junit.Test)8