Example 46 with CarbonTable

Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

From class TestBlockletIndex, method testaddBlockBasedOnMinMaxValue.

@Test
public void testaddBlockBasedOnMinMaxValue() throws Exception {
    // Mock the implicit include filter so the min/max check always reports
    // a non-empty hit set, i.e. the block is considered a match.
    new MockUp<ImplicitIncludeFilterExecutorImpl>() {

        @Mock
        BitSet isFilterValuesPresentInBlockOrBlocklet(byte[][] maxValue, byte[][] minValue, String uniqueBlockPath, boolean[] isMinMaxSet) {
            BitSet bitSet = new BitSet(1);
            bitSet.set(8);
            return bitSet;
        }
    };
    BlockIndex blockIndex = new BlockletIndex();
    // Mock CarbonTable so the Hive-partition branch is skipped.
    new MockUp<CarbonTable>() {

        @Mock
        public boolean isHivePartitionTable() {
            return false;
        }
    };
    blockIndex.setSegmentPropertiesWrapper(new SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper(new CarbonTable(), new ArrayList<>()));
    // addBlockBasedOnMinMaxValue is private, so invoke it via reflection.
    Method method = BlockIndex.class.getDeclaredMethod("addBlockBasedOnMinMaxValue", FilterExecutor.class, byte[][].class, byte[][].class, boolean[].class, String.class, int.class);
    method.setAccessible(true);
    byte[][] minValue = { ByteUtil.toBytes("sfds") };
    byte[][] maxValue = { ByteUtil.toBytes("resa") };
    boolean[] minMaxFlag = new boolean[] { true };
    // implicitIncludeFilterExecutor is a field of the test class, not shown in this snippet.
    Object result = method.invoke(blockIndex, implicitIncludeFilterExecutor, minValue, maxValue, minMaxFlag, "/opt/store/default/carbon_table/Fact/Part0/Segment_0/part-0-0_batchno0-0-1514989110586.carbondata", 0);
    assert ((boolean) result);
}
Also used : BitSet(java.util.BitSet) ArrayList(java.util.ArrayList) MockUp(mockit.MockUp) Method(java.lang.reflect.Method) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) SegmentPropertiesAndSchemaHolder(org.apache.carbondata.core.datastore.block.SegmentPropertiesAndSchemaHolder) Test(org.junit.Test)
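
A note on the pattern used above: JMockit's MockUp redefines the mocked method for every instance of the target class while the mock is in effect, which is what lets the test bypass the real min/max evaluation. Below is a minimal, self-contained sketch of the same mechanism; the Clock class is purely hypothetical and exists only for illustration.

import mockit.Mock;
import mockit.MockUp;

class Clock {

    long now() {
        return System.currentTimeMillis();
    }
}

public class MockUpSketch {

    public static void main(String[] args) {
        // While this MockUp is in effect (for the enclosing test, when run
        // under JUnit), every Clock instance answers 42.
        new MockUp<Clock>() {

            @Mock
            long now() {
                return 42L;
            }
        };
        // Prints 42 rather than the wall-clock time.
        System.out.println(new Clock().now());
    }
}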

Example 47 with CarbonTable

Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

From class CarbonReaderBuilder, method prepareFileInputFormat.

private CarbonFileInputFormat prepareFileInputFormat(Job job, boolean enableBlockletDistribution, boolean disableLoadBlockIndex) throws IOException {
    // instanceof already rejects null, so the extra null check is redundant.
    if (inputSplit instanceof CarbonInputSplit) {
        tablePath = ((CarbonInputSplit) inputSplit).getSegment().getReadCommittedScope().getFilePath();
        tableName = "UnknownTable" + UUID.randomUUID();
    }
    if (null == this.fileLists && null == tablePath) {
        throw new IllegalArgumentException("Please set table path first.");
    }
    // Infer the schema, either from the supplied file list or from the table path.
    CarbonTable table;
    if (null != this.fileLists) {
        if (fileLists.size() < 1) {
            throw new IllegalArgumentException("fileLists must have one file in list as least!");
        }
        // Derive the deepest common parent folder by iteratively trimming to
        // the longest prefix shared with each file path.
        String commonString = String.valueOf(fileLists.get(0));
        for (int i = 1; i < fileLists.size(); i++) {
            commonString = commonString.substring(0, StringUtils.indexOfDifference(commonString, String.valueOf(fileLists.get(i))));
        }
        // Cut the shared prefix back to its enclosing directory.
        int index = commonString.lastIndexOf("/");
        commonString = commonString.substring(0, index);
        table = CarbonTable.buildTable(commonString, tableName, hadoopConf);
    } else {
        table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
    }
    if (enableBlockletDistribution) {
        // set cache level to blocklet level
        Map<String, String> tableProperties = table.getTableInfo().getFactTable().getTableProperties();
        tableProperties.put(CarbonCommonConstants.CACHE_LEVEL, "BLOCKLET");
        table.getTableInfo().getFactTable().setTableProperties(tableProperties);
    }
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), new IndexFilter(table, filterExpression, true));
    }
    if (null != this.fileLists) {
        format.setFileLists(this.fileLists);
    }
    if (projectionColumns != null) {
        // set the user projection
        int len = projectionColumns.length;
        for (int i = 0; i < len; i++) {
            if (projectionColumns[i].contains(".")) {
                throw new UnsupportedOperationException("Complex child columns projection NOT supported through CarbonReader");
            }
        }
        format.setColumnProjection(job.getConfiguration(), projectionColumns);
    }
    if ((disableLoadBlockIndex) && (filterExpression == null)) {
        job.getConfiguration().set("filter_blocks", "false");
    }
    return format;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonFileInputFormat(org.apache.carbondata.hadoop.api.CarbonFileInputFormat) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) IndexFilter(org.apache.carbondata.core.index.IndexFilter)
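
The least obvious step in prepareFileInputFormat is deriving a common parent folder from a list of file paths. Here is that logic in isolation as a minimal sketch, assuming commons-lang3's StringUtils (indexOfDifference returns the first index at which two strings differ) and hypothetical paths:

import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;

public class CommonPrefixSketch {

    static String commonParent(List<String> paths) {
        String common = paths.get(0);
        for (int i = 1; i < paths.size(); i++) {
            // Trim to the longest prefix shared with the next path.
            // Caveat: two identical paths make indexOfDifference return -1,
            // which would break substring; real inputs are distinct files.
            common = common.substring(0, StringUtils.indexOfDifference(common, paths.get(i)));
        }
        // Cut back to the enclosing directory, as the method above does.
        return common.substring(0, common.lastIndexOf('/'));
    }

    public static void main(String[] args) {
        List<String> files = Arrays.asList(
            "/store/db/t1/Fact/Part0/Segment_0/part-0-0.carbondata",
            "/store/db/t1/Fact/Part0/Segment_1/part-1-0.carbondata");
        // Prints /store/db/t1/Fact/Part0
        System.out.println(commonParent(files));
    }
}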

Example 48 with CarbonTable

Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

From class CarbonWriterBuilder, method buildCarbonTable.

/**
 * Build a {@link CarbonTable}
 */
private CarbonTable buildCarbonTable() {
    TableSchemaBuilder tableSchemaBuilder = TableSchema.builder();
    if (blockSize > 0) {
        tableSchemaBuilder = tableSchemaBuilder.blockSize(blockSize);
    }
    if (blockletSize > 0) {
        tableSchemaBuilder = tableSchemaBuilder.blockletSize(blockletSize);
    }
    if (pageSizeInMb > 0) {
        tableSchemaBuilder = tableSchemaBuilder.pageSizeInMb(pageSizeInMb);
    }
    tableSchemaBuilder.enableLocalDictionary(isLocalDictionaryEnabled);
    tableSchemaBuilder.localDictionaryThreshold(localDictionaryThreshold);
    List<String> sortColumnsList = new ArrayList<>();
    if (sortColumns == null) {
        // Default sort columns: every non-null STRING, DATE, or TIMESTAMP field.
        // The fields array may contain null entries (e.g. the user declared a
        // larger schema than was actually supplied), so skip those.
        for (Field field : schema.getFields()) {
            if (null != field) {
                if (field.getDataType() == DataTypes.STRING || field.getDataType() == DataTypes.DATE || field.getDataType() == DataTypes.TIMESTAMP) {
                    sortColumnsList.add(field.getFieldName());
                }
            }
        }
        sortColumns = new String[sortColumnsList.size()];
        sortColumns = sortColumnsList.toArray(sortColumns);
    } else {
        sortColumnsList = Arrays.asList(sortColumns);
    }
    ColumnSchema[] sortColumnsSchemaList = new ColumnSchema[sortColumnsList.size()];
    List<String> invertedIdxColumnsList = new ArrayList<>();
    if (null != invertedIndexColumns) {
        invertedIdxColumnsList = Arrays.asList(invertedIndexColumns);
    }
    Field[] fields = schema.getFields();
    buildTableSchema(fields, tableSchemaBuilder, sortColumnsList, sortColumnsSchemaList, invertedIdxColumnsList);
    tableSchemaBuilder.setSortColumns(Arrays.asList(sortColumnsSchemaList));
    // Writes through this builder are non-transactional, so a throwaway
    // database and table name are sufficient.
    String dbName = "";
    String tableName = "_tempTable-" + UUID.randomUUID() + "_" + timestamp;
    // Named tableSchema to avoid shadowing the builder's schema field used above.
    TableSchema tableSchema = tableSchemaBuilder.build();
    tableSchema.setTableName(tableName);
    return CarbonTable.builder().tableName(tableSchema.getTableName()).databaseName(dbName).tablePath(path).tableSchema(tableSchema).isTransactionalTable(false).build();
}
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ArrayList(java.util.ArrayList) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)
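
buildCarbonTable runs internally when an SDK writer is built. The following is a hedged usage sketch of driving it through the public SDK; the builder methods (outputPath, withCsvInput, writtenBy) follow the CarbonData SDK documentation but should be checked against your version, and /tmp/carbon_out is a hypothetical output path.

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.Field;
import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.Schema;

public class WriterSketch {

    public static void main(String[] args) throws Exception {
        Field[] fields = new Field[] { new Field("name", DataTypes.STRING), new Field("age", DataTypes.INT) };
        // build() constructs the transient, non-transactional CarbonTable shown above.
        CarbonWriter writer = CarbonWriter.builder()
            .outputPath("/tmp/carbon_out")
            .withCsvInput(new Schema(fields))
            .writtenBy("WriterSketch")
            .build();
        writer.write(new String[] { "alice", "30" });
        writer.close();
    }
}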

Example 49 with CarbonTable

Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

From class CarbonSchemaReader, method readSchemaFromFolder.

/**
 * Read schema from carbon file folder path
 *
 * @param folderPath carbon file folder path
 * @param conf       hadoop configuration support, can set s3a AK,SK,
 *                   end point and other conf with this
 * @return carbon data Schema
 * @throws IOException if the carbon data or schema files cannot be read
 */
private static Schema readSchemaFromFolder(String folderPath, Configuration conf) throws IOException {
    String tableName = "UnknownTable" + UUID.randomUUID();
    CarbonTable table = CarbonTable.buildTable(folderPath, tableName, conf);
    List<ColumnSchema> columnSchemaList = table.getTableInfo().getFactTable().getListOfColumns();
    int numOfChildren = 0;
    // Count only top-level columns; complex-type children carry a '.'
    // (CarbonCommonConstants.POINT) in their column names.
    for (ColumnSchema columnSchema : columnSchemaList) {
        if (!(columnSchema.getColumnName().contains(CarbonCommonConstants.POINT))) {
            numOfChildren++;
        }
    }
    Field[] fields = new Field[numOfChildren];
    int indexOfFields = 0;
    for (ColumnSchema columnSchema : columnSchemaList) {
        if (!columnSchema.getColumnName().contains(CarbonCommonConstants.POINT)) {
            if (DataTypes.isStructType(columnSchema.getDataType())) {
                StructField structField = getStructChildren(table, columnSchema.getColumnName());
                List<StructField> list = new ArrayList<>();
                list.add(structField);
                fields[indexOfFields] = new Field(columnSchema.getColumnName(), DataTypes.createStructType(list));
                fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
                indexOfFields++;
            } else if (DataTypes.isArrayType(columnSchema.getDataType())) {
                StructField structField = getArrayChildren(table, columnSchema.getColumnName());
                List<StructField> list = new ArrayList<>();
                list.add(structField);
                fields[indexOfFields] = new Field(columnSchema.getColumnName(), "array", list);
                fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
                indexOfFields++;
            } else if (DataTypes.isMapType(columnSchema.getDataType())) {
            // TODO
            } else {
                fields[indexOfFields] = new Field(columnSchema);
                fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
                indexOfFields++;
            }
        }
    }
    return new Schema(fields);
}
Also used : CarbonUtil.thriftColumnSchemaToWrapperColumnSchema(org.apache.carbondata.core.util.CarbonUtil.thriftColumnSchemaToWrapperColumnSchema) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ArrayList(java.util.ArrayList) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) List(java.util.List)
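
readSchemaFromFolder backs the public CarbonSchemaReader entry points. A minimal usage sketch, assuming the readSchema(String) overload from the SDK and the same hypothetical path as above:

import org.apache.carbondata.core.metadata.datatype.Field;
import org.apache.carbondata.sdk.file.CarbonSchemaReader;
import org.apache.carbondata.sdk.file.Schema;

public class SchemaReadSketch {

    public static void main(String[] args) throws Exception {
        // For a folder path this resolves through readSchemaFromFolder above.
        Schema schema = CarbonSchemaReader.readSchema("/tmp/carbon_out");
        for (Field field : schema.getFields()) {
            System.out.println(field.getFieldName() + ": " + field.getDataType());
        }
    }
}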

Example 50 with CarbonTable

Use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.

From class LocalCarbonStore, method scan.

@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns, Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);
    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }
    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), new IndexFilter(table, filter));
    }
    final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();
    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<Void, Object> reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }
        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        // Close any readers that are still open; a second close() on a reader
        // already closed above only results in a logged IOException.
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
    }
    return rows.iterator();
}
Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) CarbonRow(org.apache.carbondata.core.datastore.row.CarbonRow) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) RecordReader(org.apache.hadoop.mapreduce.RecordReader) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonProjection(org.apache.carbondata.hadoop.CarbonProjection) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JobID(org.apache.hadoop.mapreduce.JobID)
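
The filter argument to scan is a CarbonData scan Expression, which the method wraps into an IndexFilter. A small sketch of building the equivalent of WHERE name = 'alice'; the column name and type are hypothetical:

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;

public class FilterSketch {

    // Equivalent of "name = 'alice'", suitable for scan(...)'s filter argument.
    static Expression nameEqualsAlice() {
        ColumnExpression column = new ColumnExpression("name", DataTypes.STRING);
        LiteralExpression value = new LiteralExpression("alice", DataTypes.STRING);
        return new EqualToExpression(column, value);
    }
}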

Aggregations

CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 101
ArrayList (java.util.ArrayList): 36
IOException (java.io.IOException): 31
LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails): 19
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 18
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema): 16
Configuration (org.apache.hadoop.conf.Configuration): 15
TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo): 14
Map (java.util.Map): 13
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 13
List (java.util.List): 12
CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension): 12
HashMap (java.util.HashMap): 11
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 11
File (java.io.File): 9
Expression (org.apache.carbondata.core.scan.expression.Expression): 9
PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec): 8
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 8
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 8
Test (org.junit.Test): 8