use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class TestBlockletIndex method testaddBlockBasedOnMinMaxValue.
@Test
public void testaddBlockBasedOnMinMaxValue() throws Exception {
  // Mock the implicit include filter: returning a non-empty BitSet makes the
  // pruning logic treat the block as matching the filter values.
  new MockUp<ImplicitIncludeFilterExecutorImpl>() {
    @Mock
    BitSet isFilterValuesPresentInBlockOrBlocklet(byte[][] maxValue, byte[][] minValue,
        String uniqueBlockPath, boolean[] isMinMaxSet) {
      BitSet bitSet = new BitSet(1);
      bitSet.set(8);
      return bitSet;
    }
  };
  BlockIndex blockIndex = new BlockletIndex();
  new MockUp<CarbonTable>() {
    @Mock
    public boolean isHivePartitionTable() {
      return false;
    }
  };
  blockIndex.setSegmentPropertiesWrapper(new SegmentPropertiesAndSchemaHolder
      .SegmentPropertiesWrapper(new CarbonTable(), new ArrayList<>()));
  // The method under test is private, so invoke it via reflection.
  Method method = BlockIndex.class.getDeclaredMethod("addBlockBasedOnMinMaxValue",
      FilterExecutor.class, byte[][].class, byte[][].class, boolean[].class, String.class, int.class);
  method.setAccessible(true);
  byte[][] minValue = { ByteUtil.toBytes("sfds") };
  byte[][] maxValue = { ByteUtil.toBytes("resa") };
  boolean[] minMaxFlag = new boolean[] { true };
  // implicitIncludeFilterExecutor is declared elsewhere in the test class.
  Object result = method.invoke(blockIndex, implicitIncludeFilterExecutor, minValue, maxValue,
      minMaxFlag, "/opt/store/default/carbon_table/Fact/Part0/Segment_0/part-0-0_batchno0-0-1514989110586.carbondata", 0);
  assert ((boolean) result);
}
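The mocked isFilterValuesPresentInBlockOrBlocklet drives the pruning decision: a block survives pruning only if the filter value can fall inside the column's [min, max] range. A minimal, library-independent sketch of that comparison (all names here are illustrative, not CarbonData API):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

// Illustrative min/max pruning: a block may be skipped only when the filter
// value lies outside [min, max] for the filtered column.
final class MinMaxPruneSketch {

  static boolean mightContain(byte[] min, byte[] max, byte[] filterValue) {
    // Unsigned lexicographic comparison, matching how sorted byte keys compare (JDK 9+).
    return Arrays.compareUnsigned(filterValue, min) >= 0
        && Arrays.compareUnsigned(filterValue, max) <= 0;
  }

  public static void main(String[] args) {
    byte[] min = "aaa".getBytes(StandardCharsets.UTF_8);
    byte[] max = "zzz".getBytes(StandardCharsets.UTF_8);
    byte[] value = "mmm".getBytes(StandardCharsets.UTF_8);
    System.out.println(mightContain(min, max, value)); // true -> block must be scanned
  }
}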
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonReaderBuilder method prepareFileInputFormat.
private CarbonFileInputFormat prepareFileInputFormat(Job job, boolean enableBlockletDistribution,
    boolean disableLoadBlockIndex) throws IOException {
  if (inputSplit != null && inputSplit instanceof CarbonInputSplit) {
    tablePath = ((CarbonInputSplit) inputSplit).getSegment().getReadCommittedScope().getFilePath();
    tableName = "UnknownTable" + UUID.randomUUID();
  }
  if (null == this.fileLists && null == tablePath) {
    throw new IllegalArgumentException("Please set table path first.");
  }
  // infer schema
  CarbonTable table;
  if (null != this.fileLists) {
    if (fileLists.size() < 1) {
      throw new IllegalArgumentException("fileLists must contain at least one file!");
    }
    // Derive the table path from the longest common prefix of all file paths.
    String commonString = String.valueOf(fileLists.get(0));
    for (int i = 1; i < fileLists.size(); i++) {
      commonString = commonString.substring(0,
          StringUtils.indexOfDifference(commonString, String.valueOf(fileLists.get(i))));
    }
    int index = commonString.lastIndexOf("/");
    commonString = commonString.substring(0, index);
    table = CarbonTable.buildTable(commonString, tableName, hadoopConf);
  } else {
    table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
  }
  if (enableBlockletDistribution) {
    // set cache level to blocklet level
    Map<String, String> tableProperties = table.getTableInfo().getFactTable().getTableProperties();
    tableProperties.put(CarbonCommonConstants.CACHE_LEVEL, "BLOCKLET");
    table.getTableInfo().getFactTable().setTableProperties(tableProperties);
  }
  final CarbonFileInputFormat format = new CarbonFileInputFormat();
  format.setTableInfo(job.getConfiguration(), table.getTableInfo());
  format.setTablePath(job.getConfiguration(), table.getTablePath());
  format.setTableName(job.getConfiguration(), table.getTableName());
  format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
  if (filterExpression != null) {
    format.setFilterPredicates(job.getConfiguration(), new IndexFilter(table, filterExpression, true));
  }
  if (null != this.fileLists) {
    format.setFileLists(this.fileLists);
  }
  if (projectionColumns != null) {
    // set the user projection
    int len = projectionColumns.length;
    for (int i = 0; i < len; i++) {
      if (projectionColumns[i].contains(".")) {
        throw new UnsupportedOperationException(
            "Complex child columns projection NOT supported through CarbonReader");
      }
    }
    format.setColumnProjection(job.getConfiguration(), projectionColumns);
  }
  if (disableLoadBlockIndex && filterExpression == null) {
    job.getConfiguration().set("filter_blocks", "false");
  }
  return format;
}
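prepareFileInputFormat is internal to the SDK; users reach it through CarbonReader.builder(...).build(). A sketch of typical reader usage following the SDK guide (the path and projection columns are illustrative):

import java.io.IOException;

import org.apache.carbondata.sdk.file.CarbonReader;

public class ReadExample {
  public static void main(String[] args) throws IOException, InterruptedException {
    // build() infers the schema from the files at the path and internally
    // calls prepareFileInputFormat to configure the CarbonFileInputFormat.
    CarbonReader reader = CarbonReader
        .builder("/tmp/carbon_output", "_temp")   // illustrative path and table name
        .projection(new String[]{"name", "age"})
        .build();
    while (reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      System.out.println(row[0] + "\t" + row[1]);
    }
    reader.close();
  }
}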
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonWriterBuilder method buildCarbonTable.
/**
* Build a {@link CarbonTable}
*/
private CarbonTable buildCarbonTable() {
  TableSchemaBuilder tableSchemaBuilder = TableSchema.builder();
  if (blockSize > 0) {
    tableSchemaBuilder = tableSchemaBuilder.blockSize(blockSize);
  }
  if (blockletSize > 0) {
    tableSchemaBuilder = tableSchemaBuilder.blockletSize(blockletSize);
  }
  if (pageSizeInMb > 0) {
    tableSchemaBuilder = tableSchemaBuilder.pageSizeInMb(pageSizeInMb);
  }
  tableSchemaBuilder.enableLocalDictionary(isLocalDictionaryEnabled);
  tableSchemaBuilder.localDictionaryThreshold(localDictionaryThreshold);
  List<String> sortColumnsList = new ArrayList<>();
  if (sortColumns == null) {
    // No sort columns given: default to all STRING, DATE and TIMESTAMP fields.
    // Skip null entries, since the user may have sized the field array larger
    // than the number of fields actually supplied (e.g. size 4 with only 2 fields).
    for (Field field : schema.getFields()) {
      if (null != field) {
        if (field.getDataType() == DataTypes.STRING
            || field.getDataType() == DataTypes.DATE
            || field.getDataType() == DataTypes.TIMESTAMP) {
          sortColumnsList.add(field.getFieldName());
        }
      }
    }
    sortColumns = sortColumnsList.toArray(new String[sortColumnsList.size()]);
  } else {
    sortColumnsList = Arrays.asList(sortColumns);
  }
  ColumnSchema[] sortColumnsSchemaList = new ColumnSchema[sortColumnsList.size()];
  List<String> invertedIdxColumnsList = new ArrayList<>();
  if (null != invertedIndexColumns) {
    invertedIdxColumnsList = Arrays.asList(invertedIndexColumns);
  }
  Field[] fields = schema.getFields();
  buildTableSchema(fields, tableSchemaBuilder, sortColumnsList, sortColumnsSchemaList, invertedIdxColumnsList);
  tableSchemaBuilder.setSortColumns(Arrays.asList(sortColumnsSchemaList));
  // Use a unique throwaway table name; SDK tables are non-transactional.
  String dbName = "";
  String tableName = "_tempTable-" + UUID.randomUUID().toString() + "_" + timestamp;
  // Named tableSchema to avoid shadowing the builder's 'schema' field used above.
  TableSchema tableSchema = tableSchemaBuilder.build();
  tableSchema.setTableName(tableName);
  return CarbonTable.builder()
      .tableName(tableSchema.getTableName())
      .databaseName(dbName)
      .tablePath(path)
      .tableSchema(tableSchema)
      .isTransactionalTable(false)
      .build();
}
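buildCarbonTable runs behind CarbonWriter.builder().build(): the writer materializes a throwaway, non-transactional CarbonTable from the user-supplied Schema. A sketch of the corresponding writer usage per the SDK guide (path and fields are illustrative):

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class WriteExample {
  public static void main(String[] args) throws Exception {
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    CarbonWriter writer = CarbonWriter.builder()
        .outputPath("/tmp/carbon_output")         // illustrative output path
        .withCsvInput(new Schema(fields))
        .writtenBy("WriteExample")
        .build();                                 // triggers buildCarbonTable internally
    writer.write(new String[]{"amy", "1"});
    writer.close();
  }
}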
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonSchemaReader method readSchemaFromFolder.
/**
 * Read the schema from a folder of CarbonData files.
 *
 * @param folderPath carbon file folder path
 * @param conf       Hadoop configuration; can carry S3A access key, secret key,
 *                   endpoint and other settings
 * @return carbon data Schema
 * @throws IOException
 */
private static Schema readSchemaFromFolder(String folderPath, Configuration conf) throws IOException {
  String tableName = "UnknownTable" + UUID.randomUUID();
  CarbonTable table = CarbonTable.buildTable(folderPath, tableName, conf);
  List<ColumnSchema> columnSchemaList = table.getTableInfo().getFactTable().getListOfColumns();
  // Count only top-level columns; child columns of complex types contain a '.'.
  int numOfChildren = 0;
  for (ColumnSchema columnSchema : columnSchemaList) {
    if (!(columnSchema.getColumnName().contains(CarbonCommonConstants.POINT))) {
      numOfChildren++;
    }
  }
  Field[] fields = new Field[numOfChildren];
  int indexOfFields = 0;
  for (ColumnSchema columnSchema : columnSchemaList) {
    if (!columnSchema.getColumnName().contains(CarbonCommonConstants.POINT)) {
      if (DataTypes.isStructType(columnSchema.getDataType())) {
        StructField structField = getStructChildren(table, columnSchema.getColumnName());
        List<StructField> list = new ArrayList<>();
        list.add(structField);
        fields[indexOfFields] = new Field(columnSchema.getColumnName(), DataTypes.createStructType(list));
        fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
        indexOfFields++;
      } else if (DataTypes.isArrayType(columnSchema.getDataType())) {
        StructField structField = getArrayChildren(table, columnSchema.getColumnName());
        List<StructField> list = new ArrayList<>();
        list.add(structField);
        fields[indexOfFields] = new Field(columnSchema.getColumnName(), "array", list);
        fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
        indexOfFields++;
      } else if (DataTypes.isMapType(columnSchema.getDataType())) {
        // TODO: map type is not reconstructed yet
      } else {
        fields[indexOfFields] = new Field(columnSchema);
        fields[indexOfFields].setSchemaOrdinal(columnSchema.getSchemaOrdinal());
        indexOfFields++;
      }
    }
  }
  return new Schema(fields);
}
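readSchemaFromFolder is private; callers go through the public CarbonSchemaReader.readSchema, which dispatches to it when handed a folder path. A usage sketch (the path is illustrative):

import org.apache.carbondata.sdk.file.CarbonSchemaReader;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class SchemaExample {
  public static void main(String[] args) throws Exception {
    // For a folder path this resolves to readSchemaFromFolder above.
    Schema schema = CarbonSchemaReader.readSchema("/tmp/carbon_output");
    for (Field field : schema.getFields()) {
      System.out.println(field.getFieldName() + ": " + field.getDataType());
    }
  }
}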
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class LocalCarbonStore method scan.
@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns,
    Expression filter) throws IOException {
  Objects.requireNonNull(tableIdentifier);
  Objects.requireNonNull(projectColumns);
  CarbonTable table = getTable(tableIdentifier.getTablePath());
  if (table.isStreamingSink() || table.isHivePartitionTable()) {
    throw new UnsupportedOperationException("streaming and partition tables are not supported");
  }
  // TODO: use InputFormat to prune data and read data
  final CarbonTableInputFormat format = new CarbonTableInputFormat();
  final Job job = Job.getInstance(new Configuration());
  CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
  CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
  CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
  CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
  CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
  CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
  if (filter != null) {
    CarbonInputFormat.setFilterPredicates(job.getConfiguration(), new IndexFilter(table, filter));
  }
  final List<InputSplit> splits =
      format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
  List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
  List<CarbonRow> rows = new ArrayList<>();
  try {
    for (InputSplit split : splits) {
      TaskAttemptContextImpl attempt =
          new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
      RecordReader reader = format.createRecordReader(split, attempt);
      reader.initialize(split, attempt);
      readers.add(reader);
    }
    for (RecordReader<Void, Object> reader : readers) {
      while (reader.nextKeyValue()) {
        rows.add((CarbonRow) reader.getCurrentValue());
      }
      // Readers are closed exactly once, in the finally block below.
    }
  } catch (InterruptedException e) {
    throw new IOException(e);
  } finally {
    for (RecordReader<Void, Object> reader : readers) {
      try {
        reader.close();
      } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
      }
    }
  }
  return rows.iterator();
}
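A hedged sketch of driving scan() directly, assuming LocalCarbonStore's no-argument constructor and an existing table at the given path (identifier arguments are illustrative):

import java.util.Iterator;

import org.apache.carbondata.core.datastore.row.CarbonRow;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;

public class ScanExample {
  public static void main(String[] args) throws Exception {
    LocalCarbonStore store = new LocalCarbonStore();
    AbsoluteTableIdentifier identifier =
        AbsoluteTableIdentifier.from("/tmp/store/default/t1", "default", "t1");
    // A null filter scans all rows; projection columns must be non-null.
    Iterator<CarbonRow> rows = store.scan(identifier, new String[]{"name", "age"}, null);
    while (rows.hasNext()) {
      System.out.println(rows.next());
    }
  }
}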