Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class StoreCreator, method createCarbonStore:
/**
 * Create store without any restructure.
 */
public CarbonLoadModel createCarbonStore() throws Exception {
  CarbonLoadModel loadModel = createTableAndLoadModel();
  loadData(loadModel, storePath);
  return loadModel;
}
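A minimal usage sketch: a test builds a local store and then reads table metadata off the returned load model. The StoreCreator constructor arguments are assumptions for illustration; they are not shown in the snippet above.

// Sketch only; the StoreCreator constructor signature is an assumption.
StoreCreator creator = new StoreCreator("/tmp/carbon-store", "/tmp/data.csv");
CarbonLoadModel loadModel = creator.createCarbonStore();
// The returned model carries the table produced by the load.
String tableName = loadModel.getCarbonDataLoadSchema().getCarbonTable().getTableName();
System.out.println("Loaded table: " + tableName);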
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class MapredCarbonOutputFormat, method getHiveRecordWriter:
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {
  ThreadLocalSessionInfo.setConfigurationToCurrentThread(jc);
  CarbonLoadModel carbonLoadModel = null;
  // Try to get the load model from the JobConf.
  String encodedString = jc.get(LOAD_MODEL);
  if (encodedString != null) {
    carbonLoadModel =
        (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
  } else {
    // Try to get the load model from the container environment.
    encodedString = System.getenv("carbon");
    if (encodedString != null) {
      carbonLoadModel =
          (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
    } else {
      // Fall back to building the load model from the Hive table properties.
      carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(tableProperties, jc);
    }
  }
  for (Map.Entry<Object, Object> entry : tableProperties.entrySet()) {
    carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getTableInfo().getFactTable()
        .getTableProperties()
        .put(entry.getKey().toString().toLowerCase(), entry.getValue().toString().toLowerCase());
  }
  String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath();
  TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id"));
  // The task attempt id can be null here, so prepare a new one.
  if (taskAttemptID == null) {
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
    String jobTrackerId = formatter.format(new Date());
    taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
    // Update the app name here, as by default this class records Hive as the writer.
    CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, "presto");
  } else {
    carbonLoadModel.setTaskNo("" + taskAttemptID.getTaskID().getId());
  }
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(jc, taskAttemptID);
  final boolean isHivePartitionedTable =
      carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable();
  PartitionInfo partitionInfo =
      carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getPartitionInfo();
  final int partitionColumn =
      partitionInfo != null ? partitionInfo.getColumnSchemaList().size() : 0;
  if (isHivePartitionedTable) {
    carbonLoadModel.getMetrics().addToPartitionPath(finalOutPath.toString());
    context.getConfiguration().set("carbon.outputformat.writepath", finalOutPath.toString());
  }
  CarbonTableOutputFormat.setLoadModel(jc, carbonLoadModel);
  org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ObjectArrayWritable> re =
      super.getRecordWriter(context);
  return new FileSinkOperator.RecordWriter() {
    @Override
    public void write(Writable writable) throws IOException {
      try {
        ObjectArrayWritable objectArrayWritable = new ObjectArrayWritable();
        if (isHivePartitionedTable) {
          // Append the partition values parsed from the output path to the actual row.
          Object[] actualRow = ((CarbonHiveRow) writable).getData();
          Object[] newData = Arrays.copyOf(actualRow, actualRow.length + partitionColumn);
          String[] partitionValues =
              finalOutPath.toString().substring(tablePath.length()).split("/");
          for (int j = 0, i = actualRow.length; j < partitionValues.length; j++) {
            if (partitionValues[j].contains("=")) {
              newData[i++] = partitionValues[j].split("=")[1];
            }
          }
          objectArrayWritable.set(newData);
        } else {
          objectArrayWritable.set(((CarbonHiveRow) writable).getData());
        }
        re.write(NullWritable.get(), objectArrayWritable);
      } catch (InterruptedException e) {
        throw new IOException(e.getCause());
      }
    }

    @Override
    public void close(boolean b) throws IOException {
      try {
        re.close(context);
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(context.getConfiguration());
      } catch (InterruptedException e) {
        throw new IOException(e);
      }
    }
  };
}
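For context, a sketch of the producer side that the first branch above expects: the caller serializes the CarbonLoadModel into the JobConf under the statically imported LOAD_MODEL key before the record writer is requested. The surrounding setup is illustrative, not taken from the carbondata sources.

// Illustrative only: serialize the load model into the JobConf so that
// getHiveRecordWriter can decode it in its first branch above.
CarbonLoadModel loadModel = HiveCarbonUtil.getCarbonLoadModel(tableProperties, jobConf);
String encoded = ObjectSerializationUtil.convertObjectToString(loadModel);
jobConf.set(LOAD_MODEL, encoded); // the same statically imported key read above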
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class HiveCarbonUtil, method getCarbonLoadModel:
public static CarbonLoadModel getCarbonLoadModel(String tableName, String databaseName,
    String location, String sortColumnsString, String[] columns, String[] columnTypes,
    Configuration configuration) {
  CarbonLoadModel loadModel;
  CarbonTable carbonTable;
  try {
    String schemaFilePath = CarbonTablePath.getSchemaFilePath(location, configuration);
    AbsoluteTableIdentifier absoluteTableIdentifier =
        AbsoluteTableIdentifier.from(location, databaseName, tableName, "");
    if (FileFactory.getCarbonFile(schemaFilePath).exists()) {
      // A schema file exists: read the table from the store as a transactional table.
      carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier);
      carbonTable.setTransactionalTable(true);
    } else {
      String carbonDataFile = CarbonUtil.getFilePathExternalFilePath(location, configuration);
      if (carbonDataFile == null) {
        // No data file either: build the schema from the supplied columns.
        carbonTable = CarbonTable.buildFromTableInfo(getTableInfo(tableName, databaseName,
            location, sortColumnsString, columns, columnTypes, new ArrayList<>()));
      } else {
        // Infer the schema from an existing carbondata file.
        carbonTable = CarbonTable.buildFromTableInfo(
            SchemaReader.inferSchema(absoluteTableIdentifier, false, configuration));
      }
      carbonTable.setTransactionalTable(false);
    }
  } catch (SQLException | IOException e) {
    throw new RuntimeException("Unable to fetch schema for the table: " + tableName, e);
  }
  CarbonLoadModelBuilder carbonLoadModelBuilder = new CarbonLoadModelBuilder(carbonTable);
  Map<String, String> options = new HashMap<>();
  options.put("fileheader", Strings.mkString(columns, ","));
  try {
    loadModel = carbonLoadModelBuilder.build(options, System.currentTimeMillis(), "");
  } catch (InvalidLoadOptionException | IOException e) {
    throw new RuntimeException(e);
  }
  loadModel.setSkipParsers();
  loadModel.setMetrics(new DataLoadMetrics());
  return loadModel;
}
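A hedged invocation sketch for this overload; the table name, location, columns, and types below are placeholders, with type names following Hive's conventions:

// Placeholder arguments for illustration only.
Configuration conf = new Configuration();
CarbonLoadModel model = HiveCarbonUtil.getCarbonLoadModel(
    "sales",                       // table name
    "default",                     // database name
    "hdfs://namenode/store/sales", // table location
    "id",                          // sort columns
    new String[] { "id", "name" },
    new String[] { "int", "string" },
    conf);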
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class HiveCarbonUtil, method getCarbonLoadModel:
public static CarbonLoadModel getCarbonLoadModel(Configuration tableProperties) {
  String[] tableUniqueName = tableProperties.get("name").split("\\.");
  String databaseName = tableUniqueName[0];
  String tableName = tableUniqueName[1];
  String tablePath = tableProperties.get(hive_metastoreConstants.META_TABLE_LOCATION);
  String columns = tableProperties.get(hive_metastoreConstants.META_TABLE_COLUMNS);
  String sortColumns = tableProperties.get("sort_columns");
  String columnTypes = tableProperties.get(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
  String partitionColumns =
      tableProperties.get(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
  String partitionColumnTypes =
      tableProperties.get(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
  if (partitionColumns != null) {
    // Partition columns are appended to the regular schema columns.
    columns = columns + "," + partitionColumns;
    columnTypes = columnTypes + ":" + partitionColumnTypes;
  }
  String[] columnTypeArray = splitSchemaStringToArray(columnTypes);
  String complexDelim = tableProperties.get("complex_delimiter", "");
  CarbonLoadModel carbonLoadModel = getCarbonLoadModel(tableName, databaseName, tablePath,
      sortColumns, columns.split(","), columnTypeArray, tableProperties);
  carbonLoadModel.setCarbonTransactionalTable(true);
  carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().setTransactionalTable(true);
  for (String delim : complexDelim.split(",")) {
    carbonLoadModel.setComplexDelimiter(delim);
  }
  return carbonLoadModel;
}
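A sketch of the Configuration this overload expects; the keys mirror the lookups above, and the values are illustrative placeholders (note that the type list is ":"-separated, matching the concatenation above):

Configuration conf = new Configuration();
conf.set("name", "default.sales"); // "database.table", split on the dot above
conf.set(hive_metastoreConstants.META_TABLE_LOCATION, "hdfs://namenode/store/sales");
conf.set(hive_metastoreConstants.META_TABLE_COLUMNS, "id,name");
conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, "int:string");
conf.set("sort_columns", "id");
CarbonLoadModel model = HiveCarbonUtil.getCarbonLoadModel(conf);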
Use of org.apache.carbondata.processing.loading.model.CarbonLoadModel in project carbondata by apache.
The class HiveCarbonUtil, method getCarbonLoadModel:
public static CarbonLoadModel getCarbonLoadModel(Properties tableProperties,
    Configuration configuration) {
  String[] tableUniqueName = tableProperties.getProperty("name").split("\\.");
  String databaseName = tableUniqueName[0];
  String tableName = tableUniqueName[1];
  String tablePath = tableProperties.getProperty(hive_metastoreConstants.META_TABLE_LOCATION);
  String columns = tableProperties.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
  String sortColumns = tableProperties.getProperty("sort_columns");
  String[] columnTypes = splitSchemaStringToArray(tableProperties.getProperty("columns.types"));
  String complexDelim = tableProperties.getProperty("complex_delimiter", "");
  CarbonLoadModel carbonLoadModel = getCarbonLoadModel(tableName, databaseName, tablePath,
      sortColumns, columns.split(","), columnTypes, configuration);
  for (String delim : complexDelim.split(",")) {
    carbonLoadModel.setComplexDelimiter(delim);
  }
  return carbonLoadModel;
}
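And a matching sketch for this Properties-based variant, as Hive's SerDe layer would supply it; the values are placeholders:

Properties props = new Properties();
props.setProperty("name", "default.sales");
props.setProperty(hive_metastoreConstants.META_TABLE_LOCATION, "hdfs://namenode/store/sales");
props.setProperty(hive_metastoreConstants.META_TABLE_COLUMNS, "id,name");
props.setProperty("columns.types", "int:string"); // this overload reads the literal key "columns.types"
props.setProperty("sort_columns", "id");
CarbonLoadModel model = HiveCarbonUtil.getCarbonLoadModel(props, new Configuration());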