Use of org.apache.carbondata.hadoop.internal.ObjectArrayWritable in project carbondata by apache.
The class MapredCarbonOutputFormat, method getHiveRecordWriter.
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {
  ThreadLocalSessionInfo.setConfigurationToCurrentThread(jc);
  CarbonLoadModel carbonLoadModel = null;
  // Try to get the load model from the JobConf.
  String encodedString = jc.get(LOAD_MODEL);
  if (encodedString != null) {
    carbonLoadModel =
        (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
  } else {
    // Try to get the load model from the container environment.
    encodedString = System.getenv("carbon");
    if (encodedString != null) {
      carbonLoadModel =
          (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
    } else {
      carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(tableProperties, jc);
    }
  }
  for (Map.Entry<Object, Object> entry : tableProperties.entrySet()) {
    carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getTableInfo().getFactTable()
        .getTableProperties()
        .put(entry.getKey().toString().toLowerCase(), entry.getValue().toString().toLowerCase());
  }
  String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath();
  TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id"));
  // The task attempt id can be null; if so, prepare a new one.
  if (taskAttemptID == null) {
    SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
    String jobTrackerId = formatter.format(new Date());
    taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
    // Update the app name here, as by default in this class it will be written by Hive.
    CarbonProperties.getInstance()
        .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, "presto");
  } else {
    carbonLoadModel.setTaskNo("" + taskAttemptID.getTaskID().getId());
  }
  TaskAttemptContextImpl context = new TaskAttemptContextImpl(jc, taskAttemptID);
  final boolean isHivePartitionedTable =
      carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable();
  PartitionInfo partitionInfo =
      carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getPartitionInfo();
  final int partitionColumn =
      partitionInfo != null ? partitionInfo.getColumnSchemaList().size() : 0;
  if (carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable()) {
    carbonLoadModel.getMetrics().addToPartitionPath(finalOutPath.toString());
    context.getConfiguration().set("carbon.outputformat.writepath", finalOutPath.toString());
  }
  CarbonTableOutputFormat.setLoadModel(jc, carbonLoadModel);
  org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ObjectArrayWritable> re =
      super.getRecordWriter(context);
  return new FileSinkOperator.RecordWriter() {
    @Override
    public void write(Writable writable) throws IOException {
      try {
        ObjectArrayWritable objectArrayWritable = new ObjectArrayWritable();
        if (isHivePartitionedTable) {
          // Append the partition values parsed from the output path after the actual row data.
          Object[] actualRow = ((CarbonHiveRow) writable).getData();
          Object[] newData = Arrays.copyOf(actualRow, actualRow.length + partitionColumn);
          String[] partitionValues =
              finalOutPath.toString().substring(tablePath.length()).split("/");
          for (int j = 0, i = actualRow.length; j < partitionValues.length; j++) {
            if (partitionValues[j].contains("=")) {
              newData[i++] = partitionValues[j].split("=")[1];
            }
          }
          objectArrayWritable.set(newData);
        } else {
          objectArrayWritable.set(((CarbonHiveRow) writable).getData());
        }
        re.write(NullWritable.get(), objectArrayWritable);
      } catch (InterruptedException e) {
        throw new IOException(e.getCause());
      }
    }

    @Override
    public void close(boolean b) throws IOException {
      try {
        re.close(context);
        ThreadLocalSessionInfo.setConfigurationToCurrentThread(context.getConfiguration());
      } catch (InterruptedException e) {
        throw new IOException(e);
      }
    }
  };
}
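The partitioned-table branch above derives partition values purely from the output path: everything after the table path is split on "/", and each name=value segment contributes one value that is appended after the actual row columns; segments without "=" (such as a trailing file name) are skipped. Below is a minimal standalone sketch of that parsing, not part of carbondata; the class name and paths are hypothetical and chosen only for illustration.

import java.util.ArrayList;
import java.util.List;

public class PartitionPathParsing {
  // Mirrors the logic in the RecordWriter above: strip the table path prefix,
  // then keep the value of every "name=value" path segment.
  static List<String> partitionValues(String tablePath, String finalOutPath) {
    String[] segments = finalOutPath.substring(tablePath.length()).split("/");
    List<String> values = new ArrayList<>();
    for (String segment : segments) {
      if (segment.contains("=")) {
        values.add(segment.split("=")[1]);
      }
    }
    return values;
  }

  public static void main(String[] args) {
    // Hypothetical table and output paths.
    String tablePath = "/warehouse/sales";
    String finalOutPath = "/warehouse/sales/country=US/year=2024";
    // Prints [US, 2024]; in getHiveRecordWriter these values fill the slots
    // reserved after the actual row data (actualRow.length + partitionColumn).
    System.out.println(partitionValues(tablePath, finalOutPath));
  }
}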
Use of org.apache.carbondata.hadoop.internal.ObjectArrayWritable in project carbondata by apache.
The class CarbonIUD, method delete.
/**
 * Deletes the rows at the given path by applying the filterExpression.
 *
 * @param path the table path on which the delete is performed
 * @param filterExpression the expression used to select the records to delete
 * @throws IOException
 * @throws InterruptedException
 */
public void delete(String path, Expression filterExpression)
    throws IOException, InterruptedException {
  CarbonReader reader = CarbonReader.builder(path)
      .projection(new String[] { CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID })
      .withHadoopConf(configuration)
      .filter(filterExpression)
      .build();
  RecordWriter<NullWritable, ObjectArrayWritable> deleteDeltaWriter =
      CarbonTableOutputFormat.getDeleteDeltaRecordWriter(path);
  ObjectArrayWritable writable = new ObjectArrayWritable();
  while (reader.hasNext()) {
    Object[] row = (Object[]) reader.readNextRow();
    writable.set(row);
    deleteDeltaWriter.write(NullWritable.get(), writable);
  }
  deleteDeltaWriter.close(null);
  reader.close();
}
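The delete above reads only the implicit tuple id of each row matching the filter and writes those ids into a delete delta via CarbonTableOutputFormat.getDeleteDeltaRecordWriter. A hedged usage sketch of invoking it follows: the table path and column name are placeholders, the package names are assumed to match the carbondata SDK and core modules, and it assumes this delete(path, filterExpression) overload and the CarbonIUD.getInstance() factory are accessible to the caller.

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
import org.apache.carbondata.sdk.file.CarbonIUD;

public class DeleteByFilterExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical table path; point it at an existing carbon table.
    String tablePath = "/tmp/carbon_store/person_table";

    // Roughly: DELETE FROM person_table WHERE name = 'bob'
    Expression filter = new EqualToExpression(
        new ColumnExpression("name", DataTypes.STRING),
        new LiteralExpression("bob", DataTypes.STRING));

    // Assumption: getInstance() and this delete overload are callable from here.
    CarbonIUD.getInstance().delete(tablePath, filter);
  }
}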
Use of org.apache.carbondata.hadoop.internal.ObjectArrayWritable in project carbondata by apache.
The class CarbonIUD, method update.
/**
 * Updates the rows at the given path by applying the filterExpression.
 *
 * @param path the table path on which the update is performed
 * @param filterExpression the expression used to select the records to update
 * @param updatedColumnToValueMapping the mapping from the columns to be updated to their new values
 * @throws IOException
 * @throws InterruptedException
 * @throws InvalidLoadOptionException
 */
public void update(String path, Expression filterExpression,
    Map<String, String> updatedColumnToValueMapping)
    throws IOException, InterruptedException, InvalidLoadOptionException {
  List<String> indexFiles = getCarbonIndexFile(path);
  Schema schema = CarbonSchemaReader.readSchema(indexFiles.get(0)).asOriginOrder();
  Field[] fields = schema.getFields();
  String[] projectionColumns = new String[fields.length + 1];
  for (int i = 0; i < fields.length; i++) {
    projectionColumns[i] = fields[i].getFieldName();
  }
  projectionColumns[projectionColumns.length - 1] =
      CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID;
  CarbonWriter writer = CarbonWriter.builder()
      .outputPath(path)
      .withHadoopConf(configuration)
      .withCsvInput(schema)
      .writtenBy("CarbonIUD")
      .build();
  CarbonReader reader = CarbonReader.builder(path)
      .projection(projectionColumns)
      .withHadoopConf(configuration)
      .filter(filterExpression)
      .build();
  RecordWriter<NullWritable, ObjectArrayWritable> deleteDeltaWriter =
      CarbonTableOutputFormat.getDeleteDeltaRecordWriter(path);
  ObjectArrayWritable writable = new ObjectArrayWritable();
  while (reader.hasNext()) {
    Object[] row = (Object[]) reader.readNextRow();
    writable.set(Arrays.copyOfRange(row, row.length - 1, row.length));
    for (Map.Entry<String, String> column : updatedColumnToValueMapping.entrySet()) {
      row[getColumnIndex(fields, column.getKey())] = column.getValue();
    }
    writer.write(Arrays.copyOfRange(row, 0, row.length - 1));
    deleteDeltaWriter.write(NullWritable.get(), writable);
  }
  deleteDeltaWriter.close(null);
  writer.close();
  reader.close();
}
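The update above projects every schema column plus the implicit tuple id, writes the tuple id to a delete delta, and writes the modified row through a fresh CarbonWriter, so an update is effectively a delete plus an insert. A hedged usage sketch follows: as before, the table path, column names, and values are placeholders, the package names are assumed to match the carbondata SDK and core modules, and it assumes this update(path, filterExpression, mapping) overload is accessible to the caller.

import java.util.HashMap;
import java.util.Map;

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
import org.apache.carbondata.sdk.file.CarbonIUD;

public class UpdateByFilterExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical table path; point it at an existing carbon table.
    String tablePath = "/tmp/carbon_store/person_table";

    // Roughly: UPDATE person_table SET age = '30' WHERE name = 'bob'
    Expression filter = new EqualToExpression(
        new ColumnExpression("name", DataTypes.STRING),
        new LiteralExpression("bob", DataTypes.STRING));
    Map<String, String> updatedColumnToValue = new HashMap<>();
    updatedColumnToValue.put("age", "30");

    // Assumption: getInstance() and this update overload are callable from here.
    CarbonIUD.getInstance().update(tablePath, filter, updatedColumnToValue);
  }
}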