
Example 1 with ObjectArrayWritable

Use of org.apache.carbondata.hadoop.internal.ObjectArrayWritable in the Apache CarbonData project.

From the class MapredCarbonOutputFormat, method getHiveRecordWriter.

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        Progressable progress) throws IOException {
    ThreadLocalSessionInfo.setConfigurationToCurrentThread(jc);
    CarbonLoadModel carbonLoadModel = null;
    // Try to get the load model from the JobConf.
    String encodedString = jc.get(LOAD_MODEL);
    if (encodedString != null) {
        carbonLoadModel = (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
    } else {
        // Otherwise, try to get the load model from the container environment.
        encodedString = System.getenv("carbon");
        if (encodedString != null) {
            carbonLoadModel = (CarbonLoadModel) ObjectSerializationUtil.convertStringToObject(encodedString);
        } else {
            carbonLoadModel = HiveCarbonUtil.getCarbonLoadModel(tableProperties, jc);
        }
    }
    for (Map.Entry<Object, Object> entry : tableProperties.entrySet()) {
        carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getTableInfo().getFactTable().getTableProperties().put(entry.getKey().toString().toLowerCase(), entry.getValue().toString().toLowerCase());
    }
    String tablePath = FileFactory.getCarbonFile(carbonLoadModel.getTablePath()).getAbsolutePath();
    TaskAttemptID taskAttemptID = TaskAttemptID.forName(jc.get("mapred.task.id"));
    // The task attempt ID may be null here, so prepare a new one.
    if (taskAttemptID == null) {
        SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMddHHmm");
        String jobTrackerId = formatter.format(new Date());
        taskAttemptID = new TaskAttemptID(jobTrackerId, 0, TaskType.MAP, 0, 0);
        // Update the app name here; in this class it would otherwise be written as Hive by default.
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, "presto");
    } else {
        carbonLoadModel.setTaskNo("" + taskAttemptID.getTaskID().getId());
    }
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(jc, taskAttemptID);
    final boolean isHivePartitionedTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().isHivePartitionTable();
    PartitionInfo partitionInfo = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getPartitionInfo();
    final int partitionColumn = partitionInfo != null ? partitionInfo.getColumnSchemaList().size() : 0;
    if (isHivePartitionedTable) {
        carbonLoadModel.getMetrics().addToPartitionPath(finalOutPath.toString());
        context.getConfiguration().set("carbon.outputformat.writepath", finalOutPath.toString());
    }
    CarbonTableOutputFormat.setLoadModel(jc, carbonLoadModel);
    org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ObjectArrayWritable> re = super.getRecordWriter(context);
    return new FileSinkOperator.RecordWriter() {

        @Override
        public void write(Writable writable) throws IOException {
            try {
                ObjectArrayWritable objectArrayWritable = new ObjectArrayWritable();
                if (isHivePartitionedTable) {
                    Object[] actualRow = ((CarbonHiveRow) writable).getData();
                    Object[] newData = Arrays.copyOf(actualRow, actualRow.length + partitionColumn);
                    String[] partitionValues = finalOutPath.toString().substring(tablePath.length()).split("/");
                    for (int j = 0, i = actualRow.length; j < partitionValues.length; j++) {
                        if (partitionValues[j].contains("=")) {
                            newData[i++] = partitionValues[j].split("=")[1];
                        }
                    }
                    objectArrayWritable.set(newData);
                } else {
                    objectArrayWritable.set(((CarbonHiveRow) writable).getData());
                }
                re.write(NullWritable.get(), objectArrayWritable);
            } catch (InterruptedException e) {
                throw new IOException(e.getCause());
            }
        }

        @Override
        public void close(boolean b) throws IOException {
            try {
                re.close(context);
                ThreadLocalSessionInfo.setConfigurationToCurrentThread(context.getConfiguration());
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
        }
    };
}
Also used: ObjectArrayWritable (org.apache.carbondata.hadoop.internal.ObjectArrayWritable), NullWritable (org.apache.hadoop.io.NullWritable), Writable (org.apache.hadoop.io.Writable), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), RecordWriter (org.apache.hadoop.mapred.RecordWriter), CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel), PartitionInfo (org.apache.carbondata.core.metadata.schema.PartitionInfo), IOException (java.io.IOException), SimpleDateFormat (java.text.SimpleDateFormat), Date (java.util.Date), Map (java.util.Map)
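
The wrapper returned above ultimately funnels every Hive row into the underlying RecordWriter<NullWritable, ObjectArrayWritable>. A minimal sketch of that write pattern, assuming the writer and context come from CarbonTableOutputFormat as in the method above (the helper name writeRow and the sample row are illustrative, not part of CarbonData):

// Illustrative helper, not part of CarbonData: pushes one Object[] row through a
// RecordWriter<NullWritable, ObjectArrayWritable>, mirroring what the wrapper above does
// after it has appended any partition values to the row.
private static void writeRow(org.apache.hadoop.mapreduce.RecordWriter<NullWritable, ObjectArrayWritable> writer,
        Object[] row) throws IOException, InterruptedException {
    ObjectArrayWritable writable = new ObjectArrayWritable();
    writable.set(row);
    writer.write(NullWritable.get(), writable);
}
// Example call: writeRow(re, new Object[] { "id-1", "name-1" });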

Example 2 with ObjectArrayWritable

Use of org.apache.carbondata.hadoop.internal.ObjectArrayWritable in the Apache CarbonData project.

From the class CarbonIUD, method delete.

/**
 * This method deletes the rows at the given path that match the given filterExpression
 *
 * @param path             is the table path on which delete is performed
 * @param filterExpression is the expression used to select the records to delete
 * @throws IOException
 * @throws InterruptedException
 */
public void delete(String path, Expression filterExpression) throws IOException, InterruptedException {
    CarbonReader reader = CarbonReader.builder(path).projection(new String[] { CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID }).withHadoopConf(configuration).filter(filterExpression).build();
    RecordWriter<NullWritable, ObjectArrayWritable> deleteDeltaWriter = CarbonTableOutputFormat.getDeleteDeltaRecordWriter(path);
    ObjectArrayWritable writable = new ObjectArrayWritable();
    while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        writable.set(row);
        deleteDeltaWriter.write(NullWritable.get(), writable);
    }
    deleteDeltaWriter.close(null);
    reader.close();
}
Also used: ObjectArrayWritable (org.apache.carbondata.hadoop.internal.ObjectArrayWritable), NullWritable (org.apache.hadoop.io.NullWritable)
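
A possible call site for this method, sketched under the assumption that the predicate is built from CarbonData's filter-expression classes (org.apache.carbondata.core.scan.expression.ColumnExpression, org.apache.carbondata.core.scan.expression.LiteralExpression, org.apache.carbondata.core.scan.expression.conditional.EqualToExpression and org.apache.carbondata.core.metadata.datatype.DataTypes); the table path, column name, value, and the iud instance are placeholders:

// Illustrative only: delete every row whose 'name' column equals 'bob'.
// 'iud' is an already constructed CarbonIUD instance; path and column are placeholders.
Expression filter = new EqualToExpression(
    new ColumnExpression("name", DataTypes.STRING),
    new LiteralExpression("bob", DataTypes.STRING));
iud.delete("/tmp/carbon_table", filter);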

Example 3 with ObjectArrayWritable

Use of org.apache.carbondata.hadoop.internal.ObjectArrayWritable in the Apache CarbonData project.

From the class CarbonIUD, method update.

/**
 * This method updates the rows at the given path that match the given filterExpression
 *
 * @param path                        is the table path on which update is performed.
 * @param filterExpression is the expression used to select the records to update
 * @param updatedColumnToValueMapping contains the mapping of updatedColumns to updatedValues
 * @throws IOException
 * @throws InterruptedException
 * @throws InvalidLoadOptionException
 */
public void update(String path, Expression filterExpression, Map<String, String> updatedColumnToValueMapping) throws IOException, InterruptedException, InvalidLoadOptionException {
    List<String> indexFiles = getCarbonIndexFile(path);
    Schema schema = CarbonSchemaReader.readSchema(indexFiles.get(0)).asOriginOrder();
    Field[] fields = schema.getFields();
    String[] projectionColumns = new String[fields.length + 1];
    for (int i = 0; i < fields.length; i++) {
        projectionColumns[i] = (fields[i].getFieldName());
    }
    projectionColumns[projectionColumns.length - 1] = CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID;
    CarbonWriter writer = CarbonWriter.builder().outputPath(path).withHadoopConf(configuration).withCsvInput(schema).writtenBy("CarbonIUD").build();
    CarbonReader reader = CarbonReader.builder(path).projection(projectionColumns).withHadoopConf(configuration).filter(filterExpression).build();
    RecordWriter<NullWritable, ObjectArrayWritable> deleteDeltaWriter = CarbonTableOutputFormat.getDeleteDeltaRecordWriter(path);
    ObjectArrayWritable writable = new ObjectArrayWritable();
    while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        writable.set(Arrays.copyOfRange(row, row.length - 1, row.length));
        for (Map.Entry<String, String> column : updatedColumnToValueMapping.entrySet()) {
            row[getColumnIndex(fields, column.getKey())] = column.getValue();
        }
        writer.write(Arrays.copyOfRange(row, 0, row.length - 1));
        deleteDeltaWriter.write(NullWritable.get(), writable);
    }
    deleteDeltaWriter.close(null);
    writer.close();
    reader.close();
}
Also used: ObjectArrayWritable (org.apache.carbondata.hadoop.internal.ObjectArrayWritable), NullWritable (org.apache.hadoop.io.NullWritable), Field (org.apache.carbondata.core.metadata.datatype.Field), HashMap (java.util.HashMap), Map (java.util.Map)
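
An illustrative call site for this method, under the same assumptions about the filter-expression classes as in the delete sketch above; the column names, values, table path, and the iud instance are placeholders:

// Illustrative only: for every row where 'name' equals 'bob', set 'name' to 'alice'.
// 'iud' is an already constructed CarbonIUD instance.
Map<String, String> updatedColumnToValueMapping = new HashMap<>();
updatedColumnToValueMapping.put("name", "alice");
Expression filter = new EqualToExpression(
    new ColumnExpression("name", DataTypes.STRING),
    new LiteralExpression("bob", DataTypes.STRING));
iud.update("/tmp/carbon_table", filter, updatedColumnToValueMapping);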

Aggregations

ObjectArrayWritable (org.apache.carbondata.hadoop.internal.ObjectArrayWritable): 3
NullWritable (org.apache.hadoop.io.NullWritable): 3
Map (java.util.Map): 2
IOException (java.io.IOException): 1
SimpleDateFormat (java.text.SimpleDateFormat): 1
Date (java.util.Date): 1
HashMap (java.util.HashMap): 1
Field (org.apache.carbondata.core.metadata.datatype.Field): 1
PartitionInfo (org.apache.carbondata.core.metadata.schema.PartitionInfo): 1
CarbonLoadModel (org.apache.carbondata.processing.loading.model.CarbonLoadModel): 1
Writable (org.apache.hadoop.io.Writable): 1
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 1
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 1
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 1