Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class FileOutputFormatContainer, method getRecordWriter.
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // This needs to be set manually; under normal circumstances the MR Task does this.
  setWorkOutputPath(context);
  // Configure the output key and value classes.
  // This is required for writing null as the key for file-based tables.
  context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
  Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
  AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
  context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());
  RecordWriter<WritableComparable<?>, HCatRecord> rw;
  if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
    // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used, so null is fine.
    // (Records can't be written until the values of the dynamic partitions are deduced. By that time, a new
    // local instance of RecordWriter, with the correct output path, will be constructed.)
    rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
  } else {
    Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
    Path childPath = new Path(parentDir,
        FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
            context.getConfiguration().get("mapreduce.output.basename", "part")));
    rw = new StaticPartitionFileRecordWriterContainer(
        getBaseOutputFormat().getRecordWriter(
            parentDir.getFileSystem(context.getConfiguration()),
            new JobConf(context.getConfiguration()),
            childPath.toString(),
            InternalUtil.createReporter(context)),
        context);
  }
  return rw;
}
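The NullWritable key configured above matters to callers: a task that writes through HCatOutputFormat emits a null or NullWritable key and an HCatRecord value, and the key is simply ignored by the file-based writer. As a minimal sketch (not from the Hive source; the mapper name, input format, and column layout are assumptions), a map task feeding this writer might look like:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

// Hypothetical mapper: parses "id,name" text lines and writes them out as HCatRecords.
public class LineToHCatRecordMapper extends Mapper<LongWritable, Text, WritableComparable<?>, HCatRecord> {
  @Override
  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String[] cols = value.toString().split(",");
    List<Object> fields = new ArrayList<Object>();
    fields.add(Integer.valueOf(cols[0]));
    fields.add(cols[1]);
    HCatRecord record = new DefaultHCatRecord(fields);
    // The key is ignored by the HCatalog file writer, hence NullWritable.
    context.write(NullWritable.get(), record);
  }
}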
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class HCatRecordReader, method nextKeyValue.
/**
* Check if the wrapped RecordReader has another record, and if so convert it into an
* HCatRecord. We both check for records and convert here so a configurable percent of
* bad records can be tolerated.
*
* @return true if there is a next record
* @throws IOException on error
* @throws InterruptedException on error
*/
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (currentKey == null) {
    currentKey = baseRecordReader.createKey();
    currentValue = baseRecordReader.createValue();
  }
  while (baseRecordReader.next(currentKey, currentValue)) {
    HCatRecord r = null;
    Throwable t = null;
    errorTracker.incRecords();
    try {
      Object o = deserializer.deserialize(currentValue);
      r = new LazyHCatRecord(o, deserializer.getObjectInspector());
    } catch (Throwable throwable) {
      t = throwable;
    }
    if (r == null) {
      errorTracker.incErrors(t);
      continue;
    }
    DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size());
    int i = 0;
    for (String fieldName : outputSchema.getFieldNames()) {
      if (dataSchema.getPosition(fieldName) != null) {
        dr.set(i, r.get(fieldName, dataSchema));
      } else {
        dr.set(i, valuesNotInDataCols.get(fieldName));
      }
      i++;
    }
    currentHCatRecord = dr;
    return true;
  }
  return false;
}
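On the consuming side, the DefaultHCatRecord assembled here is what a map task receives as its value. A minimal sketch (not from the Hive source; the mapper name and the column name "c1" are assumptions) of looking a field up by name against the table schema, the same resolution nextKeyValue() performs with dataSchema:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;

// Hypothetical mapper: resolves a column by name in each HCatRecord delivered by HCatRecordReader.
public class ReadByNameMapper extends Mapper<WritableComparable, HCatRecord, Text, Text> {
  private HCatSchema schema;

  @Override
  protected void setup(Context context) throws IOException {
    // Schema of the records as HCatInputFormat presents them to this job.
    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
  }

  @Override
  protected void map(WritableComparable key, HCatRecord value, Context context)
      throws IOException, InterruptedException {
    // get(fieldName, schema) resolves the field position through the schema.
    Object c1 = value.get("c1", schema);
    context.write(new Text(String.valueOf(c1)), new Text(value.toString()));
  }
}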
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class TestHCatPartitioned, method columnOrderChangeTest.
// Check behavior when the order of columns is changed.
private void columnOrderChangeTest() throws Exception {
  HCatSchema tableSchema = getTableSchema();
  assertEquals(5, tableSchema.getFields().size());
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    objList.add("co str2value" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  Map<String, String> partitionMap = new HashMap<String, String>();
  partitionMap.put("part1", "p1value8");
  partitionMap.put("part0", "508");
  Exception exc = null;
  try {
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  } catch (IOException e) {
    exc = e;
  }
  assertTrue(exc != null);
  assertTrue(exc instanceof HCatException);
  assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType());
  partitionColumns = new ArrayList<HCatFieldSchema>();
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")));
  partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")));
  writeRecords = new ArrayList<HCatRecord>();
  for (int i = 0; i < 10; i++) {
    List<Object> objList = new ArrayList<Object>();
    objList.add(i);
    objList.add("co strvalue" + i);
    writeRecords.add(new DefaultHCatRecord(objList));
  }
  runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);
  if (isTableImmutable()) {
    // Read should get 10 + 20 + 10 + 10 + 20 rows
    runMRRead(70);
  } else {
    // +20 from the duplicate publish
    runMRRead(90);
  }
}
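For reference, the partitionMap built above is the same structure a caller hands to OutputJobInfo when publishing records to a single static partition. A minimal sketch (not the test's runMRCreate implementation; the database/table names are assumptions):

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

// Hypothetical helper: configures a job to write into the partition (part0=508, part1=p1value8).
public static void configureStaticPartitionOutput(Job job, HCatSchema columnSchema) throws Exception {
  Map<String, String> partitionValues = new HashMap<String, String>();
  partitionValues.put("part0", "508");
  partitionValues.put("part1", "p1value8");
  // null database means the default database; "testtable" is an assumed table name.
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(null, "testtable", partitionValues));
  // The schema describes only the data columns; partition keys come from partitionValues.
  HCatOutputFormat.setSchema(job, columnSchema);
  job.setOutputFormatClass(HCatOutputFormat.class);
}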
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class TestE2EScenarios, method copyTable.
private void copyTable(String in, String out) throws IOException, InterruptedException {
  Job ijob = new Job();
  Job ojob = new Job();
  HCatInputFormat inpy = new HCatInputFormat();
  inpy.setInput(ijob, null, in);
  HCatOutputFormat oupy = new HCatOutputFormat();
  oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));
  // Test HCatContext
  System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
  if (HCatContext.INSTANCE.getConf().isPresent()) {
    System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get().getBoolean(
        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
  }
  HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
  System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
  oupy.setSchema(ojob, tableSchema);
  oupy.checkOutputSpecs(ojob);
  OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
  oc.setupJob(ojob);
  for (InputSplit split : inpy.getSplits(ijob)) {
    TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
    TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
    RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
    rr.initialize(split, rtaskContext);
    OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
    taskOc.setupTask(wtaskContext);
    RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
    while (rr.nextKeyValue()) {
      rw.write(rr.getCurrentKey(), rr.getCurrentValue());
    }
    rw.close(wtaskContext);
    taskOc.commitTask(wtaskContext);
    rr.close();
  }
  oc.commitJob(ojob);
}
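The test drives the input and output formats by hand; the same table-to-table copy can also be expressed as an ordinary map-only MapReduce driver. A rough sketch (not from the Hive source; the class name, job name, and use of the default identity Mapper are assumptions, and cluster-specific configuration is omitted):

import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class CopyTableDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "hcat-copy");
    job.setJarByClass(CopyTableDriver.class);
    // Read every record of the source table (null database means the default database).
    HCatInputFormat.setInput(job, null, args[0]);
    job.setInputFormatClass(HCatInputFormat.class);
    // Write into the target table, reusing the source table's schema.
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(null, args[1], new HashMap<String, String>()));
    HCatOutputFormat.setSchema(job, HCatInputFormat.getTableSchema(job.getConfiguration()));
    job.setOutputFormatClass(HCatOutputFormat.class);
    // Map-only identity copy: the default Mapper passes each (key, HCatRecord) pair straight through.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}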
Use of org.apache.hive.hcatalog.data.HCatRecord in project hive by apache.
The class DataReaderSlave, method main.
public static void main(String[] args) throws IOException, ClassNotFoundException {
  ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[0])));
  ReaderContext cntxt = (ReaderContext) ois.readObject();
  ois.close();
  String[] inputSplitsToRead = args[1].split(",");
  List<InputSplit> splits = cntxt.getSplits();
  for (int i = 0; i < inputSplitsToRead.length; i++) {
    InputSplit split = splits.get(Integer.parseInt(inputSplitsToRead[i]));
    HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf());
    Iterator<HCatRecord> itr = reader.read();
    File f = new File(args[2] + "-" + i);
    f.delete();
    BufferedWriter outFile = new BufferedWriter(new FileWriter(f));
    while (itr.hasNext()) {
      String rec = itr.next().toString().replaceFirst("\\s+$", "");
      System.err.println(rec);
      outFile.write(rec + "\n");
    }
    outFile.close();
  }
}
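This slave program only works together with a master process that prepares the read and serializes the ReaderContext into the file named by args[0]. A minimal sketch of that side (not from the Hive source; the table name, output file name, and empty config map are assumptions):

import java.io.FileOutputStream;
import java.io.ObjectOutputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
import org.apache.hive.hcatalog.data.transfer.HCatReader;
import org.apache.hive.hcatalog.data.transfer.ReadEntity;
import org.apache.hive.hcatalog.data.transfer.ReaderContext;

public class DataReaderMasterSketch {
  public static void main(String[] args) throws Exception {
    // Describe what to read; the config map would normally carry metastore settings.
    ReadEntity entity = new ReadEntity.Builder().withTable("mytable").build();
    Map<String, String> config = new HashMap<String, String>();
    HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config);
    // prepareRead() contacts the metastore and returns a ReaderContext carrying the splits,
    // which the slave above deserializes and reads split by split.
    ReaderContext context = masterReader.prepareRead();
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream("readercontext.bin"));
    oos.writeObject(context);
    oos.close();
  }
}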