Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
Class OrcRecordUpdater, method parseKeyIndex:
static RecordIdentifier[] parseKeyIndex(Reader reader) {
  String[] stripes;
  try {
    ByteBuffer val =
        reader.getMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME).duplicate();
    stripes = utf8Decoder.decode(val).toString().split(";");
  } catch (CharacterCodingException e) {
    throw new IllegalArgumentException(
        "Bad string encoding for " + OrcRecordUpdater.ACID_KEY_INDEX_NAME, e);
  }
  RecordIdentifier[] result = new RecordIdentifier[stripes.length];
  for (int i = 0; i < stripes.length; ++i) {
    if (stripes[i].length() != 0) {
      String[] parts = stripes[i].split(",");
      result[i] = new RecordIdentifier();
      result[i].setValues(Long.parseLong(parts[0]), Integer.parseInt(parts[1]),
          Long.parseLong(parts[2]));
    }
  }
  return result;
}
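As the split on ";" and "," above suggests, the key index metadata is a ';'-separated list with one entry per stripe, each entry a "long,int,long" triple fed into RecordIdentifier.setValues (transaction/write id, bucket, row id). A minimal, self-contained sketch of the same parsing on a hypothetical index string follows; the class name and the concrete values are made up for illustration:

  import org.apache.hadoop.hive.ql.io.RecordIdentifier;

  public class KeyIndexParseSketch {
    public static void main(String[] args) {
      // Hypothetical key-index value: one "id,bucket,rowId" triple per stripe, ';'-separated.
      String keyIndex = "1,536870912,9;1,536870912,19;";
      String[] stripes = keyIndex.split(";");
      RecordIdentifier[] keys = new RecordIdentifier[stripes.length];
      for (int i = 0; i < stripes.length; i++) {
        if (!stripes[i].isEmpty()) {
          String[] parts = stripes[i].split(",");
          keys[i] = new RecordIdentifier(
              Long.parseLong(parts[0]),     // transaction/write id
              Integer.parseInt(parts[1]),   // bucket
              Long.parseLong(parts[2]));    // row id
        }
      }
      for (RecordIdentifier key : keys) {
        System.out.println(key);
      }
    }
  }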
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
Class StreamingAssert, method readRecords():
List<Record> readRecords() throws Exception {
  if (currentDeltas.isEmpty()) {
    throw new AssertionError("No data");
  }
  InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.input.dir", partitionLocation.toString());
  job.set("bucket_count", Integer.toString(table.getSd().getNumBuckets()));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  job.set(ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN.varname, "true");
  job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
  InputSplit[] splits = inputFormat.getSplits(job, 1);
  assertEquals(1, splits.length);
  final AcidRecordReader<NullWritable, OrcStruct> recordReader =
      (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(splits[0], job, Reporter.NULL);
  NullWritable key = recordReader.createKey();
  OrcStruct value = recordReader.createValue();
  List<Record> records = new ArrayList<>();
  while (recordReader.next(key, value)) {
    RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
    Record record = new Record(
        new RecordIdentifier(recordIdentifier.getTransactionId(),
            recordIdentifier.getBucketId(), recordIdentifier.getRowId()),
        value.toString());
    System.out.println(record);
    records.add(record);
  }
  recordReader.close();
  return records;
}
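Note that this variant configures the scan through HIVE_TRANSACTIONAL_TABLE_SCAN and ValidTxnList.VALID_TXNS_KEY and copies the identifier via the older getTransactionId/getBucketId accessors; the readRecords(int) overload shown further below drives the write-id based API instead (AcidUtils.setAcidOperationalProperties, ValidWriteIdList.VALID_WRITEIDS_KEY, getWriteId/getBucketProperty).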
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
Class MutatorCoordinator, method reconfigureState:
private void reconfigureState(OperationType operationType, List<String> newPartitionValues,
    Object record) throws WorkerException {
  RecordIdentifier newRecordIdentifier =
      extractRecordIdentifier(operationType, newPartitionValues, record);
  int newBucketId = newRecordIdentifier.getBucketProperty();
  if (newPartitionValues == null) {
    newPartitionValues = Collections.emptyList();
  }
  try {
    if (partitionHasChanged(newPartitionValues)) {
      if (table.createPartitions() && operationType == OperationType.INSERT) {
        partitionHelper.createPartitionIfNotExists(newPartitionValues);
      }
      Path newPartitionPath = partitionHelper.getPathForPartition(newPartitionValues);
      resetMutator(newBucketId, newPartitionValues, newPartitionPath);
    } else if (bucketIdHasChanged(newBucketId)) {
      resetMutator(newBucketId, partitionValues, partitionPath);
    } else {
      validateRecordSequence(operationType, newRecordIdentifier);
    }
  } catch (IOException e) {
    throw new WorkerException(
        "Failed to reset mutator when performing " + operationType + " of record: " + record, e);
  }
}
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
Class MutatorCoordinator, method extractRecordIdentifier:
private RecordIdentifier extractRecordIdentifier(OperationType operationType,
    List<String> newPartitionValues, Object record) throws BucketIdException {
  RecordIdentifier recordIdentifier = recordInspector.extractRecordIdentifier(record);
  int bucketIdFromRecord = BucketCodec.determineVersion(recordIdentifier.getBucketProperty())
      .decodeWriterId(recordIdentifier.getBucketProperty());
  int computedBucketId = bucketIdResolver.computeBucketId(record);
  if (operationType != OperationType.DELETE && bucketIdFromRecord != computedBucketId) {
    throw new BucketIdException("RecordIdentifier.bucketId != computed bucketId ("
        + computedBucketId + ") for record " + recordIdentifier
        + " in partition " + newPartitionValues + ".");
  }
  return recordIdentifier;
}
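As a rough illustration of the decode step above, the writer (bucket) id can be pulled out of an encoded bucket property with the same BucketCodec calls used in the method. The sketch below is not from the project; the class name and the concrete bucket-property value are assumed for illustration only:

  import org.apache.hadoop.hive.ql.io.BucketCodec;

  public class BucketDecodeSketch {
    public static void main(String[] args) {
      // Assumed example value of a bucket property as it might appear in a RecordIdentifier.
      int bucketProperty = 536870912;
      // Let the codec work out the encoding version, then decode the writer (bucket) id.
      int writerId = BucketCodec.determineVersion(bucketProperty).decodeWriterId(bucketProperty);
      System.out.println("writer (bucket) id = " + writerId);
    }
  }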
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
Class StreamingAssert, method readRecords(int numSplitsExpected):
/**
* TODO: this would be more flexible doing a SQL select statement rather than using InputFormat directly
* see {@link org.apache.hive.hcatalog.streaming.TestStreaming#checkDataWritten2(Path, long, long, int, String, String...)}
* @param numSplitsExpected
* @return
* @throws Exception
*/
List<Record> readRecords(int numSplitsExpected) throws Exception {
  if (currentDeltas.isEmpty()) {
    throw new AssertionError("No data");
  }
  InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.input.dir", partitionLocation.toString());
  job.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(table.getSd().getNumBuckets()));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  AcidUtils.setAcidOperationalProperties(job, true, null);
  job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
  InputSplit[] splits = inputFormat.getSplits(job, 1);
  assertEquals(numSplitsExpected, splits.length);
  List<Record> records = new ArrayList<>();
  for (InputSplit is : splits) {
    final AcidRecordReader<NullWritable, OrcStruct> recordReader =
        (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(is, job, Reporter.NULL);
    NullWritable key = recordReader.createKey();
    OrcStruct value = recordReader.createValue();
    while (recordReader.next(key, value)) {
      RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
      Record record = new Record(
          new RecordIdentifier(recordIdentifier.getWriteId(),
              recordIdentifier.getBucketProperty(), recordIdentifier.getRowId()),
          value.toString());
      System.out.println(record);
      records.add(record);
    }
    recordReader.close();
  }
  return records;
}
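Both readRecords variants rebuild a RecordIdentifier per row, and RecordIdentifier is comparable, which is convenient when a test wants a deterministic order regardless of split order. A small sketch, assuming its natural ordering is by transaction/write id, then bucket, then row id; the class name and values are made up for illustration:

  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.List;
  import org.apache.hadoop.hive.ql.io.RecordIdentifier;

  public class RecordIdOrderSketch {
    public static void main(String[] args) {
      List<RecordIdentifier> ids = new ArrayList<>();
      ids.add(new RecordIdentifier(2L, 536870912, 0L));
      ids.add(new RecordIdentifier(1L, 536870912, 1L));
      ids.add(new RecordIdentifier(1L, 536870912, 0L));
      // Assumed natural order: transaction/write id, then bucket, then row id.
      Collections.sort(ids);
      ids.forEach(System.out::println);
    }
  }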