use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
the class TestMutations method testMulti.
@Test
public void testMulti() throws Exception {
Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
MutatorClient client = new MutatorClientBuilder().addSinkTable(table.getDbName(), table.getTableName(), true).metaStoreUri(metaStoreUri).build();
client.connect();
Transaction transaction = client.newTransaction();
List<AcidTable> destinations = client.getTables();
transaction.begin();
MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
MutatorCoordinator coordinator = new MutatorCoordinatorBuilder().metaStoreUri(metaStoreUri).table(destinations.get(0)).mutatorFactory(mutatorFactory).build();
BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
MutableRecord asiaIndiaRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
MutableRecord europeUkRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(2, "Hello streaming"));
MutableRecord europeFranceRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(3, "Hello streaming"));
MutableRecord europeFranceRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(4, "Bonjour streaming"));
coordinator.insert(ASIA_INDIA, asiaIndiaRecord1);
coordinator.insert(EUROPE_UK, europeUkRecord1);
coordinator.insert(EUROPE_FRANCE, europeFranceRecord1);
coordinator.insert(EUROPE_FRANCE, europeFranceRecord2);
coordinator.close();
transaction.commit();
// ASIA_INDIA
StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
streamingAssertions.assertMinWriteId(1L);
streamingAssertions.assertMaxWriteId(1L);
streamingAssertions.assertExpectedFileCount(1);
List<Record> readRecords = streamingAssertions.readRecords();
assertThat(readRecords.size(), is(1));
assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
// EUROPE_UK
streamingAssertions = assertionFactory.newStreamingAssert(table, EUROPE_UK);
streamingAssertions.assertMinWriteId(1L);
streamingAssertions.assertMaxWriteId(1L);
streamingAssertions.assertExpectedFileCount(1);
readRecords = streamingAssertions.readRecords();
assertThat(readRecords.size(), is(1));
assertThat(readRecords.get(0).getRow(), is("{2, Hello streaming}"));
assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
// EUROPE_FRANCE
streamingAssertions = assertionFactory.newStreamingAssert(table, EUROPE_FRANCE);
streamingAssertions.assertMinWriteId(1L);
streamingAssertions.assertMaxWriteId(1L);
streamingAssertions.assertExpectedFileCount(1);
readRecords = streamingAssertions.readRecords();
assertThat(readRecords.size(), is(2));
assertThat(readRecords.get(0).getRow(), is("{3, Hello streaming}"));
assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
assertThat(readRecords.get(1).getRow(), is("{4, Bonjour streaming}"));
assertThat(readRecords.get(1).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 1L)));
client.close();
}
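For reference, the RecordIdentifier values asserted above are ordered triples of (writeId, bucketProperty, rowId). Below is a minimal sketch of constructing and comparing such identifiers; the encodeBucket helper here is a hypothetical stand-in for the test's own helper and is assumed to delegate to BucketCodec.V1.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
import org.apache.hadoop.hive.ql.io.BucketCodec;
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class RecordIdentifierSketch {
  // Hypothetical equivalent of the test's encodeBucket(int) helper.
  static int encodeBucket(int bucketId) {
    return BucketCodec.V1.encode(new AcidOutputFormat.Options(new Configuration()).bucket(bucketId));
  }

  public static void main(String[] args) {
    // writeId 1, bucket 0, rowIds 0 and 1 - the shape asserted for EUROPE_FRANCE above.
    RecordIdentifier first = new RecordIdentifier(1L, encodeBucket(0), 0L);
    RecordIdentifier second = new RecordIdentifier(1L, encodeBucket(0), 1L);
    // RecordIdentifier compares by writeId, then bucketProperty, then rowId.
    System.out.println(first.compareTo(second) < 0); // true
    System.out.println(first.getWriteId() + "/" + first.getBucketProperty() + "/" + first.getRowId());
  }
}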
use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
the class TestInputOutputFormat method testAcidReadPastLastStripeOffset.
@Test
public void testAcidReadPastLastStripeOffset() throws Exception {
Path baseDir = new Path(workDir, "base_00100");
testFilePath = new Path(baseDir, "bucket_00000");
fs.mkdirs(baseDir);
fs.delete(testFilePath, true);
TypeDescription fileSchema = TypeDescription.fromString("struct<operation:int," + "originalTransaction:bigint,bucket:int,rowId:bigint," + "currentTransaction:bigint," + "row:struct<a:int,b:struct<c:int>,d:string>>");
OrcRecordUpdater.KeyIndexBuilder indexBuilder = new OrcRecordUpdater.KeyIndexBuilder("test");
OrcFile.WriterOptions options = OrcFile.writerOptions(conf).fileSystem(fs).setSchema(fileSchema).compress(org.apache.orc.CompressionKind.NONE).callback(indexBuilder).stripeSize(128);
// Create ORC file with small stripe size so we can write multiple stripes.
Writer writer = OrcFile.createWriter(testFilePath, options);
VectorizedRowBatch batch = fileSchema.createRowBatch(TypeDescription.RowBatchVersion.USE_DECIMAL64, 1000);
batch.size = 1000;
StructColumnVector scv = (StructColumnVector) batch.cols[5];
// operation
batch.cols[0].isRepeating = true;
((LongColumnVector) batch.cols[0]).vector[0] = OrcRecordUpdater.INSERT_OPERATION;
// original transaction
batch.cols[1].isRepeating = true;
((LongColumnVector) batch.cols[1]).vector[0] = 1;
// bucket
batch.cols[2].isRepeating = true;
((LongColumnVector) batch.cols[2]).vector[0] = BucketCodec.V1.encode(new AcidOutputFormat.Options(conf).bucket(0).statementId(0));
// current transaction
batch.cols[4].isRepeating = true;
((LongColumnVector) batch.cols[4]).vector[0] = 1;
LongColumnVector lcv = (LongColumnVector) ((StructColumnVector) scv.fields[1]).fields[0];
for (int r = 0; r < 1000; r++) {
// row id
((LongColumnVector) batch.cols[3]).vector[r] = r;
// a
((LongColumnVector) scv.fields[0]).vector[r] = r * 42;
// b.c
lcv.vector[r] = r * 10001;
// d
((BytesColumnVector) scv.fields[2]).setVal(r, Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION, 1, (int) (((LongColumnVector) batch.cols[2]).vector[0]), r);
}
// Minimum 5000 rows per stripe.
for (int idx = 0; idx < 8; ++idx) {
writer.addRowBatch(batch);
// bucket
batch.cols[2].isRepeating = true;
((LongColumnVector) batch.cols[2]).vector[0] = BucketCodec.V1.encode(new AcidOutputFormat.Options(conf).bucket(0).statementId(idx + 1));
for (long row_id : ((LongColumnVector) batch.cols[3]).vector) {
indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION, 1, (int) (((LongColumnVector) batch.cols[2]).vector[0]), row_id);
}
}
writer.close();
long fileLength = fs.getFileStatus(testFilePath).getLen();
// Find the last stripe.
List<StripeInformation> stripes;
RecordIdentifier[] keyIndex;
try (Reader orcReader = OrcFile.createReader(fs, testFilePath)) {
stripes = orcReader.getStripes();
keyIndex = OrcRecordUpdater.parseKeyIndex(orcReader);
}
StripeInformation lastStripe = stripes.get(stripes.size() - 1);
long lastStripeOffset = lastStripe.getOffset();
long lastStripeLength = lastStripe.getLength();
Assert.assertEquals("Index length doesn't match number of stripes", stripes.size(), keyIndex.length);
Assert.assertEquals("1st Index entry mismatch", new RecordIdentifier(1, 536870916, 999), keyIndex[0]);
Assert.assertEquals("2nd Index entry mismatch", new RecordIdentifier(1, 536870920, 999), keyIndex[1]);
// test with same schema with include
conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string");
conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
LOG.info("Last stripe " + stripes.size() + ", offset " + lastStripeOffset + ", length " + lastStripeLength);
// Specify an OrcSplit that starts beyond the offset of the last stripe.
OrcSplit split = new OrcSplit(testFilePath, null, lastStripeOffset + 1, lastStripeLength, new String[0], null, false, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength, workDir, null);
OrcInputFormat inputFormat = new OrcInputFormat();
AcidInputFormat.RowReader<OrcStruct> reader = inputFormat.getReader(split, new AcidInputFormat.Options(conf));
int record = 0;
OrcRawRecordMerger.ReaderKey id = reader.createKey();
OrcStruct struct = reader.createValue();
// Since the split starts past the last stripe's offset, that stripe should
// not be read. Thus 0 records.
while (reader.next(id, struct)) {
record += 1;
}
assertEquals(0, record);
reader.close();
}
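The key-index entries asserted above (536870916 and 536870920) are bucketProperty values: under BucketCodec.V1 the int packs a codec version, a writer/bucket id, and a statement id, which is why the entries differ even though every batch was written to bucket 0. A small sketch decoding them follows; the decoded values noted in the comments are an assumption based on the statementId increments in the loop above.
import org.apache.hadoop.hive.ql.io.BucketCodec;

public class BucketPropertySketch {
  public static void main(String[] args) {
    for (int bucketProperty : new int[] { 536870916, 536870920 }) {
      // Decode the packed int back into its writer (bucket) id and statement id.
      System.out.println(bucketProperty
          + " -> bucket=" + BucketCodec.V1.decodeWriterId(bucketProperty)
          + ", statementId=" + BucketCodec.V1.decodeStatementId(bucketProperty));
    }
    // Assumed output: bucket=0 for both, with different statementIds, matching the
    // statementId bumps applied per row batch in the test above.
  }
}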
use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
the class OrcRawRecordMerger method discoverOriginalKeyBounds.
/**
* Find the key range for original bucket files.
* For unbucketed tables the insert event data is still written to bucket_N file except that
* N is just a writer ID - it still matches {@link RecordIdentifier#getBucketProperty()}. For
* 'original' files (unbucketed) the same applies. A file 000000_0 encodes a taskId/writerId and
* at read time we synthesize {@link RecordIdentifier#getBucketProperty()} to match the file name
* and so the same bucketProperty is used here to create minKey/maxKey, i.e. these keys are valid
* to filter data from delete_delta files even for unbucketed tables.
* @param reader the reader
* @param bucket the bucket number we are reading
* @param options the options for reading with
* @throws IOException
*/
private KeyInterval discoverOriginalKeyBounds(Reader reader, int bucket, Reader.Options options, Configuration conf, Options mergerOptions) throws IOException {
long rowLength = 0;
long rowOffset = 0;
// this would usually be at block boundary
long offset = options.getOffset();
// this would usually be at block boundary
long maxOffset = options.getMaxOffset();
boolean isTail = true;
RecordIdentifier minKey = null;
RecordIdentifier maxKey = null;
TransactionMetaData tfp = TransactionMetaData.findWriteIDForSynthetcRowIDs(mergerOptions.getBucketPath(), mergerOptions.getRootPath(), conf);
int bucketProperty = encodeBucketId(conf, bucket, tfp.statementId);
/**
* options.getOffset() and getMaxOffset() would usually be at block boundary which doesn't
* necessarily match stripe boundary. So we want to come up with minKey to be one before the 1st
* row of the first stripe that starts after getOffset() and maxKey to be the last row of the
* stripe that contains getMaxOffset(). This breaks if getOffset() and getMaxOffset() are inside
* the same stripe - in this case we have minKey & isTail=false but rowLength is never set.
* (HIVE-16953)
*/
for (StripeInformation stripe : reader.getStripes()) {
if (offset > stripe.getOffset()) {
rowOffset += stripe.getNumberOfRows();
} else if (maxOffset > stripe.getOffset()) {
rowLength += stripe.getNumberOfRows();
} else {
isTail = false;
break;
}
}
if (rowOffset > 0) {
minKey = new RecordIdentifier(tfp.syntheticWriteId, bucketProperty, rowOffset - 1);
}
if (!isTail) {
maxKey = new RecordIdentifier(tfp.syntheticWriteId, bucketProperty, rowOffset + rowLength - 1);
}
return new KeyInterval(minKey, maxKey);
}
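To make the stripe walk above concrete, here is a self-contained sketch of the same rowOffset/rowLength bookkeeping over hypothetical stripe offsets and row counts; only the arithmetic mirrors the method, the numbers are made up.
public class OriginalKeyBoundsSketch {
  public static void main(String[] args) {
    long[] stripeOffsets = { 3, 1000, 2000, 3000 }; // hypothetical stripe start offsets
    long[] stripeRows = { 500, 500, 500, 500 };     // hypothetical rows per stripe
    long offset = 1000;                             // split start (usually a block boundary)
    long maxOffset = 2999;                          // split end

    long rowOffset = 0, rowLength = 0;
    boolean isTail = true;
    for (int i = 0; i < stripeOffsets.length; i++) {
      if (offset > stripeOffsets[i]) {
        rowOffset += stripeRows[i];                 // stripe lies entirely before the split
      } else if (maxOffset > stripeOffsets[i]) {
        rowLength += stripeRows[i];                 // stripe is covered by the split
      } else {
        isTail = false;                             // first stripe past the split
        break;
      }
    }
    Long minRowId = null;
    if (rowOffset > 0) {
      // one before the first row of the first covered stripe (499 with these numbers)
      minRowId = rowOffset - 1;
    }
    Long maxRowId = null;
    if (!isTail) {
      // last row of the last covered stripe (1499 with these numbers)
      maxRowId = rowOffset + rowLength - 1;
    }
    System.out.println("minRowId=" + minRowId + ", maxRowId=" + maxRowId);
  }
}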
use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
the class VectorizedOrcAcidRowBatchReader method findMinMaxKeys.
/**
* A given ORC reader will always process one or more whole stripes but the
* split boundaries may not line up with stripe boundaries if the InputFormat
* doesn't understand ORC specifics. So first we need to figure out which
* stripe(s) we are reading.
*
* Suppose txn1 writes 100K rows
* and txn2 writes 100 rows so we have events
* {1,0,0}....{1,0,100K},{2,0,0}...{2,0,100} in 2 files
* After compaction we may have 2 stripes
* {1,0,0}...{1,0,90K},{1,0,90001}...{2,0,100}
*
* Now suppose there is a delete stmt that deletes every row. So when we load
* the 2nd stripe, if we just look at stripe {@link ColumnStatistics},
* minKey={1,0,100} and maxKey={2,0,90001}, all but the 1st 100 delete events
* will get loaded. But with {@link OrcRecordUpdater#ACID_KEY_INDEX_NAME},
* minKey={1,0,90001} and maxKey={2,0,100} so we only load about 10K deletes.
*
* Also, even with Query Based compactor (once we have it), FileSinkOperator
* uses OrcRecordWriter to write to file, so we should have the
* hive.acid.index in place.
*
* If reading the 1st stripe, we don't have the start event, so we'll get it
* from stats, which will strictly speaking be accurate only wrt writeId and
* bucket but that is good enough.
*
* @return empty <code>KeyInterval</code> if KeyInterval could not be
* determined
*/
private OrcRawRecordMerger.KeyInterval findMinMaxKeys(OrcSplit orcSplit, Configuration conf, Reader.Options deleteEventReaderOptions) throws IOException {
final boolean noDeleteDeltas = orcSplit.getDeltas().size() == 0;
if (!HiveConf.getBoolVar(conf, ConfVars.FILTER_DELETE_EVENTS) || noDeleteDeltas) {
LOG.debug("findMinMaxKeys() " + ConfVars.FILTER_DELETE_EVENTS + "=false");
return new OrcRawRecordMerger.KeyInterval(null, null);
}
try (VectorizedOrcAcidRowBatchReader.ReaderData orcReaderData = getOrcReaderData(orcSplit.getPath(), conf, cacheTag, orcSplit.getFileKey())) {
if (orcSplit.isOriginal()) {
/**
* Among originals we may have files with _copy_N suffix. To properly
* generate a synthetic ROW__ID for them we need
* {@link OffsetAndBucketProperty} which could be an expensive computation
* if there are lots of copy_N files for a given bucketId. But unless
* there are delete events, we often don't need synthetic ROW__IDs at all.
* Kind of chicken-and-egg - deal with this later.
* See {@link OrcRawRecordMerger#discoverOriginalKeyBounds(Reader, int,
* Reader.Options, Configuration, OrcRawRecordMerger.Options)}
*/
LOG.debug("findMinMaxKeys(original split)");
return findOriginalMinMaxKeys(orcSplit, orcReaderData.orcTail, deleteEventReaderOptions);
}
List<StripeInformation> stripes = orcReaderData.orcTail.getStripes();
final long splitStart = orcSplit.getStart();
final long splitEnd = splitStart + orcSplit.getLength();
int firstStripeIndex = -1;
int lastStripeIndex = -1;
for (int i = 0; i < stripes.size(); i++) {
StripeInformation stripe = stripes.get(i);
long stripeEnd = stripe.getOffset() + stripe.getLength();
if (firstStripeIndex == -1 && stripe.getOffset() >= splitStart) {
firstStripeIndex = i;
}
if (lastStripeIndex == -1 && splitEnd <= stripeEnd) {
lastStripeIndex = i;
}
}
if (lastStripeIndex == -1) {
// split goes to the EOF which is > end of stripe since file has a footer
assert stripes.get(stripes.size() - 1).getOffset() + stripes.get(stripes.size() - 1).getLength() < splitEnd;
lastStripeIndex = stripes.size() - 1;
}
if (firstStripeIndex > lastStripeIndex || firstStripeIndex == -1) {
/**
* If the firstStripeIndex was set after the lastStripeIndex the split lies entirely within a single stripe.
* In case the split lies entirely within the last stripe, the firstStripeIndex will never be found, hence the
* second condition.
* In this case, the reader for this split will not read any data.
* See {@link org.apache.orc.impl.RecordReaderImpl#RecordReaderImpl}.
* Create a KeyInterval such that no delete delta records are loaded into memory in the deleteEventRegistry.
*/
long minRowId = 1;
long maxRowId = 0;
int minBucketProp = 1;
int maxBucketProp = 0;
OrcRawRecordMerger.KeyInterval keyIntervalTmp = new OrcRawRecordMerger.KeyInterval(new RecordIdentifier(1, minBucketProp, minRowId), new RecordIdentifier(0, maxBucketProp, maxRowId));
setSARG(keyIntervalTmp, deleteEventReaderOptions, minBucketProp, maxBucketProp, minRowId, maxRowId);
LOG.info("findMinMaxKeys(): " + keyIntervalTmp + " stripes(" + firstStripeIndex + "," + lastStripeIndex + ")");
return keyIntervalTmp;
}
if (firstStripeIndex == -1 || lastStripeIndex == -1) {
// this should not happen but... if we don't know which stripe(s) are
// involved we can't figure out min/max bounds
LOG.warn("Could not find stripe (" + firstStripeIndex + "," + lastStripeIndex + ")");
return new OrcRawRecordMerger.KeyInterval(null, null);
}
RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(orcReaderData.orcTail);
if (keyIndex == null) {
LOG.warn("Could not find keyIndex (" + firstStripeIndex + "," + lastStripeIndex + "," + stripes.size() + ")");
}
if (keyIndex != null && keyIndex.length != stripes.size()) {
LOG.warn("keyIndex length doesn't match (" + firstStripeIndex + "," + lastStripeIndex + "," + stripes.size() + "," + keyIndex.length + ")");
return new OrcRawRecordMerger.KeyInterval(null, null);
}
/**
* If {@link OrcConf#ROW_INDEX_STRIDE} is set to 0, all column stats in the
* ORC file are disabled (objects for them still exist but have min/max set
* to MIN_LONG/MAX_LONG), so we only use column stats if they are actually
* computed. Streaming ingest and minor compaction used to set it to 0, so
* there are lots of legacy files with no (rather, bad) column stats.
*/
boolean columnStatsPresent = orcReaderData.orcTail.getFooter().getRowIndexStride() > 0;
if (!columnStatsPresent) {
LOG.debug("findMinMaxKeys() No ORC column stats");
}
List<StripeStatistics> stats = orcReaderData.reader.getVariantStripeStatistics(null);
assert stripes.size() == stats.size() : "str.s=" + stripes.size() + " sta.s=" + stats.size();
RecordIdentifier minKey = null;
if (firstStripeIndex > 0 && keyIndex != null) {
// valid keys are strictly > than this key
minKey = keyIndex[firstStripeIndex - 1];
// add 1 to make comparison >= to match the case of 0th stripe
minKey.setRowId(minKey.getRowId() + 1);
} else {
if (columnStatsPresent) {
minKey = getKeyInterval(stats.get(firstStripeIndex).getColumnStatistics()).getMinKey();
}
}
RecordIdentifier maxKey = null;
if (keyIndex != null) {
maxKey = keyIndex[lastStripeIndex];
} else {
if (columnStatsPresent) {
maxKey = getKeyInterval(stats.get(lastStripeIndex).getColumnStatistics()).getMaxKey();
}
}
OrcRawRecordMerger.KeyInterval keyInterval = new OrcRawRecordMerger.KeyInterval(minKey, maxKey);
LOG.info("findMinMaxKeys(): " + keyInterval + " stripes(" + firstStripeIndex + "," + lastStripeIndex + ")");
long minBucketProp = Long.MAX_VALUE, maxBucketProp = Long.MIN_VALUE;
long minRowId = Long.MAX_VALUE, maxRowId = Long.MIN_VALUE;
if (columnStatsPresent) {
/**
* figure out min/max bucket, rowid for push down. This is different from
* min/max ROW__ID because ROW__ID comparison uses dictionary order on two
* tuples (a,b,c), but PPD can only do
* (a between (x,y) and b between(x1,y1) and c between(x2,y2))
* Consider:
* (0,536936448,0), (0,536936448,2), (10000001,536936448,0)
* The 1st is the min ROW__ID and the 3rd is the max ROW__ID,
* and the delete events are (0,536936448,2),....,(10000001,536936448,1000000).
* So PPD based on min/max ROW__ID would have 0 <= rowId <= 0, which would
* miss these delete events. But we still want PPD to filter out data if
* possible.
*
* So use stripe stats to find the proper min/max for bucketProp and rowId;
* writeId is the same in both cases.
*/
for (int i = firstStripeIndex; i <= lastStripeIndex; i++) {
OrcRawRecordMerger.KeyInterval key = getKeyInterval(stats.get(i).getColumnStatistics());
if (key.getMinKey().getBucketProperty() < minBucketProp) {
minBucketProp = key.getMinKey().getBucketProperty();
}
if (key.getMaxKey().getBucketProperty() > maxBucketProp) {
maxBucketProp = key.getMaxKey().getBucketProperty();
}
if (key.getMinKey().getRowId() < minRowId) {
minRowId = key.getMinKey().getRowId();
}
if (key.getMaxKey().getRowId() > maxRowId) {
maxRowId = key.getMaxKey().getRowId();
}
}
}
if (minBucketProp == Long.MAX_VALUE)
  minBucketProp = Long.MIN_VALUE;
if (maxBucketProp == Long.MIN_VALUE)
  maxBucketProp = Long.MAX_VALUE;
if (minRowId == Long.MAX_VALUE)
  minRowId = Long.MIN_VALUE;
if (maxRowId == Long.MIN_VALUE)
  maxRowId = Long.MAX_VALUE;
setSARG(keyInterval, deleteEventReaderOptions, minBucketProp, maxBucketProp, minRowId, maxRowId);
return keyInterval;
}
}
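The heart of the fast path above is the ACID key index, which stores the last RecordIdentifier of each stripe. The sketch below shows how the min/max keys fall out of it for a chosen stripe range; the index values are illustrative, not taken from a real file.
import org.apache.hadoop.hive.ql.io.RecordIdentifier;

public class KeyIndexBoundsSketch {
  public static void main(String[] args) {
    // Hypothetical last-key-per-stripe index for a three-stripe file.
    RecordIdentifier[] keyIndex = {
      new RecordIdentifier(1L, 536870912, 999L),
      new RecordIdentifier(1L, 536870912, 1999L),
      new RecordIdentifier(2L, 536870912, 99L)
    };
    int firstStripeIndex = 1;
    int lastStripeIndex = 2; // suppose the split covers stripes 1..2

    RecordIdentifier minKey = null;
    if (firstStripeIndex > 0) {
      // Valid keys are strictly greater than the last key of the previous stripe,
      // so bump rowId by one to make the bound inclusive, as the method above does.
      minKey = keyIndex[firstStripeIndex - 1];
      minKey.setRowId(minKey.getRowId() + 1);
    }
    RecordIdentifier maxKey = keyIndex[lastStripeIndex];
    System.out.println("minKey=" + minKey + ", maxKey=" + maxKey);
  }
}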
use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
the class VectorizedOrcAcidRowBatchReader method setSARG.
/**
* Generates a SearchArgument to push down to delete_delta files.
*
* Note that bucket is a bit-packed int, so even though all delete events
* for a given split have the same bucket ID, they need not have the same
* "bucket" value; see {@link BucketCodec}.
*/
private void setSARG(OrcRawRecordMerger.KeyInterval keyInterval, Reader.Options deleteEventReaderOptions, long minBucketProp, long maxBucketProp, long minRowId, long maxRowId) {
SearchArgument.Builder b = null;
if (keyInterval.getMinKey() != null) {
RecordIdentifier k = keyInterval.getMinKey();
b = SearchArgumentFactory.newBuilder();
// not(originalTransaction < min) -> originalTransaction >= min; the same trick lower-bounds bucket and rowId below
b.startAnd().startNot().lessThan(OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME, PredicateLeaf.Type.LONG, k.getWriteId()).end();
b.startNot().lessThan(OrcRecordUpdater.BUCKET_FIELD_NAME, PredicateLeaf.Type.LONG, minBucketProp).end();
b.startNot().lessThan(OrcRecordUpdater.ROW_ID_FIELD_NAME, PredicateLeaf.Type.LONG, minRowId).end();
b.end();
}
if (keyInterval.getMaxKey() != null) {
RecordIdentifier k = keyInterval.getMaxKey();
if (b == null) {
b = SearchArgumentFactory.newBuilder();
}
b.startAnd().lessThanEquals(OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME, PredicateLeaf.Type.LONG, k.getWriteId());
b.lessThanEquals(OrcRecordUpdater.BUCKET_FIELD_NAME, PredicateLeaf.Type.LONG, maxBucketProp);
b.lessThanEquals(OrcRecordUpdater.ROW_ID_FIELD_NAME, PredicateLeaf.Type.LONG, maxRowId);
b.end();
}
if (b != null) {
deleteEventSarg = b.build();
LOG.info("deleteReader SARG(" + deleteEventSarg + ") ");
deleteEventReaderOptions.searchArgument(deleteEventSarg, new String[] { OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME, OrcRecordUpdater.BUCKET_FIELD_NAME, OrcRecordUpdater.ROW_ID_FIELD_NAME });
return;
}
deleteEventReaderOptions.searchArgument(null, null);
}
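Conceptually, the SARG built above is (writeId >= min AND bucket >= minBucketProp AND rowId >= minRowId) AND (writeId <= max AND bucket <= maxBucketProp AND rowId <= maxRowId). Here is a standalone sketch of the same builder pattern with illustrative bounds; the column names match the ACID schema fields used above (originalTransaction, bucket, rowId).
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class DeleteDeltaSargSketch {
  public static void main(String[] args) {
    long minWriteId = 1L, maxWriteId = 2L;          // illustrative bounds only
    long minBucketProp = 536870912L, maxBucketProp = 536870920L;
    long minRowId = 0L, maxRowId = 999L;

    SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
    // Lower bound: not(col < min) is the builder's way of expressing col >= min.
    b.startAnd()
        .startNot().lessThan("originalTransaction", PredicateLeaf.Type.LONG, minWriteId).end()
        .startNot().lessThan("bucket", PredicateLeaf.Type.LONG, minBucketProp).end()
        .startNot().lessThan("rowId", PredicateLeaf.Type.LONG, minRowId).end()
        .end();
    // Upper bound: col <= max for each key component.
    b.startAnd()
        .lessThanEquals("originalTransaction", PredicateLeaf.Type.LONG, maxWriteId)
        .lessThanEquals("bucket", PredicateLeaf.Type.LONG, maxBucketProp)
        .lessThanEquals("rowId", PredicateLeaf.Type.LONG, maxRowId)
        .end();
    System.out.println(b.build());
  }
}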