Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestCompactor, method checkExpectedTxnsPresent.
private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty,
    String columnTypesProperty, int bucket, long min, long max, int numBuckets) throws IOException {
  // A ValidWriteIdList that treats every write id as valid, so the raw reader
  // returns every row from the base and delta files.
  ValidWriteIdList writeIdList = new ValidWriteIdList() {
    @Override
    public String getTableName() {
      return "AcidTable";
    }

    @Override
    public boolean isWriteIdValid(long writeid) {
      return true;
    }

    @Override
    public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
      return RangeResponse.ALL;
    }

    @Override
    public String writeToString() {
      return "";
    }

    @Override
    public void readFromString(String src) {
    }

    @Override
    public Long getMinOpenWriteId() {
      return null;
    }

    @Override
    public long getHighWatermark() {
      return Long.MAX_VALUE;
    }

    @Override
    public long[] getInvalidWriteIds() {
      return new long[0];
    }

    @Override
    public boolean isValidBase(long writeid) {
      return true;
    }

    @Override
    public boolean isWriteIdAborted(long writeid) {
      return true;
    }

    @Override
    public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) {
      return RangeResponse.ALL;
    }
  };

  OrcInputFormat aif = new OrcInputFormat();
  Configuration conf = new Configuration();
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
  conf.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(numBuckets));
  HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
  AcidInputFormat.RawReader<OrcStruct> reader =
      aif.getRawReader(conf, true, bucket, writeIdList, base, deltas);
  RecordIdentifier identifier = reader.createKey();
  OrcStruct value = reader.createValue();

  // Walk every row and check that the write ids carried by the RecordIdentifiers
  // start at min, advance one at a time with no gaps, and end at max.
  long currentTxn = min;
  boolean seenCurrentTxn = false;
  while (reader.next(identifier, value)) {
    if (!seenCurrentTxn) {
      Assert.assertEquals(currentTxn, identifier.getWriteId());
      seenCurrentTxn = true;
    }
    if (currentTxn != identifier.getWriteId()) {
      Assert.assertEquals(currentTxn + 1, identifier.getWriteId());
      currentTxn++;
    }
  }
  Assert.assertEquals(max, currentTxn);
}
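The raw reader above surfaces one RecordIdentifier per row, and the test only inspects its write id. As a minimal sketch of the identifier's value semantics, using only the constructor and accessors that appear elsewhere on this page (the bucket argument is left as a plain int here rather than an encoded bucket property):

// Sketch: RecordIdentifier is a (writeId, bucket, rowId) triple with value equality,
// which is what the assertThat(..., is(new RecordIdentifier(...))) checks below rely on.
RecordIdentifier expected = new RecordIdentifier(1L, 0, 0L);
RecordIdentifier actual = new RecordIdentifier(1L, 0, 0L);
Assert.assertEquals(expected, actual);
// The write id identifies the transaction that wrote the row; the row id is the
// row's position within that write and bucket.
Assert.assertEquals(1L, expected.getWriteId());
Assert.assertEquals(0L, expected.getRowId());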
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestMutations, method testTransactionBatchCommitPartitioned.
@Test
public void testTransactionBatchCommitPartitioned() throws Exception {
  Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
  MutatorClient client = new MutatorClientBuilder()
      .addSinkTable(table.getDbName(), table.getTableName(), true)
      .metaStoreUri(metaStoreUri)
      .build();
  client.connect();

  Transaction transaction = client.newTransaction();
  List<AcidTable> destinations = client.getTables();
  transaction.begin();

  MutatorFactory mutatorFactory =
      new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
  MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
      .metaStoreUri(metaStoreUri)
      .table(destinations.get(0))
      .mutatorFactory(mutatorFactory)
      .build();

  BucketIdResolver bucketIdAppender = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
  MutableRecord record =
      (MutableRecord) bucketIdAppender.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
  coordinator.insert(ASIA_INDIA, record);
  coordinator.close();
  transaction.commit();

  StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
  streamingAssertions.assertMinWriteId(1L);
  streamingAssertions.assertMaxWriteId(1L);
  streamingAssertions.assertExpectedFileCount(1);

  List<Record> readRecords = streamingAssertions.readRecords();
  assertThat(readRecords.size(), is(1));
  assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
  assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));

  assertThat(transaction.getState(), is(COMMITTED));
  client.close();
}
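encodeBucket is a test helper whose definition is not shown in this section; judging by the TestBucketIdResolverImpl assertion later on this page, it presumably encodes a bucket number into the bucket-property int carried by RecordIdentifier, roughly along these lines (a sketch, not the actual helper):

// Hypothetical stand-in for the encodeBucket(int) helper used in the assertions above.
static int encodeBucket(int bucketId) {
  return BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucketId));
}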
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestMutations, method testTransactionBatchCommitUnpartitioned.
@Test
public void testTransactionBatchCommitUnpartitioned() throws Exception {
  Table table = unpartitionedTableBuilder.create(metaStoreClient);
  MutatorClient client = new MutatorClientBuilder()
      .addSinkTable(table.getDbName(), table.getTableName(), false)
      .metaStoreUri(metaStoreUri)
      .build();
  client.connect();

  Transaction transaction = client.newTransaction();
  List<AcidTable> destinations = client.getTables();
  transaction.begin();

  MutatorFactory mutatorFactory =
      new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
  MutatorCoordinator coordinator = new MutatorCoordinatorBuilder()
      .metaStoreUri(metaStoreUri)
      .table(destinations.get(0))
      .mutatorFactory(mutatorFactory)
      .build();

  BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
  MutableRecord record =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
  coordinator.insert(Collections.<String>emptyList(), record);
  coordinator.close();
  transaction.commit();

  StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table);
  streamingAssertions.assertMinWriteId(1L);
  streamingAssertions.assertMaxWriteId(1L);
  streamingAssertions.assertExpectedFileCount(1);

  List<Record> readRecords = streamingAssertions.readRecords();
  assertThat(readRecords.size(), is(1));
  assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
  assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));

  assertThat(transaction.getState(), is(COMMITTED));
  client.close();
}
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestMutations, method testUpdatesAndDeletes.
@Test
public void testUpdatesAndDeletes() throws Exception {
  // Set up some base data then stream some inserts/updates/deletes to a number of partitions
  MutatorFactory mutatorFactory =
      new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);

  // INSERT DATA
  //
  Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).addPartition(EUROPE_FRANCE).create(metaStoreClient);
  MutatorClient client = new MutatorClientBuilder()
      .addSinkTable(table.getDbName(), table.getTableName(), true)
      .metaStoreUri(metaStoreUri)
      .build();
  client.connect();

  Transaction insertTransaction = client.newTransaction();
  List<AcidTable> destinations = client.getTables();
  insertTransaction.begin();

  MutatorCoordinator insertCoordinator = new MutatorCoordinatorBuilder()
      .metaStoreUri(metaStoreUri)
      .table(destinations.get(0))
      .mutatorFactory(mutatorFactory)
      .build();

  BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
  MutableRecord asiaIndiaRecord1 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1, "Namaste streaming 1"));
  MutableRecord asiaIndiaRecord2 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(2, "Namaste streaming 2"));
  MutableRecord europeUkRecord1 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(3, "Hello streaming 1"));
  MutableRecord europeUkRecord2 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(4, "Hello streaming 2"));
  MutableRecord europeFranceRecord1 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(5, "Bonjour streaming 1"));
  MutableRecord europeFranceRecord2 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(6, "Bonjour streaming 2"));

  insertCoordinator.insert(ASIA_INDIA, asiaIndiaRecord1);
  insertCoordinator.insert(ASIA_INDIA, asiaIndiaRecord2);
  insertCoordinator.insert(EUROPE_UK, europeUkRecord1);
  insertCoordinator.insert(EUROPE_UK, europeUkRecord2);
  insertCoordinator.insert(EUROPE_FRANCE, europeFranceRecord1);
  insertCoordinator.insert(EUROPE_FRANCE, europeFranceRecord2);
  insertCoordinator.close();

  insertTransaction.commit();
  assertThat(insertTransaction.getState(), is(COMMITTED));
  client.close();

  // MUTATE DATA
  //
  client = new MutatorClientBuilder()
      .addSinkTable(table.getDbName(), table.getTableName(), true)
      .metaStoreUri(metaStoreUri)
      .build();
  client.connect();

  Transaction mutateTransaction = client.newTransaction();
  destinations = client.getTables();
  mutateTransaction.begin();

  MutatorCoordinator mutateCoordinator = new MutatorCoordinatorBuilder()
      .metaStoreUri(metaStoreUri)
      .table(destinations.get(0))
      .mutatorFactory(mutatorFactory)
      .build();

  bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
  MutableRecord asiaIndiaRecord3 =
      (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(20, "Namaste streaming 3"));

  mutateCoordinator.update(ASIA_INDIA,
      new MutableRecord(2, "UPDATED: Namaste streaming 2", new RecordIdentifier(1L, encodeBucket(0), 1L)));
  mutateCoordinator.insert(ASIA_INDIA, asiaIndiaRecord3);
  mutateCoordinator.delete(EUROPE_UK,
      new MutableRecord(3, "Hello streaming 1", new RecordIdentifier(1L, encodeBucket(0), 0L)));
  mutateCoordinator.delete(EUROPE_FRANCE,
      new MutableRecord(5, "Bonjour streaming 1", new RecordIdentifier(1L, encodeBucket(0), 0L)));
  mutateCoordinator.update(EUROPE_FRANCE,
      new MutableRecord(6, "UPDATED: Bonjour streaming 2", new RecordIdentifier(1L, encodeBucket(0), 1L)));
  mutateCoordinator.close();

  mutateTransaction.commit();
  assertThat(mutateTransaction.getState(), is(COMMITTED));

  StreamingAssert indiaAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
  indiaAssertions.assertMinWriteId(1L);
  indiaAssertions.assertMaxWriteId(2L);
  List<Record> indiaRecords = indiaAssertions.readRecords(2);
  assertThat(indiaRecords.size(), is(3));
  assertThat(indiaRecords.get(0).getRow(), is("{1, Namaste streaming 1}"));
  assertThat(indiaRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
  assertThat(indiaRecords.get(1).getRow(), is("{2, UPDATED: Namaste streaming 2}"));
  // with split update, new version of the row is a new insert
  assertThat(indiaRecords.get(1).getRecordIdentifier(), is(new RecordIdentifier(2L, encodeBucket(0), 0L)));
  assertThat(indiaRecords.get(2).getRow(), is("{20, Namaste streaming 3}"));
  assertThat(indiaRecords.get(2).getRecordIdentifier(), is(new RecordIdentifier(2L, encodeBucket(0), 1L)));

  StreamingAssert ukAssertions = assertionFactory.newStreamingAssert(table, EUROPE_UK);
  ukAssertions.assertMinWriteId(1L);
  ukAssertions.assertMaxWriteId(2L);
  // 1 split since mutateTransaction txn just does deletes
  List<Record> ukRecords = ukAssertions.readRecords(1);
  assertThat(ukRecords.size(), is(1));
  assertThat(ukRecords.get(0).getRow(), is("{4, Hello streaming 2}"));
  assertThat(ukRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 1L)));

  StreamingAssert franceAssertions = assertionFactory.newStreamingAssert(table, EUROPE_FRANCE);
  franceAssertions.assertMinWriteId(1L);
  franceAssertions.assertMaxWriteId(2L);
  List<Record> franceRecords = franceAssertions.readRecords(2);
  assertThat(franceRecords.size(), is(1));
  assertThat(franceRecords.get(0).getRow(), is("{6, UPDATED: Bonjour streaming 2}"));
  // with split update, new version of the row is a new insert
  assertThat(franceRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(2L, encodeBucket(0), 0L)));

  client.close();
}
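The ASIA_INDIA and EUROPE_FRANCE assertions above reflect split-update semantics: an update is written as a delete of the original row plus a fresh insert in the mutating transaction, so the surviving version of the row carries the new write id and a new row id. Restating the identifiers the test expects for the updated ASIA_INDIA row:

// asiaIndiaRecord2 as originally inserted (write id 1, second row in its bucket):
RecordIdentifier beforeUpdate = new RecordIdentifier(1L, encodeBucket(0), 1L);
// the same logical row after the split update (write id 2, first row written by that transaction):
RecordIdentifier afterUpdate = new RecordIdentifier(2L, encodeBucket(0), 0L);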
Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in project hive by apache.
The class TestBucketIdResolverImpl, method testAttachBucketIdToRecord.
@Test
public void testAttachBucketIdToRecord() {
  MutableRecord record = new MutableRecord(1, "hello");
  capturingBucketIdResolver.attachBucketIdToRecord(record);
  assertThat(record.rowId,
      is(new RecordIdentifier(-1L, BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(1)), -1L)));
  assertThat(record.id, is(1));
  assertThat(record.msg.toString(), is("hello"));
}
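The test only verifies the placeholder identifier that attachBucketIdToRecord leaves behind; a rough sketch of what such a resolver is expected to do, assuming a hypothetical hash-based bucket assignment (the real BucketIdResolverImpl derives the bucket from the configured bucket columns):

// Hypothetical sketch: pick a bucket from the record's bucketed column, encode it,
// and attach a placeholder RecordIdentifier. Write id and row id are -1 because
// they are only known once the row is actually written by a transaction.
int bucketId = Math.floorMod(Objects.hash(record.id), totalBuckets);  // totalBuckets assumed to be in scope
int bucketProperty = BucketCodec.V1.encode(new AcidOutputFormat.Options(null).bucket(bucketId));
record.rowId = new RecordIdentifier(-1L, bucketProperty, -1L);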