Search in sources :

Example 21 with RecordIdentifier

Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in the Apache Hive project.

In class TestCompactor, method checkExpectedTxnsPresent.

/**
 * Reads one bucket back through the ORC raw reader and checks the write ids it contains.
 *
 * <p>The rows are consumed in the order the reader returns them. The first row must carry
 * write id {@code min}; every later row must carry either the same write id as the row before
 * it or exactly one more; and the last write id seen must equal {@code max}. If the reader
 * returns no rows at all, the only check that runs is {@code max == min}.
 *
 * @param base                base directory handed to the raw reader
 * @param deltas              delta directories handed to the raw reader
 * @param columnNamesProperty value stored under {@code IOConstants.SCHEMA_EVOLUTION_COLUMNS}
 * @param columnTypesProperty value stored under {@code IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES}
 * @param bucket              bucket to read
 * @param min                 write id expected on the first row
 * @param max                 write id expected on the last row
 * @param numBuckets          value stored under {@code hive_metastoreConstants.BUCKET_COUNT}
 * @throws IOException if reading from or closing the raw reader fails
 */
private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty, String columnTypesProperty, int bucket, long min, long max, int numBuckets) throws IOException {
    // Stub write-id list that answers every validity query with "valid" so the reader does not
    // filter any row out.
    // NOTE(review): the aborted queries also answer true/ALL; kept exactly as in the original.
    ValidWriteIdList writeIdList = new ValidWriteIdList() {

        @Override
        public String getTableName() {
            return "AcidTable";
        }

        @Override
        public boolean isWriteIdValid(long writeid) {
            return true;
        }

        @Override
        public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
            return RangeResponse.ALL;
        }

        @Override
        public String writeToString() {
            return "";
        }

        @Override
        public void readFromString(String src) {
        }

        @Override
        public Long getMinOpenWriteId() {
            return null;
        }

        @Override
        public long getHighWatermark() {
            return Long.MAX_VALUE;
        }

        @Override
        public long[] getInvalidWriteIds() {
            return new long[0];
        }

        @Override
        public boolean isValidBase(long writeid) {
            return true;
        }

        @Override
        public boolean isWriteIdAborted(long writeid) {
            return true;
        }

        @Override
        public RangeResponse isWriteIdRangeAborted(long minWriteId, long maxWriteId) {
            return RangeResponse.ALL;
        }
    };
    OrcInputFormat aif = new OrcInputFormat();
    Configuration conf = new Configuration();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesProperty);
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, columnTypesProperty);
    conf.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(numBuckets));
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    AcidInputFormat.RawReader<OrcStruct> reader = aif.getRawReader(conf, true, bucket, writeIdList, base, deltas);
    long currentTxn = min;
    // The reader was previously never closed; release it even when an assertion in the loop fails.
    try {
        RecordIdentifier identifier = reader.createKey();
        OrcStruct value = reader.createValue();
        boolean seenCurrentTxn = false;
        while (reader.next(identifier, value)) {
            if (!seenCurrentTxn) {
                // Very first row: it must start the sequence at min.
                Assert.assertEquals(currentTxn, identifier.getWriteId());
                seenCurrentTxn = true;
            }
            if (currentTxn != identifier.getWriteId()) {
                // The write id changed: the only permitted step is to the next consecutive id.
                Assert.assertEquals(currentTxn + 1, identifier.getWriteId());
                currentTxn++;
            }
        }
    } finally {
        reader.close();
    }
    Assert.assertEquals(max, currentTxn);
}
Also used : AcidInputFormat(org.apache.hadoop.hive.ql.io.AcidInputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) Configuration(org.apache.hadoop.conf.Configuration) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList)

Example 22 with RecordIdentifier

Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in the Apache Hive project.

In class TestMutations, method testTransactionBatchCommitPartitioned.

/**
 * Streams a single insert into the ASIA_INDIA partition of a partitioned table inside one
 * transaction, commits it, and then verifies what was written: min and max write id are both 1,
 * exactly one file exists, and the single record read back has the expected row text and
 * record identifier. Finally checks that the transaction reports COMMITTED.
 */
@Test
public void testTransactionBatchCommitPartitioned() throws Exception {
    Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
    MutatorClient client = new MutatorClientBuilder().addSinkTable(table.getDbName(), table.getTableName(), true).metaStoreUri(metaStoreUri).build();
    client.connect();
    // Close the connected client even when a later step or assertion throws; previously a
    // failure anywhere below skipped the trailing close() call.
    try {
        Transaction transaction = client.newTransaction();
        List<AcidTable> destinations = client.getTables();
        transaction.begin();
        MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
        MutatorCoordinator coordinator = new MutatorCoordinatorBuilder().metaStoreUri(metaStoreUri).table(destinations.get(0)).mutatorFactory(mutatorFactory).build();
        BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
        MutableRecord record = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
        coordinator.insert(ASIA_INDIA, record);
        // The coordinator is closed before the commit, in the same order as the original test.
        coordinator.close();
        transaction.commit();
        StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
        streamingAssertions.assertMinWriteId(1L);
        streamingAssertions.assertMaxWriteId(1L);
        streamingAssertions.assertExpectedFileCount(1);
        List<Record> readRecords = streamingAssertions.readRecords();
        assertThat(readRecords.size(), is(1));
        assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
        assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
        assertThat(transaction.getState(), is(COMMITTED));
    } finally {
        client.close();
    }
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) Record(org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Example 23 with RecordIdentifier

Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in the Apache Hive project.

In class TestMutations, method testTransactionBatchCommitUnpartitioned.

/**
 * Streams a single insert into an unpartitioned table inside one transaction, commits it, and
 * then verifies what was written: min and max write id are both 1, exactly one file exists,
 * and the single record read back has the expected row text and record identifier. Finally
 * checks that the transaction reports COMMITTED.
 */
@Test
public void testTransactionBatchCommitUnpartitioned() throws Exception {
    Table table = unpartitionedTableBuilder.create(metaStoreClient);
    MutatorClient client = new MutatorClientBuilder().addSinkTable(table.getDbName(), table.getTableName(), false).metaStoreUri(metaStoreUri).build();
    client.connect();
    // Close the connected client even when a later step or assertion throws; previously a
    // failure anywhere below skipped the trailing close() call.
    try {
        Transaction transaction = client.newTransaction();
        List<AcidTable> destinations = client.getTables();
        transaction.begin();
        MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
        MutatorCoordinator coordinator = new MutatorCoordinatorBuilder().metaStoreUri(metaStoreUri).table(destinations.get(0)).mutatorFactory(mutatorFactory).build();
        BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
        MutableRecord record = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
        // An empty partition-value list is passed because the table has no partitions.
        coordinator.insert(Collections.<String>emptyList(), record);
        // The coordinator is closed before the commit, in the same order as the original test.
        coordinator.close();
        transaction.commit();
        StreamingAssert streamingAssertions = assertionFactory.newStreamingAssert(table);
        streamingAssertions.assertMinWriteId(1L);
        streamingAssertions.assertMaxWriteId(1L);
        streamingAssertions.assertExpectedFileCount(1);
        List<Record> readRecords = streamingAssertions.readRecords();
        assertThat(readRecords.size(), is(1));
        assertThat(readRecords.get(0).getRow(), is("{1, Hello streaming}"));
        assertThat(readRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
        assertThat(transaction.getState(), is(COMMITTED));
    } finally {
        client.close();
    }
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) Record(org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Example 24 with RecordIdentifier

Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in the Apache Hive project.

In class TestMutations, method testUpdatesAndDeletes.

/**
 * End-to-end scenario covering inserts, updates and deletes across three partitions.
 *
 * <p>Phase 1 opens a client and, in one transaction, inserts two records each into
 * ASIA_INDIA, EUROPE_UK and EUROPE_FRANCE. Phase 2 opens a second client and, in another
 * transaction, updates one India record, inserts a new India record, deletes one UK record,
 * and deletes one and updates the other France record. The final section reads each
 * partition back and asserts the surviving rows and their record identifiers.
 */
@Test
public void testUpdatesAndDeletes() throws Exception {
    // Set up some base data then stream some inserts/updates/deletes to a number of partitions
    MutatorFactory mutatorFactory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
    // INSERT DATA
    // Only ASIA_INDIA and EUROPE_FRANCE are added as partitions up front; EUROPE_UK is not.
    // NOTE(review): presumably the UK partition is created on demand during the insert - confirm.
    Table table = partitionedTableBuilder.addPartition(ASIA_INDIA).addPartition(EUROPE_FRANCE).create(metaStoreClient);
    MutatorClient client = new MutatorClientBuilder().addSinkTable(table.getDbName(), table.getTableName(), true).metaStoreUri(metaStoreUri).build();
    client.connect();
    Transaction insertTransaction = client.newTransaction();
    List<AcidTable> destinations = client.getTables();
    insertTransaction.begin();
    MutatorCoordinator insertCoordinator = new MutatorCoordinatorBuilder().metaStoreUri(metaStoreUri).table(destinations.get(0)).mutatorFactory(mutatorFactory).build();
    BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
    // Six new records (ids 1-6), each passed through the resolver to get its bucket id attached.
    MutableRecord asiaIndiaRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(1, "Namaste streaming 1"));
    MutableRecord asiaIndiaRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(2, "Namaste streaming 2"));
    MutableRecord europeUkRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(3, "Hello streaming 1"));
    MutableRecord europeUkRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(4, "Hello streaming 2"));
    MutableRecord europeFranceRecord1 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(5, "Bonjour streaming 1"));
    MutableRecord europeFranceRecord2 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(6, "Bonjour streaming 2"));
    // Two inserts per partition, issued partition by partition.
    insertCoordinator.insert(ASIA_INDIA, asiaIndiaRecord1);
    insertCoordinator.insert(ASIA_INDIA, asiaIndiaRecord2);
    insertCoordinator.insert(EUROPE_UK, europeUkRecord1);
    insertCoordinator.insert(EUROPE_UK, europeUkRecord2);
    insertCoordinator.insert(EUROPE_FRANCE, europeFranceRecord1);
    insertCoordinator.insert(EUROPE_FRANCE, europeFranceRecord2);
    // The coordinator is closed before the transaction is committed.
    insertCoordinator.close();
    insertTransaction.commit();
    assertThat(insertTransaction.getState(), is(COMMITTED));
    client.close();
    // MUTATE DATA
    // A fresh client, transaction and coordinator are used for the second phase.
    client = new MutatorClientBuilder().addSinkTable(table.getDbName(), table.getTableName(), true).metaStoreUri(metaStoreUri).build();
    client.connect();
    Transaction mutateTransaction = client.newTransaction();
    destinations = client.getTables();
    mutateTransaction.begin();
    MutatorCoordinator mutateCoordinator = new MutatorCoordinatorBuilder().metaStoreUri(metaStoreUri).table(destinations.get(0)).mutatorFactory(mutatorFactory).build();
    bucketIdResolver = mutatorFactory.newBucketIdResolver(destinations.get(0).getTotalBuckets());
    // Only the brand-new record goes through the resolver; the updates and deletes below carry
    // an explicit RecordIdentifier that addresses a row written in the first phase.
    MutableRecord asiaIndiaRecord3 = (MutableRecord) bucketIdResolver.attachBucketIdToRecord(new MutableRecord(20, "Namaste streaming 3"));
    mutateCoordinator.update(ASIA_INDIA, new MutableRecord(2, "UPDATED: Namaste streaming 2", new RecordIdentifier(1L, encodeBucket(0), 1L)));
    mutateCoordinator.insert(ASIA_INDIA, asiaIndiaRecord3);
    mutateCoordinator.delete(EUROPE_UK, new MutableRecord(3, "Hello streaming 1", new RecordIdentifier(1L, encodeBucket(0), 0L)));
    mutateCoordinator.delete(EUROPE_FRANCE, new MutableRecord(5, "Bonjour streaming 1", new RecordIdentifier(1L, encodeBucket(0), 0L)));
    mutateCoordinator.update(EUROPE_FRANCE, new MutableRecord(6, "UPDATED: Bonjour streaming 2", new RecordIdentifier(1L, encodeBucket(0), 1L)));
    mutateCoordinator.close();
    mutateTransaction.commit();
    assertThat(mutateTransaction.getState(), is(COMMITTED));
    // VERIFY: India - the original row 1, the updated row 2 and the newly inserted row 20.
    StreamingAssert indiaAssertions = assertionFactory.newStreamingAssert(table, ASIA_INDIA);
    indiaAssertions.assertMinWriteId(1L);
    indiaAssertions.assertMaxWriteId(2L);
    // NOTE(review): the readRecords argument looks like the expected number of splits (see the
    // "1 split" remark in the UK section) - confirm against StreamingAssert.
    List<Record> indiaRecords = indiaAssertions.readRecords(2);
    assertThat(indiaRecords.size(), is(3));
    assertThat(indiaRecords.get(0).getRow(), is("{1, Namaste streaming 1}"));
    assertThat(indiaRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 0L)));
    assertThat(indiaRecords.get(1).getRow(), is("{2, UPDATED: Namaste streaming 2}"));
    assertThat(indiaRecords.get(1).getRecordIdentifier(), is(new RecordIdentifier(2L, encodeBucket(0), // with split update, new version of the row is a new insert
    0L)));
    assertThat(indiaRecords.get(2).getRow(), is("{20, Namaste streaming 3}"));
    assertThat(indiaRecords.get(2).getRecordIdentifier(), is(new RecordIdentifier(2L, encodeBucket(0), 1L)));
    // VERIFY: UK - row 3 was deleted, so only row 4 from the first transaction remains.
    StreamingAssert ukAssertions = assertionFactory.newStreamingAssert(table, EUROPE_UK);
    ukAssertions.assertMinWriteId(1L);
    ukAssertions.assertMaxWriteId(2L);
    // 1 split since mutateTransaction txn just does deletes
    List<Record> ukRecords = ukAssertions.readRecords(1);
    assertThat(ukRecords.size(), is(1));
    assertThat(ukRecords.get(0).getRow(), is("{4, Hello streaming 2}"));
    assertThat(ukRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(1L, encodeBucket(0), 1L)));
    // VERIFY: France - row 5 was deleted and row 6 updated, so one row remains.
    StreamingAssert franceAssertions = assertionFactory.newStreamingAssert(table, EUROPE_FRANCE);
    franceAssertions.assertMinWriteId(1L);
    franceAssertions.assertMaxWriteId(2L);
    List<Record> franceRecords = franceAssertions.readRecords(2);
    assertThat(franceRecords.size(), is(1));
    assertThat(franceRecords.get(0).getRow(), is("{6, UPDATED: Bonjour streaming 2}"));
    assertThat(franceRecords.get(0).getRecordIdentifier(), is(new RecordIdentifier(2L, encodeBucket(0), // with split update, new version of the row is a new insert
    0L)));
    client.close();
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) Record(org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Example 25 with RecordIdentifier

Use of org.apache.hadoop.hive.ql.io.RecordIdentifier in the Apache Hive project.

In class TestBucketIdResolverImpl, method testAttachBucketIdToRecord.

/**
 * Attaches a bucket id to a fresh record and checks the outcome: the record's row id must equal
 * an identifier built from the V1 encoding of bucket 1 with -1 in the first and third
 * positions, while the record's id and message must still be the values it was created with.
 */
@Test
public void testAttachBucketIdToRecord() {
    MutableRecord subject = new MutableRecord(1, "hello");
    capturingBucketIdResolver.attachBucketIdToRecord(subject);

    // Build the identifier the resolver is expected to have stored on the record.
    AcidOutputFormat.Options bucketOne = new AcidOutputFormat.Options(null).bucket(1);
    RecordIdentifier expectedRowId = new RecordIdentifier(-1L, BucketCodec.V1.encode(bucketOne), -1L);

    assertThat(subject.rowId, is(expectedRowId));
    assertThat(subject.id, is(1));
    assertThat(subject.msg.toString(), is("hello"));
}
Also used : RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutableRecord(org.apache.hive.hcatalog.streaming.mutate.MutableRecord) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) Test(org.junit.Test)

Aggregations

RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier)40 Test (org.junit.Test)13 Path (org.apache.hadoop.fs.Path)9 AcidOutputFormat (org.apache.hadoop.hive.ql.io.AcidOutputFormat)9 StripeInformation (org.apache.orc.StripeInformation)9 Configuration (org.apache.hadoop.conf.Configuration)7 BitSet (java.util.BitSet)5 FileSystem (org.apache.hadoop.fs.FileSystem)5 ValidReadTxnList (org.apache.hadoop.hive.common.ValidReadTxnList)5 ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList)5 RecordUpdater (org.apache.hadoop.hive.ql.io.RecordUpdater)5 ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList)4 Table (org.apache.hadoop.hive.metastore.api.Table)4 VectorizedRowBatchCtx (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx)4 OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat)4 ReaderKey (org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey)4 OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct)4 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)4 ArrayList (java.util.ArrayList)3 AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat)3