Search in sources :

Example 1 with BucketIdResolver

Use of org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver in the Apache Hive project.

In class ExampleUseCase, method example.

/*
 * This is an illustration, not a functioning example.
 *
 * It sketches the order of calls across the two ends of a mutation job: the client/tool end
 * (create and connect the client, begin the transaction, later commit it) and the
 * cluster/worker end (attach a bucket id to a record, then apply an insert, an update and a
 * delete through a coordinator). databaseName, tableName, createPartitions, metaStoreUri,
 * mutatorFactory, the records and the partition values are all declared outside this method.
 */
public void example() throws Exception {
    // CLIENT/TOOL END
    //
    // Singleton instance in the job client
    // Create a client to manage our transaction
    MutatorClient client = new MutatorClientBuilder().addSinkTable(databaseName, tableName, createPartitions).metaStoreUri(metaStoreUri).build();
    // Connect the client before asking it for a transaction or table details; the other usages
    // of MutatorClient call connect() straight after build().
    // NOTE(review): presumably newTransaction()/getTables() need a connected client - confirm
    // against MutatorClient.
    client.connect();
    // Get the transaction
    Transaction transaction = client.newTransaction();
    // Get serializable details of the destination tables
    List<AcidTable> tables = client.getTables();
    transaction.begin();
    // CLUSTER / WORKER END
    //
    // Job submitted to the cluster
    //
    BucketIdResolver bucketIdResolver = mutatorFactory.newBucketIdResolver(tables.get(0).getTotalBuckets());
    record1 = bucketIdResolver.attachBucketIdToRecord(record1);
    // --------------------------------------------------------------
    // DATA SHOULD GET SORTED BY YOUR ETL/MERGE PROCESS HERE
    //
    // Group the data by (partitionValues, ROW__ID.bucketId)
    // Order the groups by (ROW__ID.lastTransactionId, ROW__ID.rowId)
    // --------------------------------------------------------------
    // One of these runs at the output of each reducer
    //
    MutatorCoordinator coordinator = new MutatorCoordinatorBuilder().metaStoreUri(metaStoreUri).table(tables.get(0)).mutatorFactory(mutatorFactory).build();
    coordinator.insert(partitionValues1, record1);
    coordinator.update(partitionValues2, record2);
    coordinator.delete(partitionValues3, record3);
    coordinator.close();
    // CLIENT/TOOL END
    //
    // The tasks have completed, control is back at the tool
    transaction.commit();
    client.close();
}
Also used : Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder)

Example 2 with BucketIdResolver

Use of org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver in the Apache Hive project.

In class TestMutations, method testMulti.

/**
 * Writes four records under three partition keys (ASIA_INDIA, EUROPE_UK, EUROPE_FRANCE) within a
 * single transaction, then reads each partition back and checks the transaction ids, the file
 * count, the row contents and the record identifiers.
 */
@Test
public void testMulti() throws Exception {
    Table sinkTable = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
    MutatorClient mutatorClient = new MutatorClientBuilder()
            .addSinkTable(sinkTable.getDbName(), sinkTable.getTableName(), true)
            .metaStoreUri(metaStoreUri)
            .build();
    mutatorClient.connect();

    Transaction txn = mutatorClient.newTransaction();
    List<AcidTable> acidTables = mutatorClient.getTables();
    txn.begin();

    MutatorFactory factory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
    MutatorCoordinator writer = new MutatorCoordinatorBuilder()
            .metaStoreUri(metaStoreUri)
            .table(acidTables.get(0))
            .mutatorFactory(factory)
            .build();
    BucketIdResolver resolver = factory.newBucketIdResolver(acidTables.get(0).getTotalBuckets());

    // Every record gets its bucket id attached before it is handed to the coordinator.
    MutableRecord india1 = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
    MutableRecord uk1 = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(2, "Hello streaming"));
    MutableRecord france1 = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(3, "Hello streaming"));
    MutableRecord france2 = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(4, "Bonjour streaming"));

    writer.insert(ASIA_INDIA, india1);
    writer.insert(EUROPE_UK, uk1);
    writer.insert(EUROPE_FRANCE, france1);
    writer.insert(EUROPE_FRANCE, france2);
    writer.close();
    txn.commit();

    // ASIA_INDIA: one row expected
    StreamingAssert indiaAssert = assertionFactory.newStreamingAssert(sinkTable, ASIA_INDIA);
    indiaAssert.assertMinTransactionId(1L);
    indiaAssert.assertMaxTransactionId(1L);
    indiaAssert.assertExpectedFileCount(1);
    List<Record> indiaRows = indiaAssert.readRecords();
    assertThat(indiaRows.size(), is(1));
    Record indiaRow = indiaRows.get(0);
    assertThat(indiaRow.getRow(), is("{1, Hello streaming}"));
    assertThat(indiaRow.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));

    // EUROPE_UK: one row expected
    StreamingAssert ukAssert = assertionFactory.newStreamingAssert(sinkTable, EUROPE_UK);
    ukAssert.assertMinTransactionId(1L);
    ukAssert.assertMaxTransactionId(1L);
    ukAssert.assertExpectedFileCount(1);
    List<Record> ukRows = ukAssert.readRecords();
    assertThat(ukRows.size(), is(1));
    Record ukRow = ukRows.get(0);
    assertThat(ukRow.getRow(), is("{2, Hello streaming}"));
    assertThat(ukRow.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));

    // EUROPE_FRANCE: two rows expected, with consecutive row ids
    StreamingAssert franceAssert = assertionFactory.newStreamingAssert(sinkTable, EUROPE_FRANCE);
    franceAssert.assertMinTransactionId(1L);
    franceAssert.assertMaxTransactionId(1L);
    franceAssert.assertExpectedFileCount(1);
    List<Record> franceRows = franceAssert.readRecords();
    assertThat(franceRows.size(), is(2));
    Record franceRow1 = franceRows.get(0);
    assertThat(franceRow1.getRow(), is("{3, Hello streaming}"));
    assertThat(franceRow1.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
    Record franceRow2 = franceRows.get(1);
    assertThat(franceRow2.getRow(), is("{4, Bonjour streaming}"));
    assertThat(franceRow2.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));

    mutatorClient.close();
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) Record(org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Example 3 with BucketIdResolver

Use of org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver in the Apache Hive project.

In class TestMutations, method testTransactionBatchAbort.

/**
 * Inserts two records under ASIA_INDIA, aborts the transaction instead of committing it, and
 * checks that the transaction state is ABORTED and that assertNothingWritten() holds for the
 * partition.
 */
@Test
public void testTransactionBatchAbort() throws Exception {
    Table sinkTable = partitionedTableBuilder.addPartition(ASIA_INDIA).create(metaStoreClient);
    MutatorClient mutatorClient = new MutatorClientBuilder()
            .addSinkTable(sinkTable.getDbName(), sinkTable.getTableName(), true)
            .metaStoreUri(metaStoreUri)
            .build();
    mutatorClient.connect();

    Transaction txn = mutatorClient.newTransaction();
    List<AcidTable> acidTables = mutatorClient.getTables();
    txn.begin();

    MutatorFactory factory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
    MutatorCoordinator writer = new MutatorCoordinatorBuilder()
            .metaStoreUri(metaStoreUri)
            .table(acidTables.get(0))
            .mutatorFactory(factory)
            .build();
    BucketIdResolver resolver = factory.newBucketIdResolver(acidTables.get(0).getTotalBuckets());

    MutableRecord first = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
    MutableRecord second = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(2, "Welcome to streaming"));
    writer.insert(ASIA_INDIA, first);
    writer.insert(ASIA_INDIA, second);
    writer.close();

    // Abort rather than commit.
    txn.abort();
    assertThat(txn.getState(), is(ABORTED));
    mutatorClient.close();

    StreamingAssert indiaAssert = assertionFactory.newStreamingAssert(sinkTable, ASIA_INDIA);
    indiaAssert.assertNothingWritten();
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Example 4 with BucketIdResolver

Use of org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver in the Apache Hive project.

In class TestMutations, method testTransactionBatchCommitUnpartitioned.

/**
 * Inserts a single record into a table built by unpartitionedTableBuilder (passing an empty
 * partition-value list), commits, and checks the transaction ids, the file count, the row read
 * back, its record identifier and the COMMITTED transaction state.
 */
@Test
public void testTransactionBatchCommitUnpartitioned() throws Exception {
    Table sinkTable = unpartitionedTableBuilder.create(metaStoreClient);
    MutatorClient mutatorClient = new MutatorClientBuilder()
            .addSinkTable(sinkTable.getDbName(), sinkTable.getTableName(), false)
            .metaStoreUri(metaStoreUri)
            .build();
    mutatorClient.connect();

    Transaction txn = mutatorClient.newTransaction();
    List<AcidTable> acidTables = mutatorClient.getTables();
    txn.begin();

    MutatorFactory factory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);
    MutatorCoordinator writer = new MutatorCoordinatorBuilder()
            .metaStoreUri(metaStoreUri)
            .table(acidTables.get(0))
            .mutatorFactory(factory)
            .build();
    BucketIdResolver resolver = factory.newBucketIdResolver(acidTables.get(0).getTotalBuckets());

    MutableRecord onlyRecord = (MutableRecord) resolver.attachBucketIdToRecord(new MutableRecord(1, "Hello streaming"));
    // No partition values are supplied for this table.
    writer.insert(Collections.<String>emptyList(), onlyRecord);
    writer.close();
    txn.commit();

    StreamingAssert tableAssert = assertionFactory.newStreamingAssert(sinkTable);
    tableAssert.assertMinTransactionId(1L);
    tableAssert.assertMaxTransactionId(1L);
    tableAssert.assertExpectedFileCount(1);
    List<Record> rows = tableAssert.readRecords();
    assertThat(rows.size(), is(1));
    Record row = rows.get(0);
    assertThat(row.getRow(), is("{1, Hello streaming}"));
    assertThat(row.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
    assertThat(txn.getState(), is(COMMITTED));

    mutatorClient.close();
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) Record(org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Example 5 with BucketIdResolver

Use of org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver in the Apache Hive project.

In class TestMutations, method testUpdatesAndDeletes.

/**
 * Two-phase test. Phase one inserts six base records (two each under ASIA_INDIA, EUROPE_UK and
 * EUROPE_FRANCE) in one transaction. Phase two opens a fresh client and, in a second
 * transaction, streams a mix of updates, one insert and deletes against those records. The
 * final assertions read each partition back and check the surviving rows and their record
 * identifiers.
 */
@Test
public void testUpdatesAndDeletes() throws Exception {
    // The same factory serves both the insert phase and the mutate phase.
    MutatorFactory factory = new ReflectiveMutatorFactory(conf, MutableRecord.class, RECORD_ID_COLUMN, BUCKET_COLUMN_INDEXES);

    // ---- Phase 1: INSERT DATA ----
    Table sinkTable = partitionedTableBuilder.addPartition(ASIA_INDIA).addPartition(EUROPE_FRANCE).create(metaStoreClient);
    MutatorClient insertClient = new MutatorClientBuilder()
            .addSinkTable(sinkTable.getDbName(), sinkTable.getTableName(), true)
            .metaStoreUri(metaStoreUri)
            .build();
    insertClient.connect();

    Transaction insertTxn = insertClient.newTransaction();
    List<AcidTable> insertTargets = insertClient.getTables();
    insertTxn.begin();

    MutatorCoordinator insertWriter = new MutatorCoordinatorBuilder()
            .metaStoreUri(metaStoreUri)
            .table(insertTargets.get(0))
            .mutatorFactory(factory)
            .build();
    BucketIdResolver insertResolver = factory.newBucketIdResolver(insertTargets.get(0).getTotalBuckets());

    MutableRecord india1 = (MutableRecord) insertResolver.attachBucketIdToRecord(new MutableRecord(1, "Namaste streaming 1"));
    MutableRecord india2 = (MutableRecord) insertResolver.attachBucketIdToRecord(new MutableRecord(2, "Namaste streaming 2"));
    MutableRecord uk1 = (MutableRecord) insertResolver.attachBucketIdToRecord(new MutableRecord(3, "Hello streaming 1"));
    MutableRecord uk2 = (MutableRecord) insertResolver.attachBucketIdToRecord(new MutableRecord(4, "Hello streaming 2"));
    MutableRecord france1 = (MutableRecord) insertResolver.attachBucketIdToRecord(new MutableRecord(5, "Bonjour streaming 1"));
    MutableRecord france2 = (MutableRecord) insertResolver.attachBucketIdToRecord(new MutableRecord(6, "Bonjour streaming 2"));

    insertWriter.insert(ASIA_INDIA, india1);
    insertWriter.insert(ASIA_INDIA, india2);
    insertWriter.insert(EUROPE_UK, uk1);
    insertWriter.insert(EUROPE_UK, uk2);
    insertWriter.insert(EUROPE_FRANCE, france1);
    insertWriter.insert(EUROPE_FRANCE, france2);
    insertWriter.close();

    insertTxn.commit();
    assertThat(insertTxn.getState(), is(COMMITTED));
    insertClient.close();

    // ---- Phase 2: MUTATE DATA ----
    MutatorClient mutateClient = new MutatorClientBuilder()
            .addSinkTable(sinkTable.getDbName(), sinkTable.getTableName(), true)
            .metaStoreUri(metaStoreUri)
            .build();
    mutateClient.connect();

    Transaction mutateTxn = mutateClient.newTransaction();
    List<AcidTable> mutateTargets = mutateClient.getTables();
    mutateTxn.begin();

    MutatorCoordinator mutateWriter = new MutatorCoordinatorBuilder()
            .metaStoreUri(metaStoreUri)
            .table(mutateTargets.get(0))
            .mutatorFactory(factory)
            .build();
    BucketIdResolver mutateResolver = factory.newBucketIdResolver(mutateTargets.get(0).getTotalBuckets());

    MutableRecord india3 = (MutableRecord) mutateResolver.attachBucketIdToRecord(new MutableRecord(20, "Namaste streaming 3"));

    // Updates and deletes carry an explicit RecordIdentifier for the row they target.
    mutateWriter.update(ASIA_INDIA, new MutableRecord(2, "UPDATED: Namaste streaming 2", new RecordIdentifier(1L, 0, 1L)));
    mutateWriter.insert(ASIA_INDIA, india3);
    mutateWriter.delete(EUROPE_UK, new MutableRecord(3, "Hello streaming 1", new RecordIdentifier(1L, 0, 0L)));
    mutateWriter.delete(EUROPE_FRANCE, new MutableRecord(5, "Bonjour streaming 1", new RecordIdentifier(1L, 0, 0L)));
    mutateWriter.update(EUROPE_FRANCE, new MutableRecord(6, "UPDATED: Bonjour streaming 2", new RecordIdentifier(1L, 0, 1L)));
    mutateWriter.close();

    mutateTxn.commit();
    assertThat(mutateTxn.getState(), is(COMMITTED));

    // ---- Verification: ASIA_INDIA ----
    StreamingAssert indiaAssert = assertionFactory.newStreamingAssert(sinkTable, ASIA_INDIA);
    indiaAssert.assertMinTransactionId(1L);
    indiaAssert.assertMaxTransactionId(2L);
    List<Record> indiaRows = indiaAssert.readRecords();
    assertThat(indiaRows.size(), is(3));
    Record indiaRow1 = indiaRows.get(0);
    assertThat(indiaRow1.getRow(), is("{1, Namaste streaming 1}"));
    assertThat(indiaRow1.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 0L)));
    Record indiaRow2 = indiaRows.get(1);
    assertThat(indiaRow2.getRow(), is("{2, UPDATED: Namaste streaming 2}"));
    assertThat(indiaRow2.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));
    Record indiaRow3 = indiaRows.get(2);
    assertThat(indiaRow3.getRow(), is("{20, Namaste streaming 3}"));
    assertThat(indiaRow3.getRecordIdentifier(), is(new RecordIdentifier(2L, 0, 0L)));

    // ---- Verification: EUROPE_UK ----
    StreamingAssert ukAssert = assertionFactory.newStreamingAssert(sinkTable, EUROPE_UK);
    ukAssert.assertMinTransactionId(1L);
    ukAssert.assertMaxTransactionId(2L);
    List<Record> ukRows = ukAssert.readRecords();
    assertThat(ukRows.size(), is(1));
    Record ukRow = ukRows.get(0);
    assertThat(ukRow.getRow(), is("{4, Hello streaming 2}"));
    assertThat(ukRow.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));

    // ---- Verification: EUROPE_FRANCE ----
    StreamingAssert franceAssert = assertionFactory.newStreamingAssert(sinkTable, EUROPE_FRANCE);
    franceAssert.assertMinTransactionId(1L);
    franceAssert.assertMaxTransactionId(2L);
    List<Record> franceRows = franceAssert.readRecords();
    assertThat(franceRows.size(), is(1));
    Record franceRow = franceRows.get(0);
    assertThat(franceRow.getRow(), is("{6, UPDATED: Bonjour streaming 2}"));
    assertThat(franceRow.getRecordIdentifier(), is(new RecordIdentifier(1L, 0, 1L)));

    mutateClient.close();
}
Also used : AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) Table(org.apache.hadoop.hive.metastore.api.Table) AcidTable(org.apache.hive.hcatalog.streaming.mutate.client.AcidTable) MutatorCoordinator(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) MutatorFactory(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory) Transaction(org.apache.hive.hcatalog.streaming.mutate.client.Transaction) MutatorCoordinatorBuilder(org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder) BucketIdResolver(org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver) Record(org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record) MutatorClient(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient) MutatorClientBuilder(org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder) Test(org.junit.Test)

Aggregations

AcidTable (org.apache.hive.hcatalog.streaming.mutate.client.AcidTable)6 MutatorClient (org.apache.hive.hcatalog.streaming.mutate.client.MutatorClient)6 MutatorClientBuilder (org.apache.hive.hcatalog.streaming.mutate.client.MutatorClientBuilder)6 Transaction (org.apache.hive.hcatalog.streaming.mutate.client.Transaction)6 BucketIdResolver (org.apache.hive.hcatalog.streaming.mutate.worker.BucketIdResolver)6 MutatorCoordinator (org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinator)6 MutatorCoordinatorBuilder (org.apache.hive.hcatalog.streaming.mutate.worker.MutatorCoordinatorBuilder)6 Table (org.apache.hadoop.hive.metastore.api.Table)5 MutatorFactory (org.apache.hive.hcatalog.streaming.mutate.worker.MutatorFactory)5 Test (org.junit.Test)5 RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier)4 Record (org.apache.hive.hcatalog.streaming.mutate.StreamingAssert.Record)4