Search in sources :

Example 81 with AtomicBoolean

use of java.util.concurrent.atomic.AtomicBoolean in project hive by apache.

the class TestCompactor method dynamicPartitioningInsert.

@Test
public void dynamicPartitioningInsert() throws Exception {
    String tblName = "dpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
    initiator.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(2, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=today", names.get(0));
    Assert.assertEquals("ds=yesterday", names.get(1));
}
Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)

Example 82 with AtomicBoolean

use of java.util.concurrent.atomic.AtomicBoolean in project hive by apache.

the class TestCompactor method minorCompactWhileStreaming.

@Test
public void minorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true')", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MINOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.deltaFileFilter);
        String[] names = new String[stat.length];
        Path resultFile = null;
        for (int i = 0; i < names.length; i++) {
            names[i] = stat[i].getPath().getName();
            if (names[i].equals("delta_0000001_0000004")) {
                resultFile = stat[i].getPath();
            }
        }
        Arrays.sort(names);
        String[] expected = new String[] { "delta_0000001_0000002", "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006" };
        if (!Arrays.deepEquals(expected, names)) {
            Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
        }
        checkExpectedTxnsPresent(null, new Path[] { resultFile }, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    } finally {
        connection.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 83 with AtomicBoolean

use of java.util.concurrent.atomic.AtomicBoolean in project hive by apache.

the class TestCompactor method dynamicPartitioningUpdate.

@Test
public void dynamicPartitioningUpdate() throws Exception {
    String tblName = "udpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    // Set to 1 so insert doesn't set it off but update does
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
    initiator.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(2, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=today", names.get(0));
    Assert.assertEquals("ds=yesterday", names.get(1));
}
Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)

Example 84 with AtomicBoolean

use of java.util.concurrent.atomic.AtomicBoolean in project hive by apache.

the class TestCompactor method majorCompactWhileStreaming.

@Test
public void majorCompactWhileStreaming() throws Exception {
    String dbName = "default";
    String tblName = "cws";
    List<String> colNames = Arrays.asList("a", "b");
    String columnNamesProperty = "a,b";
    String columnTypesProperty = "int:string";
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 1 BUCKETS" + " STORED AS ORC  TBLPROPERTIES ('transactional'='true') ", driver);
    HiveEndPoint endPt = new HiveEndPoint(null, dbName, tblName, null);
    DelimitedInputWriter writer = new DelimitedInputWriter(new String[] { "a", "b" }, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    try {
        // Write a couple of batches
        for (int i = 0; i < 2; i++) {
            writeBatch(connection, writer, false);
        }
        // Start a third batch, but don't close it.
        writeBatch(connection, writer, true);
        // Now, compact
        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
        txnHandler.compact(new CompactionRequest(dbName, tblName, CompactionType.MAJOR));
        Worker t = new Worker();
        t.setThreadId((int) t.getId());
        t.setHiveConf(conf);
        AtomicBoolean stop = new AtomicBoolean(true);
        AtomicBoolean looped = new AtomicBoolean();
        t.init(stop, looped);
        t.run();
        // Find the location of the table
        IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
        Table table = msClient.getTable(dbName, tblName);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
        if (1 != stat.length) {
            Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
        }
        String name = stat[0].getPath().getName();
        Assert.assertEquals(name, "base_0000004");
        checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
    } finally {
        connection.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HiveMetaStoreClient(org.apache.hadoop.hive.metastore.HiveMetaStoreClient) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) StreamingConnection(org.apache.hive.hcatalog.streaming.StreamingConnection) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) FileSystem(org.apache.hadoop.fs.FileSystem) DelimitedInputWriter(org.apache.hive.hcatalog.streaming.DelimitedInputWriter) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) CompactionRequest(org.apache.hadoop.hive.metastore.api.CompactionRequest) Test(org.junit.Test)

Example 85 with AtomicBoolean

use of java.util.concurrent.atomic.AtomicBoolean in project hive by apache.

the class TestCompactor method schemaEvolutionAddColDynamicPartitioningUpdate.

@Test
public void schemaEvolutionAddColDynamicPartitioningUpdate() throws Exception {
    String tblName = "udpct";
    List<String> colNames = Arrays.asList("a", "b");
    executeStatementOnDriver("drop table if exists " + tblName, driver);
    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " PARTITIONED BY(ds string)" + //currently ACID requires table to be bucketed
    " CLUSTERED BY(a) INTO 2 BUCKETS" + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + "'today'), (2, 'wilma', 'yesterday')", driver);
    executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver);
    // Validate the update.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(2, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tyesterday", valuesReadFromHiveDriver.get(1));
    // ALTER TABLE ... ADD COLUMNS
    executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);
    // Validate there is an added NULL for column c.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(2, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
    // Second INSERT round with new inserts into previously existing partition 'yesterday'.
    executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + "(5, 'doc', 1902, 'yesterday')", driver);
    // Validate there the new insertions for column c.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4));
    executeStatementOnDriver("update " + tblName + " set c = 2000", driver);
    // Validate the update of new column c, even in old rows.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
    Initiator initiator = new Initiator();
    initiator.setThreadId((int) initiator.getId());
    // Set to 1 so insert doesn't set it off but update does
    conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1);
    initiator.setHiveConf(conf);
    AtomicBoolean stop = new AtomicBoolean();
    stop.set(true);
    initiator.init(stop, new AtomicBoolean());
    initiator.run();
    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
    List<ShowCompactResponseElement> compacts = rsp.getCompacts();
    Assert.assertEquals(4, compacts.size());
    SortedSet<String> partNames = new TreeSet<String>();
    for (int i = 0; i < compacts.size(); i++) {
        Assert.assertEquals("default", compacts.get(i).getDbname());
        Assert.assertEquals(tblName, compacts.get(i).getTablename());
        Assert.assertEquals("initiated", compacts.get(i).getState());
        partNames.add(compacts.get(i).getPartitionname());
    }
    List<String> names = new ArrayList<String>(partNames);
    Assert.assertEquals("ds=last_century", names.get(0));
    Assert.assertEquals("ds=soon", names.get(1));
    Assert.assertEquals("ds=today", names.get(2));
    Assert.assertEquals("ds=yesterday", names.get(3));
    // Validate after compaction.
    executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver);
    valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    Assert.assertEquals(5, valuesReadFromHiveDriver.size());
    Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0));
    Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1));
    Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2));
    Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3));
    Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4));
}
Also used : ArrayList(java.util.ArrayList) HiveEndPoint(org.apache.hive.hcatalog.streaming.HiveEndPoint) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShowCompactResponse(org.apache.hadoop.hive.metastore.api.ShowCompactResponse) TreeSet(java.util.TreeSet) ShowCompactRequest(org.apache.hadoop.hive.metastore.api.ShowCompactRequest) TxnStore(org.apache.hadoop.hive.metastore.txn.TxnStore) ShowCompactResponseElement(org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement) Test(org.junit.Test)

Aggregations

AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)2412 Test (org.junit.Test)1002 CountDownLatch (java.util.concurrent.CountDownLatch)394 IOException (java.io.IOException)336 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)301 ArrayList (java.util.ArrayList)214 AtomicReference (java.util.concurrent.atomic.AtomicReference)202 ENotificationImpl (org.eclipse.emf.ecore.impl.ENotificationImpl)108 Test (org.testng.annotations.Test)106 List (java.util.List)98 Ignite (org.apache.ignite.Ignite)98 AtomicLong (java.util.concurrent.atomic.AtomicLong)94 HashMap (java.util.HashMap)93 ExecutorService (java.util.concurrent.ExecutorService)90 Map (java.util.Map)88 ExecutionException (java.util.concurrent.ExecutionException)87 File (java.io.File)68 Random (java.util.Random)68 CyclicBarrier (java.util.concurrent.CyclicBarrier)68 HashSet (java.util.HashSet)63