
Example 1 with Pair

use of org.apache.hive.hcatalog.data.Pair in project hive by apache.

the class DbNotificationListener method addWriteNotificationLog.

private void addWriteNotificationLog(List<NotificationEvent> eventBatch, List<AcidWriteEvent> acidWriteEventList, Connection dbConn, SQLGenerator sqlGenerator, List<AcidWriteMessage> msgBatch) throws MetaException, SQLException {
    LOG.debug("DbNotificationListener: adding write notification log for : {}", eventBatch);
    assert ((dbConn != null) && (sqlGenerator != null));
    int numRows;
    long maxRows = MetastoreConf.getIntVar(conf, ConfVars.JDBC_MAX_BATCH_SIZE);
    try (Statement stmt = dbConn.createStatement()) {
        String st = sqlGenerator.getDbProduct().getPrepareTxnStmt();
        if (st != null) {
            stmt.execute(st);
        }
    } catch (Exception e) {
        LOG.error("Failed to execute query ", e);
        throw new MetaException(e.getMessage());
    }
    ResultSet rs = null;
    String select = sqlGenerator.addForUpdateClause("select \"WNL_ID\", \"WNL_FILES\" from" + " \"TXN_WRITE_NOTIFICATION_LOG\" " + "where \"WNL_DATABASE\" = ? " + "and \"WNL_TABLE\" = ? " + " and \"WNL_PARTITION\" = ? " + "and \"WNL_TXNID\" = ? ");
    List<Integer> insertList = new ArrayList<>();
    Map<Integer, Pair<Long, String>> updateMap = new HashMap<>();
    try (PreparedStatement pst = dbConn.prepareStatement(select)) {
        for (int i = 0; i < acidWriteEventList.size(); i++) {
            String dbName = acidWriteEventList.get(i).getDatabase();
            String tblName = acidWriteEventList.get(i).getTable();
            String partition = acidWriteEventList.get(i).getPartition();
            Long txnId = acidWriteEventList.get(i).getTxnId();
            LOG.debug("Going to execute query <" + select.replaceAll("\\?", "{}") + ">", quoteString(dbName), quoteString(tblName), quoteString(partition));
            pst.setString(1, dbName);
            pst.setString(2, tblName);
            pst.setString(3, partition);
            pst.setLong(4, txnId);
            rs = pst.executeQuery();
            if (!rs.next()) {
                insertList.add(i);
            } else {
                updateMap.put(i, new Pair<>(rs.getLong(1), rs.getString(2)));
            }
        }
    } catch (Exception e) {
        LOG.error("Failed to execute insert ", e);
        throw new MetaException(e.getMessage());
    } finally {
        close(rs);
    }
    if (insertList.size() != 0) {
        // if rs is empty then no lock is taken and thus it can not cause deadlock.
        long nextNLId = getNextNLId(dbConn, sqlGenerator, "org.apache.hadoop.hive.metastore.model.MTxnWriteNotificationLog", insertList.size());
        String insert = "insert into \"TXN_WRITE_NOTIFICATION_LOG\" " + "(\"WNL_ID\", \"WNL_TXNID\", \"WNL_WRITEID\", \"WNL_DATABASE\", \"WNL_TABLE\", " + "\"WNL_PARTITION\", \"WNL_TABLE_OBJ\", \"WNL_PARTITION_OBJ\", " + "\"WNL_FILES\", \"WNL_EVENT_TIME\") VALUES (?,?,?,?,?,?,?,?,?,?)";
        try (PreparedStatement pst = dbConn.prepareStatement(sqlGenerator.addEscapeCharacters(insert))) {
            numRows = 0;
            for (int idx : insertList) {
                String tableObj = msgBatch.get(idx).getTableObjStr();
                String partitionObj = msgBatch.get(idx).getPartitionObjStr();
                String files = ReplChangeManager.joinWithSeparator(msgBatch.get(idx).getFiles());
                String dbName = acidWriteEventList.get(idx).getDatabase();
                String tblName = acidWriteEventList.get(idx).getTable();
                String partition = acidWriteEventList.get(idx).getPartition();
                int currentTime = now();
                pst.setLong(1, nextNLId++);
                pst.setLong(2, acidWriteEventList.get(idx).getTxnId());
                pst.setLong(3, acidWriteEventList.get(idx).getWriteId());
                pst.setString(4, dbName);
                pst.setString(5, tblName);
                pst.setString(6, partition);
                pst.setString(7, tableObj);
                pst.setString(8, partitionObj);
                pst.setString(9, files);
                pst.setInt(10, currentTime);
                LOG.debug("Going to execute insert <" + insert.replaceAll("\\?", "{}") + ">", nextNLId, acidWriteEventList.get(idx).getTxnId(), acidWriteEventList.get(idx).getWriteId(), quoteString(dbName), quoteString(tblName), quoteString(partition), quoteString(tableObj), quoteString(partitionObj), quoteString(files), currentTime);
                pst.addBatch();
                numRows++;
                if (numRows == maxRows) {
                    pst.executeBatch();
                    numRows = 0;
                }
            }
            if (numRows != 0) {
                pst.executeBatch();
            }
        } catch (Exception e) {
            LOG.error("Failed to execute insert ", e);
            throw new MetaException(e.getMessage());
        }
    }
    if (updateMap.size() != 0) {
        String update = "update \"TXN_WRITE_NOTIFICATION_LOG\" set \"WNL_TABLE_OBJ\" = ? ," + " \"WNL_PARTITION_OBJ\" = ? ," + " \"WNL_FILES\" = ? ," + " \"WNL_EVENT_TIME\" = ?" + " where \"WNL_ID\" = ?";
        try (PreparedStatement pst = dbConn.prepareStatement(sqlGenerator.addEscapeCharacters(update))) {
            numRows = 0;
            for (Map.Entry<Integer, Pair<Long, String>> entry : updateMap.entrySet()) {
                int idx = entry.getKey();
                Pair<Long, String> nlIdInfo = entry.getValue();
                String tableObj = msgBatch.get(idx).getTableObjStr();
                String partitionObj = msgBatch.get(idx).getPartitionObjStr();
                String files = ReplChangeManager.joinWithSeparator(msgBatch.get(idx).getFiles());
                String existingFiles = nlIdInfo.second;
                long nlId = nlIdInfo.first;
                int currentTime = now();
                if (existingFiles.contains(sqlGenerator.addEscapeCharacters(files))) {
                    // If list of files are already present then no need to update it again. This scenario can come in case of
                    // retry done to the meta store for the same operation.
                    LOG.info("file list " + files + " already present");
                    continue;
                }
                files = ReplChangeManager.joinWithSeparator(Lists.newArrayList(files, existingFiles));
                pst.setString(1, tableObj);
                pst.setString(2, partitionObj);
                pst.setString(3, files);
                pst.setInt(4, currentTime);
                pst.setLong(5, nlId);
                LOG.debug("Going to execute update <" + update.replaceAll("\\?", "{}") + ">", quoteString(tableObj), quoteString(partitionObj), quoteString(files), currentTime, nlId);
                pst.addBatch();
                numRows++;
                if (numRows == maxRows) {
                    pst.executeBatch();
                    numRows = 0;
                }
            }
            if (numRows != 0) {
                pst.executeBatch();
            }
        } catch (Exception e) {
            LOG.error("Failed to execute update ", e);
            throw new MetaException(e.getMessage());
        }
    }
}
Also used : HashMap(java.util.HashMap) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) ArrayList(java.util.ArrayList) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) SQLException(java.sql.SQLException) IOException(java.io.IOException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) ResultSet(java.sql.ResultSet) Map(java.util.Map) Pair(org.apache.hive.hcatalog.data.Pair)
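
The method above splits a batch into fresh inserts and updates to existing rows, using Pair<Long, String> to carry each existing row's id (WNL_ID) and its current file list (WNL_FILES) from the select phase into the update phase. Below is a minimal, self-contained sketch of that same select-then-insert-or-update flow; the KV(ID, K, V) table, its columns, and the upsert method are illustrative assumptions, not Hive's schema or API.

// A minimal sketch of the insert-or-update pattern above, using Pair to carry
// the existing row id and current value between the select and update phases.
// The table KV(ID, K, V) and its columns are hypothetical, not part of Hive.
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hive.hcatalog.data.Pair;

public class UpsertSketch {
    public static void upsert(Connection conn, List<String> keys, List<String> values, long nextId)
            throws SQLException {
        List<Integer> insertList = new ArrayList<>();
        Map<Integer, Pair<Long, String>> updateMap = new HashMap<>();
        // Phase 1: find which keys already exist; remember their id and current value.
        try (PreparedStatement select = conn.prepareStatement("select ID, V from KV where K = ?")) {
            for (int i = 0; i < keys.size(); i++) {
                select.setString(1, keys.get(i));
                try (ResultSet rs = select.executeQuery()) {
                    if (rs.next()) {
                        updateMap.put(i, new Pair<>(rs.getLong(1), rs.getString(2)));
                    } else {
                        insertList.add(i);
                    }
                }
            }
        }
        // Phase 2: batch-insert the keys that were not found.
        try (PreparedStatement insert = conn.prepareStatement("insert into KV (ID, K, V) values (?,?,?)")) {
            for (int idx : insertList) {
                insert.setLong(1, nextId++);
                insert.setString(2, keys.get(idx));
                insert.setString(3, values.get(idx));
                insert.addBatch();
            }
            insert.executeBatch();
        }
        // Phase 3: batch-update the existing rows, appending to the stored value
        // unless the new value is already present (mirrors the retry check above).
        try (PreparedStatement update = conn.prepareStatement("update KV set V = ? where ID = ?")) {
            for (Map.Entry<Integer, Pair<Long, String>> e : updateMap.entrySet()) {
                Pair<Long, String> idAndValue = e.getValue();
                String newValue = values.get(e.getKey());
                if (idAndValue.second.contains(newValue)) {
                    continue;  // already recorded, e.g. a retried operation
                }
                update.setString(1, idAndValue.second + "," + newValue);
                update.setLong(2, idAndValue.first);
                update.addBatch();
            }
            update.executeBatch();
        }
    }
}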

Example 2 with Pair

use of org.apache.hive.hcatalog.data.Pair in project hive by apache.

the class TestHCatLoaderEncryption method setup.

@Before
public void setup() throws Exception {
    File f = new File(TEST_WAREHOUSE_DIR);
    if (f.exists()) {
        FileUtil.fullyDelete(f);
    }
    if (!(new File(TEST_WAREHOUSE_DIR).mkdirs())) {
        throw new RuntimeException("Could not create " + TEST_WAREHOUSE_DIR);
    }
    HiveConf hiveConf = new HiveConf(this.getClass());
    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR);
    hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    String s = hiveConf.get("hdfs.minidfs.basedir");
    if (s == null || s.length() <= 0) {
        // return System.getProperty("test.build.data", "build/test/data") + "/dfs/";
        hiveConf.set("hdfs.minidfs.basedir", System.getProperty("test.build.data", "build/test/data") + "_" + System.currentTimeMillis() + "_" + salt.getAndIncrement() + "/dfs/");
    }
    initEncryptionShim(hiveConf);
    String encryptedTablePath = TEST_WAREHOUSE_DIR + "/encryptedTable";
    SessionState.start(new CliSessionState(hiveConf));
    driver = DriverFactory.newDriver(hiveConf);
    SessionState.get().out = new SessionStream(System.out);
    createTable(BASIC_TABLE, "a int, b string");
    createTableInSpecifiedPath(ENCRYPTED_TABLE, "a int, b string", encryptedTablePath, driver);
    associateEncryptionZoneWithPath(encryptedTablePath);
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    basicInputData = new HashMap<Integer, Pair<Integer, String>>();
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
        String si = i + "";
        for (int j = 1; j <= LOOP_SIZE; j++) {
            String sj = "S" + j + "S";
            input[k] = si + "\t" + sj;
            basicInputData.put(k, new Pair<Integer, String>(i, sj));
            k++;
        }
    }
    HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input);
    PigServer server = HCatBaseTest.createPigServer(false);
    server.setBatchOn();
    int i = 0;
    server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++i);
    server.registerQuery("store A into '" + ENCRYPTED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
    server.executeBatch();
}
Also used : CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SessionStream(org.apache.hadoop.hive.common.io.SessionStream) PigServer(org.apache.pig.PigServer) HiveConf(org.apache.hadoop.hive.conf.HiveConf) File(java.io.File) Pair(org.apache.hive.hcatalog.data.Pair) Before(org.junit.Before)
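
Here basicInputData maps a row index to a Pair<Integer, String> recording the (a, b) values the test expects to read back. The sketch below shows how such a map can be checked against tab-separated rows through Pair's public first and second fields; verifyRow is a hypothetical helper for illustration, not part of TestHCatLoaderEncryption.

// A minimal sketch: check tab-separated rows against an expected map of
// Pair<Integer, String> values via the public first/second fields.
// verifyRow is a hypothetical helper, not part of the test class.
import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.Pair;

public class ExpectedDataSketch {
    public static boolean verifyRow(Map<Integer, Pair<Integer, String>> expected, int rowIndex, String row) {
        String[] cols = row.split("\t");
        Pair<Integer, String> pair = expected.get(rowIndex);
        return pair != null
                && pair.first == Integer.parseInt(cols[0])
                && pair.second.equals(cols[1]);
    }

    public static void main(String[] args) {
        Map<Integer, Pair<Integer, String>> expected = new HashMap<>();
        expected.put(0, new Pair<Integer, String>(1, "S1S"));
        System.out.println(verifyRow(expected, 0, "1\tS1S"));  // prints true
    }
}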

Example 3 with Pair

use of org.apache.hive.hcatalog.data.Pair in project hive by apache.

the class TestHCatStorerMulti method populateBasicFile.

private void populateBasicFile() throws IOException {
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    basicInputData = new HashMap<Integer, Pair<Integer, String>>();
    int k = 0;
    File file = new File(INPUT_FILE_NAME);
    file.deleteOnExit();
    try (FileWriter writer = new FileWriter(file)) {
        // try-with-resources so the writer is closed even if a write fails
        for (int i = 1; i <= LOOP_SIZE; i++) {
            String si = i + "";
            for (int j = 1; j <= LOOP_SIZE; j++) {
                String sj = "S" + j + "S";
                input[k] = si + "\t" + sj;
                basicInputData.put(k, new Pair<Integer, String>(i, sj));
                writer.write(input[k] + "\n");
                k++;
            }
        }
    }
}
Also used : FileWriter(java.io.FileWriter) File(java.io.File) Pair(org.apache.hive.hcatalog.data.Pair)
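
populateBasicFile writes the same LOOP_SIZE x LOOP_SIZE rows to a tab-separated file while recording them in basicInputData. The sketch below is the inverse step, rebuilding the index-to-Pair map from such a file; readBack is a hypothetical helper, not part of TestHCatStorerMulti.

// A sketch of the inverse of populateBasicFile: rebuild the index -> Pair map
// from a tab-separated file. readBack is a hypothetical helper.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.Pair;

public class ReadBackSketch {
    public static Map<Integer, Pair<Integer, String>> readBack(String fileName) throws IOException {
        Map<Integer, Pair<Integer, String>> data = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            String line;
            int k = 0;
            while ((line = reader.readLine()) != null) {
                String[] cols = line.split("\t");
                data.put(k++, new Pair<Integer, String>(Integer.valueOf(cols[0]), cols[1]));
            }
        }
        return data;
    }
}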

Example 4 with Pair

use of org.apache.hive.hcatalog.data.Pair in project hive by apache.

the class AbstractHCatLoaderTest method testReadPartitionedBasic.

@Test
public void testReadPartitionedBasic() throws Exception {
    PigServer server = createPigServer(false);
    driver.run("select * from " + PARTITIONED_TABLE);
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size());
    server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedWSchema = server.dumpSchema("W");
    List<FieldSchema> Wfields = dumpedWSchema.getFields();
    assertEquals(3, Wfields.size());
    assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a"));
    assertTrue(Wfields.get(0).type == DataType.INTEGER);
    assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b"));
    assertTrue(Wfields.get(1).type == DataType.CHARARRAY);
    assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt"));
    assertTrue(Wfields.get(2).type == DataType.CHARARRAY);
    Iterator<Tuple> WIter = server.openIterator("W");
    Collection<Pair<Integer, String>> valuesRead = new ArrayList<Pair<Integer, String>>();
    while (WIter.hasNext()) {
        Tuple t = WIter.next();
        assertTrue(t.size() == 3);
        assertNotNull(t.get(0));
        assertNotNull(t.get(1));
        assertNotNull(t.get(2));
        assertTrue(t.get(0).getClass() == Integer.class);
        assertTrue(t.get(1).getClass() == String.class);
        assertTrue(t.get(2).getClass() == String.class);
        valuesRead.add(new Pair<Integer, String>((Integer) t.get(0), (String) t.get(1)));
        if ((Integer) t.get(0) < 2) {
            assertEquals("0", t.get(2));
        } else {
            assertEquals("1", t.get(2));
        }
    }
    assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size());
    server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("P1filter = filter P1 by bkt == '0';");
    Iterator<Tuple> P1Iter = server.openIterator("P1filter");
    int count1 = 0;
    while (P1Iter.hasNext()) {
        Tuple t = P1Iter.next();
        assertEquals("0", t.get(2));
        assertEquals(1, t.get(0));
        count1++;
    }
    assertEquals(3, count1);
    server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("P2filter = filter P2 by bkt == '1';");
    Iterator<Tuple> P2Iter = server.openIterator("P2filter");
    int count2 = 0;
    while (P2Iter.hasNext()) {
        Tuple t = P2Iter.next();
        assertEquals("1", t.get(2));
        assertTrue(((Integer) t.get(0)) > 1);
        count2++;
    }
    assertEquals(6, count2);
}
Also used : HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Pair(org.apache.hive.hcatalog.data.Pair) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)
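
The test collects each (a, b) column pair read through HCatLoader into a Collection<Pair<Integer, String>> and then compares only sizes against the Hive driver's output. A stricter content check could look like the sketch below; it assumes Pair compares by value (its use as a HashMap key in Example 5 implies equals and hashCode are overridden), and checkContents is a hypothetical helper, not part of AbstractHCatLoaderTest.

// A sketch of a stricter check than comparing sizes: every Pair read back must
// appear among the expected values. Assumes value-based Pair.equals.
import java.util.Collection;

import org.apache.hive.hcatalog.data.Pair;

public class ContentCheckSketch {
    public static boolean checkContents(Collection<Pair<Integer, String>> expected,
                                        Collection<Pair<Integer, String>> read) {
        // Same number of rows, and every pair read back is present among the expected ones.
        return expected.size() == read.size() && expected.containsAll(read);
    }
}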

Example 5 with Pair

use of org.apache.hive.hcatalog.data.Pair in project hive by apache.

the class PigHCatUtil method getTable.

/*
  * The job argument is passed so that configuration overrides can be used to initialize
  * the metastore configuration in the special case of an embedded metastore
  * (hive.metastore.uris = "").
  */
public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal, Job job) throws IOException {
    Pair<String, String> loc_server = new Pair<String, String>(location, hcatServerUri);
    Table hcatTable = hcatTableCache.get(loc_server);
    if (hcatTable != null) {
        return hcatTable;
    }
    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
    String dbName = dbTablePair.first;
    String tableName = dbTablePair.second;
    Table table = null;
    IMetaStoreClient client = null;
    try {
        client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class, job);
        table = HCatUtil.getTable(client, dbName, tableName);
    } catch (NoSuchObjectException nsoe) {
        // prettier error messages to frontend
        throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE);
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        HCatUtil.closeHiveClientQuietly(client);
    }
    hcatTableCache.put(loc_server, table);
    return table;
}
Also used : Table(org.apache.hadoop.hive.ql.metadata.Table) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) IOException(java.io.IOException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) PigException(org.apache.pig.PigException) HCatException(org.apache.hive.hcatalog.common.HCatException) Pair(org.apache.hive.hcatalog.data.Pair)
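
getTable uses a Pair<String, String> of (location, hcatServerUri) as the key of hcatTableCache, so two lookups built from equal strings must hit the same entry. The minimal sketch below shows that cache-key usage with a plain HashMap standing in for the actual cache; it relies on Pair providing value-based equals and hashCode, which the caching above implies, and the "cached table object" value is just a placeholder string.

// A minimal sketch of Pair<String, String> as a cache key. Relies on value-based
// equals/hashCode in Pair; the cached value here is a placeholder string.
import java.util.HashMap;
import java.util.Map;

import org.apache.hive.hcatalog.data.Pair;

public class CacheKeySketch {
    public static void main(String[] args) {
        Map<Pair<String, String>, String> cache = new HashMap<>();
        cache.put(new Pair<String, String>("default.my_table", "thrift://metastore:9083"), "cached table object");
        // A separately constructed, equal-valued key should find the cached entry.
        String hit = cache.get(new Pair<String, String>("default.my_table", "thrift://metastore:9083"));
        System.out.println(hit);  // prints "cached table object" if Pair compares by value
    }
}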

Aggregations

Pair (org.apache.hive.hcatalog.data.Pair): 6 uses
PigServer (org.apache.pig.PigServer): 3 uses
File (java.io.File): 2 uses
IOException (java.io.IOException): 2 uses
ArrayList (java.util.ArrayList): 2 uses
Before (org.junit.Before): 2 uses
FileWriter (java.io.FileWriter): 1 use
PreparedStatement (java.sql.PreparedStatement): 1 use
ResultSet (java.sql.ResultSet): 1 use
SQLException (java.sql.SQLException): 1 use
Statement (java.sql.Statement): 1 use
HashMap (java.util.HashMap): 1 use
Map (java.util.Map): 1 use
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1 use
CliSessionState (org.apache.hadoop.hive.cli.CliSessionState): 1 use
SessionStream (org.apache.hadoop.hive.common.io.SessionStream): 1 use
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 1 use
IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient): 1 use
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 1 use
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 1 use