
Example 81 with SessionState

use of org.apache.hadoop.hive.ql.session.SessionState in project drill by apache.

the class HiveTestDataGenerator method generateTestData.

private void generateTestData() throws Exception {
    HiveConf conf = new HiveConf(SessionState.class);
    conf.set("javax.jdo.option.ConnectionURL", String.format("jdbc:derby:;databaseName=%s;create=true", dbDir));
    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, FileSystem.DEFAULT_FS);
    conf.set("hive.metastore.warehouse.dir", whDir);
    conf.set("mapred.job.tracker", "local");
    conf.set(ConfVars.SCRATCHDIR.varname, getTempDir("scratch_dir"));
    conf.set(ConfVars.LOCALSCRATCHDIR.varname, getTempDir("local_scratch_dir"));
    conf.set(ConfVars.DYNAMICPARTITIONINGMODE.varname, "nonstrict");
    SessionState ss = new SessionState(conf);
    SessionState.start(ss);
    Driver hiveDriver = new Driver(conf);
    // generate (key, value) test data
    String testDataFile = generateTestDataFile();
    // Create a (key, value) schema table with Text SerDe which is available in hive-serdes.jar
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS default.kv(key INT, value STRING) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "LOAD DATA LOCAL INPATH '" + testDataFile + "' OVERWRITE INTO TABLE default.kv");
    // Create a (key, value) schema table in non-default database with RegexSerDe which is available in hive-contrib.jar
    // Table with RegExSerde is expected to have columns of STRING type only.
    executeQuery(hiveDriver, "CREATE DATABASE IF NOT EXISTS db1");
    executeQuery(hiveDriver, "CREATE TABLE db1.kv_db1(key STRING, value STRING) " + "ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' " + "WITH SERDEPROPERTIES (" + "  \"input.regex\" = \"([0-9]*), (.*_[0-9]*)\", " + "  \"output.format.string\" = \"%1$s, %2$s\"" + ") ");
    executeQuery(hiveDriver, "INSERT INTO TABLE db1.kv_db1 SELECT * FROM default.kv");
    // Create an Avro format based table backed by schema in a separate file
    final String avroCreateQuery = String.format("CREATE TABLE db1.avro " + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' " + "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' " + "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' " + "TBLPROPERTIES ('avro.schema.url'='file:///%s')", BaseTestQuery.getPhysicalFileFromResource("avro_test_schema.json").replace('\\', '/'));
    executeQuery(hiveDriver, avroCreateQuery);
    executeQuery(hiveDriver, "INSERT INTO TABLE db1.avro SELECT * FROM default.kv");
    executeQuery(hiveDriver, "USE default");
    // create a table with no data
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS empty_table(a INT, b STRING)");
    // delete the table location of empty table
    File emptyTableLocation = new File(whDir, "empty_table");
    if (emptyTableLocation.exists()) {
        FileUtils.forceDelete(emptyTableLocation);
    }
    // create a Hive table that has columns with data types which are supported for reading in Drill.
    testDataFile = generateAllTypesDataFile();
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS readtest (" + "  binary_field BINARY," + "  boolean_field BOOLEAN," + "  tinyint_field TINYINT," + "  decimal0_field DECIMAL," + "  decimal9_field DECIMAL(6, 2)," + "  decimal18_field DECIMAL(15, 5)," + "  decimal28_field DECIMAL(23, 1)," + "  decimal38_field DECIMAL(30, 3)," + "  double_field DOUBLE," + "  float_field FLOAT," + "  int_field INT," + "  bigint_field BIGINT," + "  smallint_field SMALLINT," + "  string_field STRING," + "  varchar_field VARCHAR(50)," + "  timestamp_field TIMESTAMP," + "  date_field DATE," + "  char_field CHAR(10)" + ") PARTITIONED BY (" + // "  binary_part BINARY," +
    "  boolean_part BOOLEAN," + "  tinyint_part TINYINT," + "  decimal0_part DECIMAL," + "  decimal9_part DECIMAL(6, 2)," + "  decimal18_part DECIMAL(15, 5)," + "  decimal28_part DECIMAL(23, 1)," + "  decimal38_part DECIMAL(30, 3)," + "  double_part DOUBLE," + "  float_part FLOAT," + "  int_part INT," + "  bigint_part BIGINT," + "  smallint_part SMALLINT," + "  string_part STRING," + "  varchar_part VARCHAR(50)," + "  timestamp_part TIMESTAMP," + "  date_part DATE," + "  char_part CHAR(10)" + ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' " + "TBLPROPERTIES ('serialization.null.format'='') ");
    // Add a partition to table 'readtest'
    executeQuery(hiveDriver, "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " + // "  binary_part='binary', " +
    "  boolean_part='true', " + "  tinyint_part='64', " + "  decimal0_part='36.9', " + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', " + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', " + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', " + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', " + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', " + "  date_part='2013-07-05', " + "  char_part='char')");
    // Add a second partition to table 'readtest' which contains the same values as the first partition except
    // for tinyint_part partition column
    executeQuery(hiveDriver, "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " + // "  binary_part='binary', " +
    "  boolean_part='true', " + "  tinyint_part='65', " + "  decimal0_part='36.9', " + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', " + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', " + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', " + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', " + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', " + "  date_part='2013-07-05', " + "  char_part='char')");
    // Load data into table 'readtest'
    executeQuery(hiveDriver, String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE default.readtest PARTITION (" + // "  binary_part='binary', " +
    "  boolean_part='true', " + "  tinyint_part='64', " + "  decimal0_part='36.9', " + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', " + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', " + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', " + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', " + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', " + "  date_part='2013-07-05'," + "  char_part='char'" + ")", testDataFile));
    // create a table that has all Hive types. This is to test how Hive table metadata is populated in
    // Drill's INFORMATION_SCHEMA.
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS infoschematest(" + "booleanType BOOLEAN, " + "tinyintType TINYINT, " + "smallintType SMALLINT, " + "intType INT, " + "bigintType BIGINT, " + "floatType FLOAT, " + "doubleType DOUBLE, " + "dateType DATE, " + "timestampType TIMESTAMP, " + "binaryType BINARY, " + "decimalType DECIMAL(38, 2), " + "stringType STRING, " + "varCharType VARCHAR(20), " + "listType ARRAY<STRING>, " + "mapType MAP<STRING,INT>, " + "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " + "uniontypeType UNIONTYPE<int, double, array<string>>, " + "charType CHAR(10))");
    /**
     * Create a PARQUET table with all supported types.
     */
    executeQuery(hiveDriver, "CREATE TABLE readtest_parquet (" + "  binary_field BINARY, " + "  boolean_field BOOLEAN, " + "  tinyint_field TINYINT," + "  decimal0_field DECIMAL," + "  decimal9_field DECIMAL(6, 2)," + "  decimal18_field DECIMAL(15, 5)," + "  decimal28_field DECIMAL(23, 1)," + "  decimal38_field DECIMAL(30, 3)," + "  double_field DOUBLE," + "  float_field FLOAT," + "  int_field INT," + "  bigint_field BIGINT," + "  smallint_field SMALLINT," + "  string_field STRING," + "  varchar_field VARCHAR(50)," + "  timestamp_field TIMESTAMP," + "  char_field CHAR(10)" + ") PARTITIONED BY (" + // "  binary_part BINARY," +
    "  boolean_part BOOLEAN," + "  tinyint_part TINYINT," + "  decimal0_part DECIMAL," + "  decimal9_part DECIMAL(6, 2)," + "  decimal18_part DECIMAL(15, 5)," + "  decimal28_part DECIMAL(23, 1)," + "  decimal38_part DECIMAL(30, 3)," + "  double_part DOUBLE," + "  float_part FLOAT," + "  int_part INT," + "  bigint_part BIGINT," + "  smallint_part SMALLINT," + "  string_part STRING," + "  varchar_part VARCHAR(50)," + "  timestamp_part TIMESTAMP," + "  date_part DATE," + "  char_part CHAR(10)" + ") STORED AS parquet ");
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE readtest_parquet " + "PARTITION (" + // "  binary_part='binary', " +
    "  boolean_part='true', " + "  tinyint_part='64', " + "  decimal0_part='36.9', " + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', " + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', " + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', " + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', " + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', " + "  date_part='2013-07-05', " + "  char_part='char'" + ") " + " SELECT " + "  binary_field," + "  boolean_field," + "  tinyint_field," + "  decimal0_field," + "  decimal9_field," + "  decimal18_field," + "  decimal28_field," + "  decimal38_field," + "  double_field," + "  float_field," + "  int_field," + "  bigint_field," + "  smallint_field," + "  string_field," + "  varchar_field," + "  timestamp_field," + "  char_field" + " FROM readtest WHERE tinyint_part = 64");
    // Add a second partition to table 'readtest_parquet' which contains the same values as the first partition except
    // for tinyint_part partition column
    executeQuery(hiveDriver, "ALTER TABLE readtest_parquet ADD PARTITION ( " + // "  binary_part='binary', " +
    "  boolean_part='true', " + "  tinyint_part='65', " + "  decimal0_part='36.9', " + "  decimal9_part='36.9', " + "  decimal18_part='3289379872.945645', " + "  decimal28_part='39579334534534.35345', " + "  decimal38_part='363945093845093890.9', " + "  double_part='8.345', " + "  float_part='4.67', " + "  int_part='123456', " + "  bigint_part='234235', " + "  smallint_part='3455', " + "  string_part='string', " + "  varchar_part='varchar', " + "  timestamp_part='2013-07-05 17:01:00', " + "  date_part='2013-07-05', " + "  char_part='char')");
    // create a Hive view to test how its metadata is populated in Drill's INFORMATION_SCHEMA
    executeQuery(hiveDriver, "CREATE VIEW IF NOT EXISTS hiveview AS SELECT * FROM kv");
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS " + "partition_pruning_test_loadtable(a DATE, b TIMESTAMP, c INT, d INT, e INT) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE partition_pruning_test_loadtable", generateTestDataFileForPartitionInput()));
    // create partitioned hive table to test partition pruning
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS partition_pruning_test(a DATE, b TIMESTAMP) " + "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) " + "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS partition_with_few_schemas(a DATE, b TIMESTAMP) " + "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_with_few_schemas PARTITION(c, d, e) " + "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");
    executeQuery(hiveDriver, "alter table partition_with_few_schemas partition(c=1, d=1, e=1) change a a1 INT");
    executeQuery(hiveDriver, "alter table partition_with_few_schemas partition(c=1, d=1, e=2) change a a1 INT");
    executeQuery(hiveDriver, "alter table partition_with_few_schemas partition(c=2, d=2, e=2) change a a1 INT");
    // Add a partition with custom location
    executeQuery(hiveDriver, String.format("ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'", getTempDir("part1")));
    executeQuery(hiveDriver, String.format("INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) " + "SELECT '%s', '%s' FROM kv LIMIT 1", new Date(System.currentTimeMillis()).toString(), new Timestamp(System.currentTimeMillis()).toString()));
    executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable");
    // Create a partitioned parquet table (DRILL-3938)
    executeQuery(hiveDriver, "CREATE TABLE kv_parquet(key INT, value STRING) PARTITIONED BY (part1 int) STORED AS PARQUET");
    executeQuery(hiveDriver, "INSERT INTO TABLE kv_parquet PARTITION(part1) SELECT key, value, key FROM default.kv");
    executeQuery(hiveDriver, "ALTER TABLE kv_parquet ADD COLUMNS (newcol string)");
    executeQuery(hiveDriver, "CREATE TABLE countStar_Parquet (int_field INT) STORED AS parquet");
    final int numOfRows = 200;
    final StringBuffer sb = new StringBuffer();
    sb.append("VALUES ");
    for (int i = 0; i < numOfRows; ++i) {
        if (i != 0) {
            sb.append(",");
        }
        sb.append("(").append(i).append(")");
    }
    executeQuery(hiveDriver, "INSERT INTO TABLE countStar_Parquet \n" + sb.toString());
    // Create a StorageHandler based table (DRILL-3739)
    executeQuery(hiveDriver, "CREATE TABLE kv_sh(key INT, value STRING) STORED BY " + "'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'");
    // Insert fails if the table directory already exists for tables with DefaultStorageHandlers. It's a known
    // issue in Hive. So delete the table directory created as part of the CREATE TABLE.
    FileUtils.deleteQuietly(new File(whDir, "kv_sh"));
    //executeQuery(hiveDriver, "INSERT OVERWRITE TABLE kv_sh SELECT * FROM kv");
    // Create text tables with skip header and footer table property
    executeQuery(hiveDriver, "create database if not exists skipper");
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_small", "textfile", "1", "1"));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_small", 5, 1, 1));
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_large", "textfile", "2", "2"));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_large", 5000, 2, 2));
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_header", "textfile", "A", "1"));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_header", 5, 1, 1));
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_footer", "textfile", "1", "A"));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_footer", 5, 1, 1));
    // Create rcfile table with skip header and footer table property
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_rcfile_large", "rcfile", "1", "1"));
    executeQuery(hiveDriver, "insert into table skipper.kv_rcfile_large select * from skipper.kv_text_large");
    // Create parquet table with skip header and footer table property
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_parquet_large", "parquet", "1", "1"));
    executeQuery(hiveDriver, "insert into table skipper.kv_parquet_large select * from skipper.kv_text_large");
    // Create sequencefile table with skip header and footer table property
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_sequencefile_large", "sequencefile", "1", "1"));
    executeQuery(hiveDriver, "insert into table skipper.kv_sequencefile_large select * from skipper.kv_text_large");
    // Create a table based on json file
    executeQuery(hiveDriver, "create table default.simple_json(json string)");
    final String loadData = String.format("load data local inpath '" + Resources.getResource("simple.json") + "' into table default.simple_json");
    executeQuery(hiveDriver, loadData);
    ss.close();
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) Driver(org.apache.hadoop.hive.ql.Driver) HiveConf(org.apache.hadoop.hive.conf.HiveConf) File(java.io.File) Timestamp(java.sql.Timestamp) Date(java.sql.Date)

Example 82 with SessionState

use of org.apache.hadoop.hive.ql.session.SessionState in project drill by apache.

the class TestSqlStdBasedAuthorization method generateTestData.

private static void generateTestData() throws Exception {
    final SessionState ss = new SessionState(hiveConf);
    SessionState.start(ss);
    final Driver driver = new Driver(hiveConf);
    executeQuery(driver, "CREATE DATABASE " + db_general);
    createTbl(driver, db_general, g_student_user0, studentDef, studentData);
    createTbl(driver, db_general, g_voter_role0, voterDef, voterData);
    createTbl(driver, db_general, g_student_user2, studentDef, studentData);
    executeQuery(driver, "SET ROLE admin");
    executeQuery(driver, "CREATE ROLE " + test_role0);
    executeQuery(driver, "GRANT ROLE " + test_role0 + " TO USER " + org1Users[1]);
    executeQuery(driver, "GRANT ROLE " + test_role0 + " TO USER " + org1Users[2]);
    executeQuery(driver, String.format("GRANT SELECT ON %s.%s TO USER %s", db_general, g_student_user0, org1Users[0]));
    executeQuery(driver, String.format("GRANT SELECT ON %s.%s TO ROLE %s", db_general, g_voter_role0, test_role0));
    executeQuery(driver, String.format("GRANT SELECT ON %s.%s TO USER %s", db_general, g_student_user2, org1Users[2]));
    createView(org1Users[0], org1Groups[0], v_student_u0g0_750, String.format("SELECT rownum, name, age, studentnum FROM %s.%s.%s", hivePluginName, db_general, g_student_user0));
    createView(org1Users[1], org1Groups[1], v_student_u1g1_750, String.format("SELECT rownum, name, age FROM %s.%s.%s", MINIDFS_STORAGE_PLUGIN_NAME, "tmp", v_student_u0g0_750));
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) Driver(org.apache.hadoop.hive.ql.Driver)
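
The GRANT and SET ROLE statements above only succeed if hiveConf, which is built elsewhere in the test class, enables SQL-standard based authorization. A minimal sketch of what that configuration typically looks like follows; the property names are standard Hive settings, but the concrete values and the hive_admin_user principal are assumptions for illustration, not the actual Drill test setup.

// Sketch of SQL-standard authorization settings on a HiveConf (values are assumptions).
HiveConf hiveConf = new HiveConf(SessionState.class);
hiveConf.set("hive.security.authorization.enabled", "true");
hiveConf.set("hive.security.authorization.manager",
    "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
hiveConf.set("hive.security.authenticator.manager",
    "org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator");
// hypothetical admin principal allowed to run "SET ROLE admin" and CREATE ROLE
hiveConf.set("hive.users.in.admin.role", "hive_admin_user");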

Example 83 with SessionState

use of org.apache.hadoop.hive.ql.session.SessionState in project phoenix by apache.

the class PhoenixStorageHandler method configureJobProperties.

@SuppressWarnings({ "unchecked", "rawtypes" })
protected void configureJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
    Properties tableProperties = tableDesc.getProperties();
    String inputFormatClassName = tableProperties.getProperty(PhoenixStorageHandlerConstants.HBASE_INPUT_FORMAT_CLASS);
    if (LOG.isDebugEnabled()) {
        LOG.debug(PhoenixStorageHandlerConstants.HBASE_INPUT_FORMAT_CLASS + " is " + inputFormatClassName);
    }
    Class<?> inputFormatClass;
    try {
        if (inputFormatClassName != null) {
            inputFormatClass = JavaUtils.loadClass(inputFormatClassName);
        } else {
            inputFormatClass = PhoenixInputFormat.class;
        }
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
    if (inputFormatClass != null) {
        tableDesc.setInputFileFormatClass((Class<? extends InputFormat>) inputFormatClass);
    }
    String tableName = tableProperties.getProperty(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
    if (tableName == null) {
        tableName = tableDesc.getTableName();
        tableProperties.setProperty(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME, tableName);
    }
    SessionState sessionState = SessionState.get();
    String sessionId = sessionState.getSessionId();
    jobProperties.put(PhoenixConfigurationUtil.SESSION_ID, sessionId);
    jobProperties.put(PhoenixConfigurationUtil.INPUT_TABLE_NAME, tableName);
    jobProperties.put(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM, tableProperties.getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM, PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_QUORUM));
    jobProperties.put(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT, tableProperties.getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT, String.valueOf(PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_PORT)));
    jobProperties.put(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT, tableProperties.getProperty(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT, PhoenixStorageHandlerConstants.DEFAULT_ZOOKEEPER_PARENT));
    String columnMapping = tableProperties.getProperty(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING);
    if (columnMapping != null) {
        jobProperties.put(PhoenixStorageHandlerConstants.PHOENIX_COLUMN_MAPPING, columnMapping);
    }
    jobProperties.put(hive_metastoreConstants.META_TABLE_STORAGE, this.getClass().getName());
    // set configuration for working directly with HBase.
    jobProperties.put(HConstants.ZOOKEEPER_QUORUM, jobProperties.get(PhoenixStorageHandlerConstants.ZOOKEEPER_QUORUM));
    jobProperties.put(HConstants.ZOOKEEPER_CLIENT_PORT, jobProperties.get(PhoenixStorageHandlerConstants.ZOOKEEPER_PORT));
    jobProperties.put(HConstants.ZOOKEEPER_ZNODE_PARENT, jobProperties.get(PhoenixStorageHandlerConstants.ZOOKEEPER_PARENT));
    addHBaseResources(jobConf, jobProperties);
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) Properties(java.util.Properties) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
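
configureJobProperties is driven entirely by the table properties declared in the Hive DDL that registered the Phoenix-backed table. The DDL below is a hedged example of what such a definition might look like; the phoenix.* property names follow the usual Phoenix-Hive integration conventions and are assumptions here, not values read out of PhoenixStorageHandlerConstants.

// Hedged example of a Hive table definition that would route through configureJobProperties.
String createPhoenixBackedTable =
    "CREATE EXTERNAL TABLE phoenix_table (id INT, name STRING) " +
    "STORED BY 'org.apache.phoenix.hive.PhoenixStorageHandler' " +
    "TBLPROPERTIES (" +
    // presumably the PHOENIX_TABLE_NAME property read above
    "  'phoenix.table.name' = 'PHOENIX_TABLE'," +
    "  'phoenix.rowkeys' = 'id'," +
    "  'phoenix.zookeeper.quorum' = 'localhost'," +
    "  'phoenix.zookeeper.client.port' = '2181'," +
    "  'phoenix.zookeeper.znode.parent' = '/hbase'" +
    ")";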

Example 84 with SessionState

use of org.apache.hadoop.hive.ql.session.SessionState in project phoenix by apache.

the class HiveTestUtil method executeDiffCommand.

private static int executeDiffCommand(String inFileName, String outFileName, boolean ignoreWhiteSpace, boolean sortResults) throws Exception {
    int result = 0;
    if (sortResults) {
        // sort will try to open the output file in write mode on Windows. We need to
        // close it first.
        SessionState ss = SessionState.get();
        if (ss != null && ss.out != null && ss.out != System.out) {
            ss.out.close();
        }
        String inSorted = inFileName + SORT_SUFFIX;
        String outSorted = outFileName + SORT_SUFFIX;
        result = sortFiles(inFileName, inSorted);
        result |= sortFiles(outFileName, outSorted);
        if (result != 0) {
            LOG.error("ERROR: Could not sort files before comparing");
            return result;
        }
        inFileName = inSorted;
        outFileName = outSorted;
    }
    ArrayList<String> diffCommandArgs = new ArrayList<String>();
    diffCommandArgs.add("diff");
    // Text file comparison
    diffCommandArgs.add("-a");
    // Ignore changes in the amount of white space
    if (ignoreWhiteSpace || Shell.WINDOWS) {
        diffCommandArgs.add("-b");
    }
    // Files created on Windows have different line endings than files created on Unix/Linux,
    // so apply the extra normalization below when diffing on Windows.
    if (Shell.WINDOWS) {
        // Strip trailing carriage return on input
        diffCommandArgs.add("--strip-trailing-cr");
        // Ignore changes whose lines are all blank
        diffCommandArgs.add("-B");
    }
    // Add files to compare to the arguments list
    diffCommandArgs.add(getQuotedString(inFileName));
    diffCommandArgs.add(getQuotedString(outFileName));
    result = executeCmd(diffCommandArgs);
    if (sortResults) {
        new File(inFileName).delete();
        new File(outFileName).delete();
    }
    return result;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) CliSessionState(org.apache.hadoop.hive.cli.CliSessionState) ArrayList(java.util.ArrayList) File(java.io.File)
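
executeDiffCommand delegates to sortFiles and executeCmd, neither of which appears in the listing. Below is a minimal, hypothetical sketch of an executeCmd(List<String>) helper that simply runs the argument list as an external process and returns its exit code; the actual HiveTestUtil helper is not shown here and presumably handles output capture and the quoted file names differently.

// Hypothetical sketch: run the command and return its exit code (0 means the diffed files matched).
private static int executeCmd(List<String> commandArgs) throws IOException, InterruptedException {
    ProcessBuilder builder = new ProcessBuilder(commandArgs);
    // forward the diff output to the test's own stdout/stderr
    builder.inheritIO();
    Process process = builder.start();
    return process.waitFor();
}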

Example 85 with SessionState

use of org.apache.hadoop.hive.ql.session.SessionState in project hive by apache.

the class QTestUtil method cliInit.

public String cliInit(File file, boolean recreate) throws Exception {
    String fileName = file.getName();
    if (recreate) {
        cleanUp(fileName);
        createSources(fileName);
    }
    initDataSetForTest(file);
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER, "org.apache.hadoop.hive.ql.security.DummyAuthenticator");
    Utilities.clearWorkMap(conf);
    CliSessionState ss = new CliSessionState(conf);
    assert ss != null;
    ss.in = System.in;
    String outFileExtension = getOutFileExtension(fileName);
    String stdoutName = null;
    if (outDir != null) {
        // TODO: why is this needed?
        File qf = new File(outDir, fileName);
        stdoutName = qf.getName().concat(outFileExtension);
    } else {
        stdoutName = fileName + outFileExtension;
    }
    File outf = new File(logDir, stdoutName);
    OutputStream fo = new BufferedOutputStream(new FileOutputStream(outf));
    if (qSortQuerySet.contains(fileName)) {
        ss.out = new SortPrintStream(fo, "UTF-8");
    } else if (qHashQuerySet.contains(fileName)) {
        ss.out = new DigestPrintStream(fo, "UTF-8");
    } else if (qSortNHashQuerySet.contains(fileName)) {
        ss.out = new SortAndDigestPrintStream(fo, "UTF-8");
    } else {
        ss.out = new PrintStream(fo, true, "UTF-8");
    }
    ss.err = new CachingPrintStream(fo, true, "UTF-8");
    ss.setIsSilent(true);
    SessionState oldSs = SessionState.get();
    boolean canReuseSession = !qNoSessionReuseQuerySet.contains(fileName);
    if (oldSs != null && canReuseSession && clusterType.getCoreClusterType() == CoreClusterType.TEZ) {
        // Copy the tezSessionState from the old CliSessionState.
        TezSessionState tezSessionState = oldSs.getTezSession();
        oldSs.setTezSession(null);
        ss.setTezSession(tezSessionState);
        oldSs.close();
    }
    if (oldSs != null && clusterType.getCoreClusterType() == CoreClusterType.SPARK) {
        sparkSession = oldSs.getSparkSession();
        ss.setSparkSession(sparkSession);
        oldSs.setSparkSession(null);
        oldSs.close();
    }
    if (oldSs != null && oldSs.out != null && oldSs.out != System.out) {
        oldSs.out.close();
    }
    if (oldSs != null) {
        oldSs.close();
    }
    SessionState.start(ss);
    cliDriver = new CliDriver();
    if (fileName.equals("init_file.q")) {
        ss.initFiles.add(AbstractCliConfig.HIVE_ROOT + "/data/scripts/test_init_file.sql");
    }
    cliDriver.processInitFiles(ss);
    return outf.getAbsolutePath();
}
Also used : SortAndDigestPrintStream (org.apache.hadoop.hive.common.io.SortAndDigestPrintStream) CachingPrintStream (org.apache.hadoop.hive.common.io.CachingPrintStream) DigestPrintStream (org.apache.hadoop.hive.common.io.DigestPrintStream) SortPrintStream (org.apache.hadoop.hive.common.io.SortPrintStream) PrintStream (java.io.PrintStream) TezSessionState (org.apache.hadoop.hive.ql.exec.tez.TezSessionState) CliSessionState (org.apache.hadoop.hive.cli.CliSessionState) SessionState (org.apache.hadoop.hive.ql.session.SessionState) ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream) BufferedOutputStream (java.io.BufferedOutputStream) FileOutputStream (java.io.FileOutputStream) OutputStream (java.io.OutputStream) File (java.io.File) CliDriver (org.apache.hadoop.hive.cli.CliDriver)
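
For q-files in qSortQuerySet, the CLI output is routed through a SortPrintStream, presumably so that queries with non-deterministic row order still produce stable .q.out files. The class below is a hypothetical illustration of that idea, not Hive's actual SortPrintStream: it buffers everything written to it and emits the lines in sorted order when the stream is closed.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;

// Hypothetical line-sorting PrintStream (illustration only).
class LineSortingPrintStream extends PrintStream {

    private final OutputStream target;

    LineSortingPrintStream(OutputStream target) throws UnsupportedEncodingException {
        // capture everything in an in-memory buffer first
        super(new ByteArrayOutputStream(), true, "UTF-8");
        this.target = target;
    }

    @Override
    public void close() {
        super.flush();
        try {
            // 'out' is the ByteArrayOutputStream handed to super(); sort its lines and forward them
            String captured = ((ByteArrayOutputStream) out).toString("UTF-8");
            String[] lines = captured.split("\n");
            Arrays.sort(lines);
            for (String line : lines) {
                target.write(line.getBytes("UTF-8"));
                target.write('\n');
            }
            target.flush();
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            super.close();
        }
    }
}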

Aggregations

SessionState (org.apache.hadoop.hive.ql.session.SessionState): 112
IOException (java.io.IOException): 28
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 22
ArrayList (java.util.ArrayList): 14
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 14
Path (org.apache.hadoop.fs.Path): 13
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 13
LinkedList (java.util.LinkedList): 12
CliSessionState (org.apache.hadoop.hive.cli.CliSessionState): 12
File (java.io.File): 11
FileNotFoundException (java.io.FileNotFoundException): 11
Map (java.util.Map): 11
Test (org.junit.Test): 10
PrintStream (java.io.PrintStream): 9
ExecutionException (java.util.concurrent.ExecutionException): 9
HashMap (java.util.HashMap): 8
LinkedHashMap (java.util.LinkedHashMap): 7
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 7
ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 6
Callable (java.util.concurrent.Callable): 6