Example 6 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class HCatBaseStorer method doSchemaValidations.

protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException {
    // Iterate through all the elements in the Pig schema and validate each
    // one as dictated by the semantics, consulting the table's HCatSchema
    // when needed.
    // columnPos is tracked only to make debug/error messages clearer.
    int columnPos = 0;
    for (FieldSchema pigField : pigSchema.getFields()) {
        HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema);
        validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++);
    }
    try {
        PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema);
    } catch (IOException e) {
        throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
Also used : HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) IOException(java.io.IOException) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)
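
For reference, a minimal standalone sketch (not from the Hive source; the aliases and types are invented for illustration) of the FieldSchema iteration pattern that doSchemaValidations relies on: build a Pig Schema from FieldSchema entries, then walk getFields() positionally.

import java.util.Arrays;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

public class FieldSchemaWalk {
    public static void main(String[] args) {
        // Build a two-column Pig schema: (id:int, name:chararray).
        // Column names and types are illustrative only.
        Schema pigSchema = new Schema(Arrays.asList(
            new FieldSchema("id", DataType.INTEGER),
            new FieldSchema("name", DataType.CHARARRAY)));
        // Walk the fields positionally, as the validation loop above does.
        int columnPos = 0;
        for (FieldSchema field : pigSchema.getFields()) {
            System.out.println(columnPos++ + ": " + field.alias + " -> "
                    + DataType.findTypeName(field.type));
        }
    }
}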

Example 7 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class HCatBaseStorer method convertPigSchemaToHCatSchema.

/** Constructs an HCatSchema from the given pigSchema. The passed tableSchema
   * is the existing schema of the table in the metastore.
   */
protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")");
    }
    List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
    for (FieldSchema fSchema : pigSchema.getFields()) {
        try {
            HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema);
            //if writing to a partitioned table, then pigSchema will have more columns than tableSchema
            //partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData()
            //        HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " +
            //                "in target table schema", LOG);
            fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema));
        } catch (HCatException he) {
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
    }
    HCatSchema s = new HCatSchema(fieldSchemas);
    LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")");
    return s;
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) HCatException(org.apache.hive.hcatalog.common.HCatException) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)
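
As a companion sketch (hypothetical, not part of the method above), the target-side objects can be assembled directly as well: an HCatSchema is built from a list of HCatFieldSchema entries, which is what convertPigSchemaToHCatSchema ultimately produces. Field names and types are invented; only the constructors are HCatalog API.

import java.util.Arrays;

import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;

public class HCatSchemaSketch {
    // Builds a two-column HCatSchema by hand (illustrative names/types).
    public static HCatSchema twoColumnSchema() throws HCatException {
        HCatFieldSchema id = new HCatFieldSchema("id", HCatFieldSchema.Type.INT, null);
        HCatFieldSchema name = new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, null);
        return new HCatSchema(Arrays.asList(id, name));
    }
}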

Example 8 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class TestPigHBaseStorageHandler method testPigPopulation.

@Test
public void testPigPopulation() throws Exception {
    Initialize();
    String tableName = newTableName("MyTable");
    String databaseName = newTableName("MyDatabase");
    //Table name will be lower case unless specified by hbase.table.name property
    String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
    String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
    String POPTXT_FILE_NAME = db_dir + "testfile.txt";
    float f = -100.1f;
    String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + db_dir + "'";
    String deleteQuery = "DROP TABLE " + databaseName + "." + tableName;
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + "(key int, testqualifier1 float, testqualifier2 string) STORED BY " + "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" + " TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
    String selectQuery = "SELECT * from " + databaseName.toLowerCase() + "." + tableName.toLowerCase();
    CommandProcessorResponse responseOne = driver.run(deleteQuery);
    assertEquals(0, responseOne.getResponseCode());
    CommandProcessorResponse responseTwo = driver.run(dbQuery);
    assertEquals(0, responseTwo.getResponseCode());
    CommandProcessorResponse responseThree = driver.run(tableQuery);
    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
    assertTrue(doesTableExist);
    createTestDataFile(POPTXT_FILE_NAME);
    PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties());
    server.registerQuery("A = load '" + POPTXT_FILE_NAME + "' using PigStorage() as (key:int, testqualifier1:float, testqualifier2:chararray);");
    server.registerQuery("B = filter A by (key > 2) AND (key < 8) ;");
    server.registerQuery("store B into '" + databaseName.toLowerCase() + "." + tableName.toLowerCase() + "' using  org.apache.hive.hcatalog.pig.HCatStorer();");
    server.registerQuery("C = load '" + databaseName.toLowerCase() + "." + tableName.toLowerCase() + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    // Schema should be same
    Schema dumpedBSchema = server.dumpSchema("C");
    List<FieldSchema> fields = dumpedBSchema.getFields();
    assertEquals(3, fields.size());
    assertEquals(DataType.INTEGER, fields.get(0).type);
    assertEquals("key", fields.get(0).alias.toLowerCase());
    assertEquals(DataType.FLOAT, fields.get(1).type);
    assertEquals("testQualifier1".toLowerCase(), fields.get(1).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, fields.get(2).type);
    assertEquals("testQualifier2".toLowerCase(), fields.get(2).alias.toLowerCase());
    // Query the HBase table and check that each key is valid and only 5 rows are present
    Configuration conf = new Configuration(getHbaseConf());
    HTable table = new HTable(conf, hbaseTableName);
    Scan scan = new Scan();
    scan.addFamily(Bytes.toBytes("testFamily"));
    byte[] familyNameBytes = Bytes.toBytes("testFamily");
    ResultScanner scanner = table.getScanner(scan);
    int index = 3;
    int count = 0;
    for (Result result : scanner) {
        //key is correct
        assertEquals(index, Bytes.toInt(result.getRow()));
        //first column exists
        assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes("testQualifier1")));
        //value is correct
        assertEquals((index + f), Bytes.toFloat(result.getValue(familyNameBytes, Bytes.toBytes("testQualifier1"))), 0);
        //second column exists
        assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes("testQualifier2")));
        //value is correct
        assertEquals(("textB-" + index).toString(), Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes("testQualifier2"))));
        index++;
        count++;
    }
    // 5 rows should be returned
    assertEquals(5, count);
    //Check if hive returns results correctly
    driver.run(selectQuery);
    ArrayList<String> result = new ArrayList<String>();
    driver.getResults(result);
    //Query using the hive command line
    assertEquals(5, result.size());
    Iterator<String> itr = result.iterator();
    for (int i = 3; i <= 7; i++) {
        String[] tokens = itr.next().split("\\s+");
        assertEquals(i, Integer.parseInt(tokens[0]));
        assertEquals(i + f, Float.parseFloat(tokens[1]), 0);
        assertEquals(("textB-" + i).toString(), tokens[2]);
    }
    //delete the table from the database
    CommandProcessorResponse responseFour = driver.run(deleteQuery);
    assertEquals(0, responseFour.getResponseCode());
}
Also used : ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) Configuration(org.apache.hadoop.conf.Configuration) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) HTable(org.apache.hadoop.hbase.client.HTable) Result(org.apache.hadoop.hbase.client.Result) HBaseAdmin(org.apache.hadoop.hbase.client.HBaseAdmin) PigServer(org.apache.pig.PigServer) Scan(org.apache.hadoop.hbase.client.Scan) Test(org.junit.Test)
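
The alias/type assertions in this test repeat one pattern per dumped field; a small helper (assumed, not part of the test) captures it and keeps failure messages readable.

import static org.junit.Assert.assertEquals;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

final class SchemaAsserts {
    private SchemaAsserts() {
    }

    // Compares aliases case-insensitively (the storage layer may lower-case
    // them) and compares types by name so mismatches print readably.
    static void assertField(FieldSchema field, String expectedAlias, byte expectedType) {
        assertEquals(expectedAlias.toLowerCase(), field.alias.toLowerCase());
        assertEquals(DataType.findTypeName(expectedType), DataType.findTypeName(field.type));
    }
}

With it, the checks above collapse to calls such as assertField(fields.get(0), "key", DataType.INTEGER).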

Example 9 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project phoenix by apache.

the class PhoenixHBaseLoaderIT method testSchemaForTableWithAlias.

/**
     * Validates the schema when it is given as part of LOAD ... AS.
     * @throws Exception
     */
@Test
public void testSchemaForTableWithAlias() throws Exception {
    //create the table.
    final String TABLE = "S.TABLE4";
    String ddl = "CREATE TABLE  " + TABLE + "  (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE" + "  CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL)) \n";
    conn.createStatement().execute(ddl);
    //select query given as part of LOAD.
    final String sqlQuery = "SELECT A_STRING,A_DECIMAL,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE;
    LOG.info(String.format("Generated SQL Query [%s]", sqlQuery));
    pigServer.registerQuery(String.format("raw = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s') AS (a:chararray,b:bigdecimal,c:int,d:double);", sqlQuery, zkQuorum));
    //test the schema.
    Schema schema = pigServer.dumpSchema("raw");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(4, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("a"));
    assertTrue(fields.get(0).type == DataType.CHARARRAY);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("b"));
    assertTrue(fields.get(1).type == DataType.BIGDECIMAL);
    assertTrue(fields.get(2).alias.equalsIgnoreCase("c"));
    assertTrue(fields.get(2).type == DataType.INTEGER);
    assertTrue(fields.get(3).alias.equalsIgnoreCase("d"));
    assertTrue(fields.get(3).type == DataType.DOUBLE);
}
Also used : Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Test(org.junit.Test)
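
Instead of asserting field-by-field, Pig's static Schema.equals can compare the dumped schema against an expected one in a single call. A hedged sketch, with the expected schema mirroring the AS clause above:

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

public class SchemaCompare {
    public static boolean matchesExpected(Schema actual) {
        // Mirror of the AS clause: (a:chararray, b:bigdecimal, c:int, d:double).
        Schema expected = new Schema();
        expected.add(new FieldSchema("a", DataType.CHARARRAY));
        expected.add(new FieldSchema("b", DataType.BIGDECIMAL));
        expected.add(new FieldSchema("c", DataType.INTEGER));
        expected.add(new FieldSchema("d", DataType.DOUBLE));
        // relaxInner=false, relaxAlias=false: require exact types and aliases.
        return Schema.equals(actual, expected, false, false);
    }
}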

Example 10 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project phoenix by apache.

the class PhoenixHBaseLoaderIT method testSchemaForTableWithSpecificColumns.

/**
     * Validates the schema returned when specific columns of a table are given as part of LOAD.
     * @throws Exception
     */
@Test
public void testSchemaForTableWithSpecificColumns() throws Exception {
    //create the table
    final String TABLE = "TABLE2";
    final String ddl = "CREATE TABLE " + TABLE + "  (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ";
    conn.createStatement().execute(ddl);
    final String selectColumns = "ID,NAME";
    pigServer.registerQuery(String.format("A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE, selectColumns, zkQuorum));
    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(2, fields.size());
    assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));
    assertTrue(fields.get(0).type == DataType.INTEGER);
    assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME"));
    assertTrue(fields.get(1).type == DataType.CHARARRAY);
}
Also used : Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Test(org.junit.Test)
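
When only a couple of columns are loaded, Schema.getField(String) offers an alias-based alternative to the positional getFields().get(i) lookups used above. A minimal sketch (the alias is illustrative, and the field is assumed to exist):

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class FieldLookup {
    // Returns true if the field registered under the given alias is an int.
    // Assumes the alias resolves to exactly one existing field.
    public static boolean isInteger(Schema schema, String alias) throws FrontendException {
        return schema.getField(alias).type == DataType.INTEGER;
    }
}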

Aggregations

FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 18
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14
Test (org.junit.Test): 13
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 9
PigServer (org.apache.pig.PigServer): 7
ArrayList (java.util.ArrayList): 6
HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 5
Tuple (org.apache.pig.data.Tuple): 5
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 4
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 3
IOException (java.io.IOException): 2
PreparedStatement (java.sql.PreparedStatement): 2
HBaseAdmin (org.apache.hadoop.hbase.client.HBaseAdmin): 2
CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse): 2
File (java.io.File): 1
RandomAccessFile (java.io.RandomAccessFile): 1
Array (java.sql.Array): 1
List (java.util.List): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
HTable (org.apache.hadoop.hbase.client.HTable): 1