Example use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Hive project: class HCatBaseStorer, method doSchemaValidations.
protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException {
    // Walk the Pig schema field by field and validate each one against the
    // matching HCatalog column. The running index exists only to make the
    // diagnostics emitted by validateSchema easier to follow.
    int fieldIndex = 0;
    for (FieldSchema pigFieldSchema : pigSchema.getFields()) {
        HCatFieldSchema matchingHCatField = getColFromSchema(pigFieldSchema.alias, tblSchema);
        validateSchema(pigFieldSchema, matchingHCatField, pigSchema, tblSchema, fieldIndex);
        fieldIndex++;
    }
    // Finally confirm the HCatalog table schema itself is expressible in Pig.
    try {
        PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema);
    } catch (IOException e) {
        throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e);
    }
}
Example use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Hive project: class HCatBaseStorer, method convertPigSchemaToHCatSchema.
/**
 * Constructs an HCatSchema from the given Pig schema. The passed tableSchema
 * is the existing schema of the table in the metastore; it is consulted to
 * find the HCatalog column matching each Pig field.
 *
 * @param pigSchema   schema of the relation being stored
 * @param tableSchema existing HCatalog schema of the target table
 * @return the computed HCatSchema for the data being written
 * @throws FrontendException if a field cannot be converted (wraps the
 *         underlying HCatException with PIG_EXCEPTION_CODE)
 */
protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")");
    }
    List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size());
    for (FieldSchema fSchema : pigSchema.getFields()) {
        try {
            // May be null: when writing to a partitioned table, pigSchema has
            // more columns than tableSchema because partition columns are not
            // part of tableSchema (e.g. TestHCatStorer#testPartColsInData()).
            HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema);
            fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema));
        } catch (HCatException he) {
            // Re-throw with the Pig exception code, preserving the cause.
            throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
        }
    }
    HCatSchema s = new HCatSchema(fieldSchemas);
    // Guard the debug call (consistent with the entry log above) so the
    // schemas are not stringified when debug logging is disabled.
    if (LOG.isDebugEnabled()) {
        LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")");
    }
    return s;
}
Example use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Hive project: class TestPigHBaseStorageHandler, method testPigPopulation.
@Test
public void testPigPopulation() throws Exception {
    Initialize();
    String tableName = newTableName("MyTable");
    String databaseName = newTableName("MyDatabase");
    //Table name will be lower case unless specified by hbase.table.name property
    String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
    String db_dir = HCatUtil.makePathASafeFileName(getTestDir() + "/hbasedb");
    String POPTXT_FILE_NAME = db_dir + "testfile.txt";
    float f = -100.1f;
    String dbQuery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + db_dir + "'";
    String deleteQuery = "DROP TABLE " + databaseName + "." + tableName;
    String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + "(key int, testqualifier1 float, testqualifier2 string) STORED BY " + "'org.apache.hadoop.hive.hbase.HBaseStorageHandler'" + " WITH SERDEPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')" + " TBLPROPERTIES ('hbase.table.default.storage.type'='binary')";
    String selectQuery = "SELECT * from " + databaseName.toLowerCase() + "." + tableName.toLowerCase();
    CommandProcessorResponse responseOne = driver.run(deleteQuery);
    assertEquals(0, responseOne.getResponseCode());
    CommandProcessorResponse responseTwo = driver.run(dbQuery);
    assertEquals(0, responseTwo.getResponseCode());
    CommandProcessorResponse responseThree = driver.run(tableQuery);
    // FIX: the CREATE TABLE response was previously never checked; a failed
    // create would surface later as a confusing tableExists failure instead.
    assertEquals(0, responseThree.getResponseCode());
    // FIX: close the admin handle so the test does not leak a connection.
    HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
    try {
        boolean doesTableExist = hAdmin.tableExists(hbaseTableName);
        assertTrue(doesTableExist);
    } finally {
        hAdmin.close();
    }
    createTestDataFile(POPTXT_FILE_NAME);
    PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties());
    server.registerQuery("A = load '" + POPTXT_FILE_NAME + "' using PigStorage() as (key:int, testqualifier1:float, testqualifier2:chararray);");
    server.registerQuery("B = filter A by (key > 2) AND (key < 8) ;");
    server.registerQuery("store B into '" + databaseName.toLowerCase() + "." + tableName.toLowerCase() + "' using org.apache.hive.hcatalog.pig.HCatStorer();");
    server.registerQuery("C = load '" + databaseName.toLowerCase() + "." + tableName.toLowerCase() + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    // Schema should be same
    Schema dumpedBSchema = server.dumpSchema("C");
    List<FieldSchema> fields = dumpedBSchema.getFields();
    assertEquals(3, fields.size());
    assertEquals(DataType.INTEGER, fields.get(0).type);
    assertEquals("key", fields.get(0).alias.toLowerCase());
    assertEquals(DataType.FLOAT, fields.get(1).type);
    assertEquals("testQualifier1".toLowerCase(), fields.get(1).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, fields.get(2).type);
    assertEquals("testQualifier2".toLowerCase(), fields.get(2).alias.toLowerCase());
    //Query the hbase table and check the key is valid and only 5 are present
    Configuration conf = new Configuration(getHbaseConf());
    // FIX: table and scanner were previously never closed (resource leak).
    HTable table = new HTable(conf, hbaseTableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("testFamily"));
        byte[] familyNameBytes = Bytes.toBytes("testFamily");
        ResultScanner scanner = table.getScanner(scan);
        int index = 3;
        int count = 0;
        try {
            for (Result result : scanner) {
                //key is correct
                assertEquals(index, Bytes.toInt(result.getRow()));
                //first column exists
                assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes("testQualifier1")));
                //value is correct
                assertEquals((index + f), Bytes.toFloat(result.getValue(familyNameBytes, Bytes.toBytes("testQualifier1"))), 0);
                //second column exists
                assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes("testQualifier2")));
                //value is correct
                assertEquals(("textB-" + index).toString(), Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes("testQualifier2"))));
                index++;
                count++;
            }
        } finally {
            scanner.close();
        }
        // 5 rows should be returned
        // FIX: JUnit's assertEquals takes (expected, actual) in that order;
        // the arguments were swapped relative to the other assertions here.
        assertEquals(5, count);
    } finally {
        table.close();
    }
    //Check if hive returns results correctly
    driver.run(selectQuery);
    ArrayList<String> result = new ArrayList<String>();
    driver.getResults(result);
    //Query using the hive command line
    assertEquals(5, result.size());
    Iterator<String> itr = result.iterator();
    for (int i = 3; i <= 7; i++) {
        String[] tokens = itr.next().split("\\s+");
        assertEquals(i, Integer.parseInt(tokens[0]));
        assertEquals(i + f, Float.parseFloat(tokens[1]), 0);
        assertEquals(("textB-" + i).toString(), tokens[2]);
    }
    //delete the table from the database
    CommandProcessorResponse responseFour = driver.run(deleteQuery);
    assertEquals(0, responseFour.getResponseCode());
}
Example use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Phoenix project: class PhoenixHBaseLoaderIT, method testSchemaForTableWithAlias.
/**
 * Validates the schema when it is given as part of LOAD..AS
 * @throws Exception
 */
@Test
public void testSchemaForTableWithAlias() throws Exception {
    //create the table.
    final String TABLE = "S.TABLE4";
    String ddl = "CREATE TABLE " + TABLE + " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE" + " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL)) \n";
    conn.createStatement().execute(ddl);
    //select query given as part of LOAD.
    final String sqlQuery = "SELECT A_STRING,A_DECIMAL,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE;
    LOG.info(String.format("Generated SQL Query [%s]", sqlQuery));
    pigServer.registerQuery(String.format("raw = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s') AS (a:chararray,b:bigdecimal,c:int,d:double);", sqlQuery, zkQuorum));
    //test the schema: aliases and types must match the AS clause above.
    Schema schema = pigServer.dumpSchema("raw");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(4, fields.size());
    String[] expectedAliases = { "a", "b", "c", "d" };
    byte[] expectedTypes = { DataType.CHARARRAY, DataType.BIGDECIMAL, DataType.INTEGER, DataType.DOUBLE };
    for (int i = 0; i < expectedAliases.length; i++) {
        FieldSchema field = fields.get(i);
        assertTrue(field.alias.equalsIgnoreCase(expectedAliases[i]));
        assertTrue(field.type == expectedTypes[i]);
    }
}
Example use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Phoenix project: class PhoenixHBaseLoaderIT, method testSchemaForTableWithSpecificColumns.
/**
 * Validates the schema returned when specific columns of a table are given as part of LOAD .
 * @throws Exception
 */
@Test
public void testSchemaForTableWithSpecificColumns() throws Exception {
    //create the table
    final String TABLE = "TABLE2";
    final String ddl = "CREATE TABLE " + TABLE + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) ";
    conn.createStatement().execute(ddl);
    //load only a subset of the table's columns
    final String selectColumns = "ID,NAME";
    pigServer.registerQuery(String.format("A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE, selectColumns, zkQuorum));
    Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    //only the two requested columns should appear, with their SQL types
    //mapped to the corresponding Pig types
    assertEquals(2, fields.size());
    String[] expectedAliases = { "ID", "NAME" };
    byte[] expectedTypes = { DataType.INTEGER, DataType.CHARARRAY };
    for (int i = 0; i < fields.size(); i++) {
        assertTrue(fields.get(i).alias.equalsIgnoreCase(expectedAliases[i]));
        assertTrue(fields.get(i).type == expectedTypes[i]);
    }
}
Aggregations