use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.
the class TestHCatLoaderComplexSchema method compareIgnoreFiledNames.
/**
 * Recursively compares two Pig schemas while ignoring field names (aliases):
 * only field types, schema sizes, and nested schemas are compared.
 *
 * @param expected the expected schema (may be null)
 * @param got the actual schema (may be null)
 * @return an empty string when the schemas match; otherwise a newline-separated
 *         description of every mismatch found (all mismatches are collected,
 *         not just the first)
 * @throws FrontendException if a field cannot be retrieved from a schema
 */
private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException {
  if (expected == null || got == null) {
    // Both null is a match; exactly one null is a mismatch.
    if (expected == got) {
      return "";
    } else {
      return "\nexpected " + expected + " got " + got;
    }
  }
  if (expected.size() != got.size()) {
    return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")";
  }
  // Accumulate messages in a StringBuilder instead of repeated String
  // concatenation, which is O(n^2) across loop iterations.
  StringBuilder message = new StringBuilder();
  for (int i = 0; i < expected.size(); i++) {
    FieldSchema expectedField = expected.getField(i);
    FieldSchema gotField = got.getField(i);
    if (expectedField.type != gotField.type) {
      message.append("\ntype expected ").append(expectedField.type)
          .append(" (").append(expectedField).append(") got ")
          .append(gotField.type).append(" (").append(gotField).append(")");
    } else {
      // Same type: recurse into the nested schemas (null-safe via the
      // null check at the top of this method).
      message.append(compareIgnoreFiledNames(expectedField.schema, gotField.schema));
    }
  }
  return message.toString();
}
use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.
the class AbstractHCatLoaderTest method testReadPartitionedBasic.
@Test
public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException {
  PigServer server = createPigServer(false);

  // Baseline row count read straight through the Hive driver.
  driver.run("select * from " + PARTITIONED_TABLE);
  ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
  driver.getResults(valuesReadFromHiveDriver);
  assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size());

  // Load the same table through HCatLoader and verify the reported schema;
  // the partition column (bkt) is appended after the data columns.
  server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  Schema dumpedWSchema = server.dumpSchema("W");
  List<FieldSchema> wFields = dumpedWSchema.getFields();
  assertEquals(3, wFields.size());
  assertTrue(wFields.get(0).alias.equalsIgnoreCase("a"));
  assertEquals(DataType.INTEGER, wFields.get(0).type);
  assertTrue(wFields.get(1).alias.equalsIgnoreCase("b"));
  assertEquals(DataType.CHARARRAY, wFields.get(1).type);
  assertTrue(wFields.get(2).alias.equalsIgnoreCase("bkt"));
  assertEquals(DataType.CHARARRAY, wFields.get(2).type);

  // Read every tuple: check runtime types and the partitioning invariant
  // (rows with a < 2 live in bkt=0, the rest in bkt=1).
  Collection<Pair<Integer, String>> valuesRead = new ArrayList<Pair<Integer, String>>();
  Iterator<Tuple> wIter = server.openIterator("W");
  while (wIter.hasNext()) {
    Tuple t = wIter.next();
    assertEquals(3, t.size());
    assertNotNull(t.get(0));
    assertNotNull(t.get(1));
    assertNotNull(t.get(2));
    assertTrue(t.get(0) instanceof Integer);
    assertTrue(t.get(1) instanceof String);
    assertTrue(t.get(2) instanceof String);
    valuesRead.add(new Pair<Integer, String>((Integer) t.get(0), (String) t.get(1)));
    String expectedBucket = ((Integer) t.get(0) < 2) ? "0" : "1";
    assertEquals(expectedBucket, t.get(2));
  }
  assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size());

  // Filtering on the partition column bkt == '0' yields the three rows with a == 1.
  server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  server.registerQuery("P1filter = filter P1 by bkt == '0';");
  int count1 = 0;
  for (Iterator<Tuple> p1Iter = server.openIterator("P1filter"); p1Iter.hasNext(); ) {
    Tuple t = p1Iter.next();
    assertEquals("0", t.get(2));
    assertEquals(1, t.get(0));
    count1++;
  }
  assertEquals(3, count1);

  // The complementary partition bkt == '1' holds the remaining six rows, all with a > 1.
  server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  server.registerQuery("P2filter = filter P2 by bkt == '1';");
  int count2 = 0;
  for (Iterator<Tuple> p2Iter = server.openIterator("P2filter"); p2Iter.hasNext(); ) {
    Tuple t = p2Iter.next();
    assertEquals("1", t.get(2));
    assertTrue(((Integer) t.get(0)) > 1);
    count2++;
  }
  assertEquals(6, count2);
}
use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.
the class AbstractHCatLoaderTest method testReadMissingPartitionBasicNeg.
@Test
public void testReadMissingPartitionBasicNeg() throws IOException, CommandNeedRetryException {
  PigServer server = createPigServer(false);

  // Remove the bkt=0 partition's data directory out-of-band, so the partition
  // still exists in the metastore but its backing files are gone.
  File removedPartitionDir = new File(TEST_WAREHOUSE_DIR + "/" + PARTITIONED_TABLE + "/bkt=0");
  if (!removeDirectory(removedPartitionDir)) {
    System.out.println("Test did not run because its environment could not be set.");
    return;
  }

  // The Hive driver is expected to return only the six rows that remain in
  // the surviving partition.
  driver.run("select * from " + PARTITIONED_TABLE);
  ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
  driver.getResults(valuesReadFromHiveDriver);
  assertEquals(6, valuesReadFromHiveDriver.size());

  // HCatLoader still reports the full table schema (a:int, b:chararray,
  // bkt:chararray) from the metastore, even though data is missing.
  server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  Schema dumpedWSchema = server.dumpSchema("W");
  List<FieldSchema> wFields = dumpedWSchema.getFields();
  assertEquals(3, wFields.size());
  assertTrue(wFields.get(0).alias.equalsIgnoreCase("a"));
  assertTrue(wFields.get(0).type == DataType.INTEGER);
  assertTrue(wFields.get(1).alias.equalsIgnoreCase("b"));
  assertTrue(wFields.get(1).type == DataType.CHARARRAY);
  assertTrue(wFields.get(2).alias.equalsIgnoreCase("bkt"));
  assertTrue(wFields.get(2).type == DataType.CHARARRAY);

  // Actually reading must fail, because the partition's directory is missing.
  // The returned iterator is intentionally unused: opening it alone triggers
  // the failure.
  try {
    server.openIterator("W");
    fail("Should have failed in retrieving an invalid partition");
  } catch (IOException ioe) {
    // expected
  }
}
use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.
the class AbstractHCatLoaderTest method testProjectionsBasic.
@Test
public void testProjectionsBasic() throws IOException {
  PigServer server = createPigServer(false);

  // projections are handled by using generate, not "as" on the Load
  server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  server.registerQuery("Y2 = foreach Y1 generate a;");
  server.registerQuery("Y3 = foreach Y1 generate b,a;");
  Schema dumpedY2Schema = server.dumpSchema("Y2");
  Schema dumpedY3Schema = server.dumpSchema("Y3");
  List<FieldSchema> Y2fields = dumpedY2Schema.getFields();
  List<FieldSchema> Y3fields = dumpedY3Schema.getFields();

  // Single-column projection: (a:int).
  assertEquals(1, Y2fields.size());
  assertEquals("a", Y2fields.get(0).alias.toLowerCase());
  assertEquals(DataType.INTEGER, Y2fields.get(0).type);

  // Reordered two-column projection: (b:chararray, a:int).
  assertEquals(2, Y3fields.size());
  assertEquals("b", Y3fields.get(0).alias.toLowerCase());
  assertEquals(DataType.CHARARRAY, Y3fields.get(0).type);
  assertEquals("a", Y3fields.get(1).alias.toLowerCase());
  assertEquals(DataType.INTEGER, Y3fields.get(1).type);

  // Tuples from Y2 must contain exactly column a, in input order.
  // All assertEquals calls use expected-first argument order (JUnit
  // convention), consistent with the schema assertions above.
  int numTuplesRead = 0;
  Iterator<Tuple> Y2Iter = server.openIterator("Y2");
  while (Y2Iter.hasNext()) {
    Tuple t = Y2Iter.next();
    assertEquals(1, t.size());
    assertNotNull(t.get(0));
    assertTrue(t.get(0).getClass() == Integer.class);
    assertEquals(basicInputData.get(numTuplesRead).first, t.get(0));
    numTuplesRead++;
  }

  // Tuples from Y3 must contain (b, a), in input order.
  numTuplesRead = 0;
  Iterator<Tuple> Y3Iter = server.openIterator("Y3");
  while (Y3Iter.hasNext()) {
    Tuple t = Y3Iter.next();
    assertEquals(2, t.size());
    assertNotNull(t.get(0));
    assertTrue(t.get(0).getClass() == String.class);
    assertEquals(basicInputData.get(numTuplesRead).second, t.get(0));
    assertNotNull(t.get(1));
    assertTrue(t.get(1).getClass() == Integer.class);
    assertEquals(basicInputData.get(numTuplesRead).first, t.get(1));
    numTuplesRead++;
  }
  assertEquals(basicInputData.size(), numTuplesRead);
}
use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.
the class HCatBaseStorer method getHCatFSFromPigFS.
/**
 * Converts a single Pig {@code FieldSchema} into the corresponding
 * {@code HCatFieldSchema} for the target table.
 *
 * Here we are processing HCat table schema as derived from metastore,
 * thus it should have information about all fields/sub-fields, but not for
 * partition columns.
 *
 * @param fSchema the Pig field to convert
 * @param hcatFieldSchema the matching field from the target HCat table schema,
 *        or null when no matching column exists (new column, or a column not
 *        present in the metastore-derived schema)
 * @param pigSchema full Pig schema; currently only referenced by the
 *        commented-out diagnostic below
 * @param tableSchema full HCat table schema; currently only referenced by the
 *        commented-out diagnostic below
 * @return the HCat field schema for this Pig field
 * @throws FrontendException if the Pig type has no supported HCat mapping
 * @throws HCatException if constructing the HCat field schema fails
 */
private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema, Schema pigSchema, HCatSchema tableSchema) throws FrontendException, HCatException {
if (hcatFieldSchema == null) {
// A null target field is tolerated (logged only); the switch below falls
// back to default type mappings in that case.
if (LOG.isDebugEnabled()) {
LOG.debug("hcatFieldSchema is null for fSchema '" + fSchema.alias + "'");
//throw new IllegalArgumentException("hcatFiledSchema is null; fSchema=" + fSchema + " " +
// "(pigSchema, tableSchema)=(" + pigSchema + "," + tableSchema + ")");
}
}
byte type = fSchema.type;
switch(type) {
case DataType.CHARARRAY:
case DataType.BIGCHARARRAY:
// Prefer the target table's type info when available; otherwise default to string.
if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
}
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, null);
case DataType.INTEGER:
if (hcatFieldSchema != null) {
// Only target types listed in SUPPORTED_INTEGER_CONVERSIONS may receive a Pig int.
if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) {
throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
}
return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
}
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.intTypeInfo, null);
// The following primitive types map directly, ignoring any target type info.
case DataType.LONG:
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.longTypeInfo, null);
case DataType.FLOAT:
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.floatTypeInfo, null);
case DataType.DOUBLE:
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.doubleTypeInfo, null);
case DataType.BYTEARRAY:
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.binaryTypeInfo, null);
case DataType.BOOLEAN:
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.booleanTypeInfo, null);
case DataType.DATETIME:
// The concrete target type is controlled by Hive target table information;
// fall back to timestamp when the table provides none.
if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
}
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.timestampTypeInfo, null);
case DataType.BIGDECIMAL:
// Prefer the table's decimal type info (precision/scale may differ from the
// factory default — TODO confirm against target table definitions).
if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
}
return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.decimalTypeInfo, null);
case DataType.BAG:
// A Pig bag becomes an HCat ARRAY with a single element schema.
Schema bagSchema = fSchema.schema;
List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
FieldSchema field;
// Find out if we need to throw away the tuple or not.
if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
// Skip the wrapping tuple and use its single inner field directly.
field = bagSchema.getField(0).schema.getField(0);
} else {
field = bagSchema.getField(0);
}
arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0), pigSchema, tableSchema));
return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");
case DataType.TUPLE:
// A Pig tuple becomes an HCat STRUCT; convert each inner field recursively,
// pairing it positionally with the target struct's sub-schema when present.
List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
List<FieldSchema> fields = fSchema.schema.getFields();
for (int i = 0; i < fields.size(); i++) {
FieldSchema fieldSchema = fields.get(i);
hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i), pigSchema, tableSchema));
}
return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");
case DataType.MAP:
{
// Pig's schema contain no type information about map's keys and
// values. So, if its a new column assume <string,string> if its existing
// return whatever is contained in the existing column.
HCatFieldSchema valFS;
List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);
if (hcatFieldSchema != null) {
return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, hcatFieldSchema.getMapKeyTypeInfo(), hcatFieldSchema.getMapValueSchema(), "");
}
// Column not found in target table. Its a new column. Its schema is map<string,string>
valFS = new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, "");
valFSList.add(valFS);
return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, new HCatSchema(valFSList), "");
}
case DataType.BIGINTEGER:
//fall through; doesn't map to Hive/Hcat type; here for completeness
default:
throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
}
}
Aggregations