Use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Hive project.
Class HCatBaseStorer, method validateSchema.
/**
 * Verifies that a Pig field can be stored into the given HCat field.
 * <p>
 * Complex Pig types (MAP, BAG, TUPLE) are checked structurally, recursing into the
 * nested Pig fields. Every other Pig type is checked against the list of HCat types
 * it may be stored as.
 *
 * @param pigField the Pig field to check; its alias is validated first
 * @param hcatField the target HCat field, or {@code null} when no type information
 *        about the target is available (non-complex types are then not checked)
 * @param topLevelPigSchema top-level Pig schema; forwarded to recursive calls
 * @param topLevelHCatSchema top-level HCat schema; forwarded to recursive calls
 * @param columnPos column position (0-based) used when reporting errors
 * @throws HCatException declared by this method; presumably raised by the HCat schema
 *         helpers it calls
 * @throws FrontendException if the HCat map key type is not STRING, if the Pig type is
 *         not one of the types handled here, or (presumably) when
 *         {@code throwTypeMismatchException} rejects the HCat type
 */
private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField, Schema topLevelPigSchema, HCatSchema topLevelHCatSchema, int columnPos) throws HCatException, FrontendException {
  validateAlias(pigField.alias);
  final byte pigType = pigField.type;

  if (!DataType.isComplex(pigType)) {
    if (hcatField == null) {
      // Without type info about the target field there is nothing to validate against.
      if (false) {
        // see HIVE-6194
        throw new FrontendException("(pigSch,hcatSchema)=(" + pigField + "," + "" + hcatField + ") (topPig, topHcat)=(" + topLevelPigSchema + "," + "" + topLevelHCatSchema + ")");
      }
      return;
    }
    // Each case passes the HCat types that this Pig type is allowed to map to.
    switch (pigType) {
      case DataType.BOOLEAN:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.BOOLEAN), hcatField, columnPos);
        break;
      case DataType.INTEGER:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.INT, Type.BIGINT, Type.TINYINT, Type.SMALLINT), hcatField, columnPos);
        break;
      case DataType.LONG:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.BIGINT), hcatField, columnPos);
        break;
      case DataType.FLOAT:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.FLOAT), hcatField, columnPos);
        break;
      case DataType.DOUBLE:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.DOUBLE), hcatField, columnPos);
        break;
      case DataType.BIGDECIMAL:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.DECIMAL), hcatField, columnPos);
        break;
      case DataType.BIGINTEGER:
        // No HCat type is listed as acceptable for BIGINTEGER.
        throwTypeMismatchException(pigType, Collections.<Type>emptyList(), hcatField, columnPos);
        break;
      case DataType.CHARARRAY:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.STRING, Type.CHAR, Type.VARCHAR), hcatField, columnPos);
        break;
      case DataType.BYTEARRAY:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.BINARY), hcatField, columnPos);
        break;
      case DataType.DATETIME:
        throwTypeMismatchException(pigType, Lists.newArrayList(Type.TIMESTAMP, Type.DATE), hcatField, columnPos);
        break;
      default:
        throw new FrontendException("'" + pigType + "' Pig datatype in column " + columnPos + "(0-based) is not supported by HCat", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
    return;
  }

  switch (pigType) {
    case DataType.MAP: {
      // Only the key type is constrained; map values can be primitive or complex.
      if (hcatField != null && hcatField.getMapKeyType() != Type.STRING) {
        throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
      }
      break;
    }
    case DataType.BAG: {
      HCatSchema elementSchema = null;
      if (hcatField != null) {
        elementSchema = hcatField.getArrayElementSchema();
      }
      // The nested fields come from the schema of the bag's first field.
      for (FieldSchema nested : pigField.schema.getField(0).schema.getFields()) {
        validateSchema(nested, getColFromSchema(pigField.alias, elementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
      }
      break;
    }
    case DataType.TUPLE: {
      HCatSchema subSchema = null;
      if (hcatField != null) {
        subSchema = hcatField.getStructSubSchema();
      }
      for (FieldSchema nested : pigField.schema.getFields()) {
        validateSchema(nested, getColFromSchema(pigField.alias, subSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
      }
      break;
    }
    default:
      // isComplex() returned true for a type that is none of MAP, BAG or TUPLE.
      throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE);
  }
}
Use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Hive project.
Class AbstractHCatLoaderTest, method testSchemaLoadComplex.
/**
 * Loads COMPLEX_TABLE through HCatLoader and checks the schema Pig reports for it:
 * six columns with the expected types and aliases (compared case-insensitively),
 * plus the nested schemas of the tuple and bag columns.
 */
@Test
public void testSchemaLoadComplex() throws IOException {
  PigServer server = createPigServer(false);
  // test that schema was loaded correctly
  server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  Schema dumpedKSchema = server.dumpSchema("K");
  List<FieldSchema> fields = dumpedKSchema.getFields();
  assertEquals(6, fields.size());

  assertEquals(DataType.CHARARRAY, fields.get(0).type);
  assertEquals("name", fields.get(0).alias.toLowerCase());

  assertEquals(DataType.INTEGER, fields.get(1).type);
  assertEquals("studentid", fields.get(1).alias.toLowerCase());

  // Column 2: tuple with two chararray fields.
  assertEquals(DataType.TUPLE, fields.get(2).type);
  assertEquals("contact", fields.get(2).alias.toLowerCase());
  {
    Schema contactSchema = fields.get(2).schema;
    assertNotNull(contactSchema);
    List<FieldSchema> contactFields = contactSchema.getFields();
    // assertEquals (rather than assertTrue(a == b)) reports expected/actual on failure.
    assertEquals(2, contactFields.size());
    assertEquals(DataType.CHARARRAY, contactFields.get(0).type);
    assertTrue(contactFields.get(0).alias.equalsIgnoreCase("phno"));
    assertEquals(DataType.CHARARRAY, contactFields.get(1).type);
    assertTrue(contactFields.get(1).alias.equalsIgnoreCase("email"));
  }

  // Column 3: bag holding a tuple with a single chararray field.
  assertEquals(DataType.BAG, fields.get(3).type);
  assertEquals("currently_registered_courses", fields.get(3).alias.toLowerCase());
  {
    Schema coursesSchema = fields.get(3).schema;
    assertNotNull(coursesSchema);
    assertEquals(1, coursesSchema.getFields().size());
    FieldSchema courseTuple = coursesSchema.getFields().get(0);
    assertEquals(DataType.TUPLE, courseTuple.type);
    assertNotNull(courseTuple.schema);
    List<FieldSchema> courseFields = courseTuple.schema.getFields();
    assertEquals(1, courseFields.size());
    assertEquals(DataType.CHARARRAY, courseFields.get(0).type);
    // The alias of this inner field is deliberately not asserted: the name becomes
    // "innerfield" by default - we call it "course" in pig, but in the metadata it'd
    // be anonymous, so this would be autogenerated, which is fine.
  }

  assertEquals(DataType.MAP, fields.get(4).type);
  assertEquals("current_grades", fields.get(4).alias.toLowerCase());

  // Column 5: bag holding a tuple with two chararray fields.
  assertEquals(DataType.BAG, fields.get(5).type);
  assertEquals("phnos", fields.get(5).alias.toLowerCase());
  {
    Schema phnosSchema = fields.get(5).schema;
    assertNotNull(phnosSchema);
    assertEquals(1, phnosSchema.getFields().size());
    FieldSchema phnoTuple = phnosSchema.getFields().get(0);
    assertEquals(DataType.TUPLE, phnoTuple.type);
    assertNotNull(phnoTuple.schema);
    List<FieldSchema> phnoFields = phnoTuple.schema.getFields();
    assertEquals(2, phnoFields.size());
    assertEquals(DataType.CHARARRAY, phnoFields.get(0).type);
    assertEquals("phno", phnoFields.get(0).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, phnoFields.get(1).type);
    assertEquals("type", phnoFields.get(1).alias.toLowerCase());
  }
}
Use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in the Apache Hive project.
Class AbstractHCatLoaderTest, method testSchemaLoadBasic.
/**
 * Loads BASIC_TABLE through HCatLoader and checks that Pig reports two columns:
 * an INTEGER aliased "a" and a CHARARRAY aliased "b" (aliases compared
 * case-insensitively).
 */
@Test
public void testSchemaLoadBasic() throws IOException {
  PigServer server = createPigServer(false);
  // test that schema was loaded correctly
  server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
  Schema dumpedXSchema = server.dumpSchema("X");
  List<FieldSchema> fields = dumpedXSchema.getFields();
  assertEquals(2, fields.size());

  // assertEquals (rather than assertTrue(a == b)) reports expected/actual on failure.
  assertTrue(fields.get(0).alias.equalsIgnoreCase("a"));
  assertEquals(DataType.INTEGER, fields.get(0).type);
  assertTrue(fields.get(1).alias.equalsIgnoreCase("b"));
  assertEquals(DataType.CHARARRAY, fields.get(1).type);
}
Aggregations