Search in sources :

Example 16 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

In the class HCatBaseStorer, method validateSchema.

/**
 * Validates that a Pig field can be stored into the corresponding HCat field, i.e. this
 * method encodes which Pig type can map to (be stored in) which HCat type.
 *
 * <p>Complex Pig types are handled structurally: a MAP requires the HCat map key type to be
 * {@code STRING} (map values are not checked), while BAG and TUPLE recurse into their inner
 * fields. Scalar Pig types are checked against the list of HCat types passed to
 * {@code throwTypeMismatchException}:
 * <ul>
 *   <li>BIGDECIMAL: DECIMAL</li>
 *   <li>DATETIME: TIMESTAMP, DATE</li>
 *   <li>BYTEARRAY: BINARY</li>
 *   <li>BIGINTEGER: no HCat type is listed</li>
 *   <li>BOOLEAN: BOOLEAN</li>
 *   <li>CHARARRAY: STRING, CHAR, VARCHAR</li>
 *   <li>DOUBLE: DOUBLE</li>
 *   <li>FLOAT: FLOAT</li>
 *   <li>INTEGER: INT, BIGINT, TINYINT, SMALLINT</li>
 *   <li>LONG: BIGINT</li>
 * </ul>
 *
 * @param pigField the Pig field to validate; its alias is checked with {@code validateAlias}
 * @param hcatField the target HCat field, or {@code null} when no type information is
 *        available for the target, in which case scalar fields are not checked further
 * @param topLevelPigSchema the top-level Pig schema, passed unchanged to recursive calls
 * @param topLevelHCatSchema the top-level HCat schema, passed unchanged to recursive calls
 * @param columnPos 0-based column position, used in error messages
 * @throws HCatException declared for the HCat schema accessors and helpers called here
 * @throws FrontendException if an HCat map key type is not {@code STRING}, if the Pig type
 *         is not supported by HCat, or (presumably, via {@code throwTypeMismatchException})
 *         if the HCat field type is not among the accepted types
 */
private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField, Schema topLevelPigSchema, HCatSchema topLevelHCatSchema, int columnPos) throws HCatException, FrontendException {
    validateAlias(pigField.alias);
    byte type = pigField.type;
    if (DataType.isComplex(type)) {
        switch(type) {
            case DataType.MAP:
                if (hcatField != null) {
                    if (hcatField.getMapKeyType() != Type.STRING) {
                        throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
                    }
                    // Map values can be primitive or complex, so they are not validated here.
                }
                break;
            case DataType.BAG:
                HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema();
                // The bag's single field is the tuple; validate each field of that tuple.
                // NOTE(review): the lookup uses the enclosing field's alias (pigField.alias),
                // not innerField.alias - confirm this is intended.
                for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) {
                    validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
                }
                break;
            case DataType.TUPLE:
                HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema();
                // NOTE(review): same alias lookup as in the BAG case - confirm this is intended.
                for (FieldSchema innerField : pigField.schema.getFields()) {
                    validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
                }
                break;
            default:
                throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE);
        }
        return;
    }
    if (hcatField == null) {
        // There is no point trying to validate further if we have no type info about the
        // target field. A stricter check that rejected this case was disabled; see HIVE-6194.
        return;
    }
    switch(type) {
        case DataType.BIGDECIMAL:
            throwTypeMismatchException(type, Lists.newArrayList(Type.DECIMAL), hcatField, columnPos);
            break;
        case DataType.DATETIME:
            throwTypeMismatchException(type, Lists.newArrayList(Type.TIMESTAMP, Type.DATE), hcatField, columnPos);
            break;
        case DataType.BYTEARRAY:
            throwTypeMismatchException(type, Lists.newArrayList(Type.BINARY), hcatField, columnPos);
            break;
        case DataType.BIGINTEGER:
            // No HCat type is listed as acceptable for BIGINTEGER.
            throwTypeMismatchException(type, Collections.<Type>emptyList(), hcatField, columnPos);
            break;
        case DataType.BOOLEAN:
            throwTypeMismatchException(type, Lists.newArrayList(Type.BOOLEAN), hcatField, columnPos);
            break;
        case DataType.CHARARRAY:
            throwTypeMismatchException(type, Lists.newArrayList(Type.STRING, Type.CHAR, Type.VARCHAR), hcatField, columnPos);
            break;
        case DataType.DOUBLE:
            throwTypeMismatchException(type, Lists.newArrayList(Type.DOUBLE), hcatField, columnPos);
            break;
        case DataType.FLOAT:
            throwTypeMismatchException(type, Lists.newArrayList(Type.FLOAT), hcatField, columnPos);
            break;
        case DataType.INTEGER:
            throwTypeMismatchException(type, Lists.newArrayList(Type.INT, Type.BIGINT, Type.TINYINT, Type.SMALLINT), hcatField, columnPos);
            break;
        case DataType.LONG:
            throwTypeMismatchException(type, Lists.newArrayList(Type.BIGINT), hcatField, columnPos);
            break;
        default:
            throw new FrontendException("'" + type + "' Pig datatype in column " + columnPos + "(0-based) is not supported by HCat", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
}
Also used : DataType(org.apache.pig.data.DataType) Type(org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type) HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)

Example 17 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

In the class AbstractHCatLoaderTest, method testSchemaLoadComplex.

/**
 * Loads {@code COMPLEX_TABLE} through HCatLoader and verifies the dumped Pig schema:
 * six columns covering a chararray, an integer, a tuple, two bags and a map, including
 * the nested field types and (where checked) their aliases.
 */
@Test
public void testSchemaLoadComplex() throws IOException {
    PigServer server = createPigServer(false);
    // test that schema was loaded correctly
    server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedKSchema = server.dumpSchema("K");
    List<FieldSchema> Kfields = dumpedKSchema.getFields();
    assertEquals(6, Kfields.size());
    assertEquals(DataType.CHARARRAY, Kfields.get(0).type);
    assertEquals("name", Kfields.get(0).alias.toLowerCase());
    assertEquals(DataType.INTEGER, Kfields.get(1).type);
    assertEquals("studentid", Kfields.get(1).alias.toLowerCase());
    assertEquals(DataType.TUPLE, Kfields.get(2).type);
    assertEquals("contact", Kfields.get(2).alias.toLowerCase());
    {
        // contact: tuple of (phno, email), both chararray
        assertNotNull(Kfields.get(2).schema);
        List<FieldSchema> contactFields = Kfields.get(2).schema.getFields();
        assertEquals(2, contactFields.size());
        assertEquals(DataType.CHARARRAY, contactFields.get(0).type);
        assertTrue(contactFields.get(0).alias.equalsIgnoreCase("phno"));
        assertEquals(DataType.CHARARRAY, contactFields.get(1).type);
        assertTrue(contactFields.get(1).alias.equalsIgnoreCase("email"));
    }
    assertEquals(DataType.BAG, Kfields.get(3).type);
    assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase());
    {
        // currently_registered_courses: bag of single-field tuples holding a chararray
        assertNotNull(Kfields.get(3).schema);
        List<FieldSchema> bagFields = Kfields.get(3).schema.getFields();
        assertEquals(1, bagFields.size());
        assertEquals(DataType.TUPLE, bagFields.get(0).type);
        assertNotNull(bagFields.get(0).schema);
        List<FieldSchema> tupleFields = bagFields.get(0).schema.getFields();
        assertEquals(1, tupleFields.size());
        assertEquals(DataType.CHARARRAY, tupleFields.get(0).type);
        // The inner alias is deliberately not asserted to be "course": the name becomes
        // "innerfield" by default - we call it "course" in pig, but in the metadata it'd be
        // anonymous, so this would be autogenerated, which is fine.
    }
    assertEquals(DataType.MAP, Kfields.get(4).type);
    assertEquals("current_grades", Kfields.get(4).alias.toLowerCase());
    assertEquals(DataType.BAG, Kfields.get(5).type);
    assertEquals("phnos", Kfields.get(5).alias.toLowerCase());
    {
        // phnos: bag of (phno, type) tuples, both chararray
        assertNotNull(Kfields.get(5).schema);
        List<FieldSchema> bagFields = Kfields.get(5).schema.getFields();
        assertEquals(1, bagFields.size());
        assertEquals(DataType.TUPLE, bagFields.get(0).type);
        assertNotNull(bagFields.get(0).schema);
        List<FieldSchema> tupleFields = bagFields.get(0).schema.getFields();
        assertEquals(2, tupleFields.size());
        assertEquals(DataType.CHARARRAY, tupleFields.get(0).type);
        assertEquals("phno", tupleFields.get(0).alias.toLowerCase());
        assertEquals(DataType.CHARARRAY, tupleFields.get(1).type);
        assertEquals("type", tupleFields.get(1).alias.toLowerCase());
    }
}
Also used : PigServer(org.apache.pig.PigServer) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 18 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

In the class AbstractHCatLoaderTest, method testSchemaLoadBasic.

/**
 * Loads {@code BASIC_TABLE} through HCatLoader and verifies the dumped Pig schema:
 * two columns, {@code a} (integer) and {@code b} (chararray), compared case-insensitively.
 */
@Test
public void testSchemaLoadBasic() throws IOException {
    PigServer server = createPigServer(false);
    // test that schema was loaded correctly
    server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedXSchema = server.dumpSchema("X");
    List<FieldSchema> Xfields = dumpedXSchema.getFields();
    assertEquals(2, Xfields.size());
    // assertEquals (rather than assertTrue on a comparison) reports the actual value on failure.
    assertEquals("a", Xfields.get(0).alias.toLowerCase());
    assertEquals(DataType.INTEGER, Xfields.get(0).type);
    assertEquals("b", Xfields.get(1).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, Xfields.get(1).type);
}
Also used : PigServer(org.apache.pig.PigServer) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Aggregations

FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)18 Schema (org.apache.pig.impl.logicalLayer.schema.Schema)14 Test (org.junit.Test)13 HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)9 PigServer (org.apache.pig.PigServer)7 ArrayList (java.util.ArrayList)6 HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest)5 Tuple (org.apache.pig.data.Tuple)5 FrontendException (org.apache.pig.impl.logicalLayer.FrontendException)4 HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema)3 IOException (java.io.IOException)2 PreparedStatement (java.sql.PreparedStatement)2 Admin (org.apache.hadoop.hbase.client.Admin)2 Connection (org.apache.hadoop.hbase.client.Connection)2 HBaseAdmin (org.apache.hadoop.hbase.client.HBaseAdmin)2 CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse)2 File (java.io.File)1 RandomAccessFile (java.io.RandomAccessFile)1 Array (java.sql.Array)1 List (java.util.List)1