
Example 1 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class TestHCatLoaderComplexSchema method compareIgnoreFiledNames.

private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException {
    if (expected == null || got == null) {
        if (expected == got) {
            return "";
        } else {
            return "\nexpected " + expected + " got " + got;
        }
    }
    if (expected.size() != got.size()) {
        return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")";
    }
    String message = "";
    for (int i = 0; i < expected.size(); i++) {
        FieldSchema expectedField = expected.getField(i);
        FieldSchema gotField = got.getField(i);
        if (expectedField.type != gotField.type) {
            message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")";
        } else {
            message += compareIgnoreFiledNames(expectedField.schema, gotField.schema);
        }
    }
    return message;
}
Also used : FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema)
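
Below is a minimal standalone sketch of this kind of name-insensitive schema comparison. It assumes Pig's org.apache.pig.impl.util.Utils.getSchemaFromString helper is on the classpath; the class name SchemaCompareSketch and the schema strings are illustrative, not part of the Hive test.

import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.apache.pig.impl.util.Utils;

public class SchemaCompareSketch {
    public static void main(String[] args) throws Exception {
        // Aliases differ (a/b vs x/y) but the types line up, so a
        // name-insensitive comparison like the one above reports no difference.
        Schema expected = Utils.getSchemaFromString("a:int, b:chararray");
        Schema got = Utils.getSchemaFromString("x:int, y:chararray");
        for (int i = 0; i < expected.size(); i++) {
            FieldSchema e = expected.getField(i);
            FieldSchema g = got.getField(i);
            // FieldSchema.type is a public byte holding a Pig DataType constant.
            System.out.println("field " + i + ": types match = " + (e.type == g.type));
        }
    }
}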

Example 2 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class AbstractHCatLoaderTest method testReadPartitionedBasic.

@Test
public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException {
    PigServer server = createPigServer(false);
    driver.run("select * from " + PARTITIONED_TABLE);
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size());
    server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedWSchema = server.dumpSchema("W");
    List<FieldSchema> Wfields = dumpedWSchema.getFields();
    assertEquals(3, Wfields.size());
    assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a"));
    assertTrue(Wfields.get(0).type == DataType.INTEGER);
    assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b"));
    assertTrue(Wfields.get(1).type == DataType.CHARARRAY);
    assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt"));
    assertTrue(Wfields.get(2).type == DataType.CHARARRAY);
    Iterator<Tuple> WIter = server.openIterator("W");
    Collection<Pair<Integer, String>> valuesRead = new ArrayList<Pair<Integer, String>>();
    while (WIter.hasNext()) {
        Tuple t = WIter.next();
        assertTrue(t.size() == 3);
        assertNotNull(t.get(0));
        assertNotNull(t.get(1));
        assertNotNull(t.get(2));
        assertTrue(t.get(0).getClass() == Integer.class);
        assertTrue(t.get(1).getClass() == String.class);
        assertTrue(t.get(2).getClass() == String.class);
        valuesRead.add(new Pair<Integer, String>((Integer) t.get(0), (String) t.get(1)));
        if ((Integer) t.get(0) < 2) {
            assertEquals("0", t.get(2));
        } else {
            assertEquals("1", t.get(2));
        }
    }
    assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size());
    server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("P1filter = filter P1 by bkt == '0';");
    Iterator<Tuple> P1Iter = server.openIterator("P1filter");
    int count1 = 0;
    while (P1Iter.hasNext()) {
        Tuple t = P1Iter.next();
        assertEquals("0", t.get(2));
        assertEquals(1, t.get(0));
        count1++;
    }
    assertEquals(3, count1);
    server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("P2filter = filter P2 by bkt == '1';");
    Iterator<Tuple> P2Iter = server.openIterator("P2filter");
    int count2 = 0;
    while (P2Iter.hasNext()) {
        Tuple t = P2Iter.next();
        assertEquals("1", t.get(2));
        assertTrue(((Integer) t.get(0)) > 1);
        count2++;
    }
    assertEquals(6, count2);
}
Also used : HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Pair(org.apache.hive.hcatalog.data.Pair) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest) Test(org.junit.Test)
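
The load-then-filter pattern in this test reduces to a few lines. The following hedged sketch runs HCatLoader through a local-mode PigServer; the table name 'mytable' and the LOCAL exec type are assumptions for illustration, not taken from the test harness.

import java.util.Iterator;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;

public class PartitionFilterSketch {
    public static void main(String[] args) throws Exception {
        PigServer server = new PigServer(ExecType.LOCAL);
        server.registerQuery("W = load 'mytable' using org.apache.hive.hcatalog.pig.HCatLoader();");
        // Filtering on the partition column (bkt) can let HCatLoader prune
        // partitions instead of scanning the whole table.
        server.registerQuery("F = filter W by bkt == '0';");
        Iterator<Tuple> it = server.openIterator("F");
        while (it.hasNext()) {
            System.out.println(it.next());
        }
    }
}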

Example 3 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class AbstractHCatLoaderTest method testReadMissingPartitionBasicNeg.

@Test
public void testReadMissingPartitionBasicNeg() throws IOException, CommandNeedRetryException {
    PigServer server = createPigServer(false);
    File removedPartitionDir = new File(TEST_WAREHOUSE_DIR + "/" + PARTITIONED_TABLE + "/bkt=0");
    if (!removeDirectory(removedPartitionDir)) {
        System.out.println("Test did not run because its environment could not be set up.");
        return;
    }
    driver.run("select * from " + PARTITIONED_TABLE);
    ArrayList<String> valuesReadFromHiveDriver = new ArrayList<String>();
    driver.getResults(valuesReadFromHiveDriver);
    assertTrue(valuesReadFromHiveDriver.size() == 6);
    server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedWSchema = server.dumpSchema("W");
    List<FieldSchema> Wfields = dumpedWSchema.getFields();
    assertEquals(3, Wfields.size());
    assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a"));
    assertTrue(Wfields.get(0).type == DataType.INTEGER);
    assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b"));
    assertTrue(Wfields.get(1).type == DataType.CHARARRAY);
    assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt"));
    assertTrue(Wfields.get(2).type == DataType.CHARARRAY);
    try {
        server.openIterator("W");
        fail("Should have failed retrieving an invalid partition");
    } catch (IOException ioe) {
        // expected
    }
}
Also used : PigServer(org.apache.pig.PigServer) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) IOException(java.io.IOException) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) Tuple(org.apache.pig.data.Tuple) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest) Test(org.junit.Test)
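
The test relies on a removeDirectory(...) helper whose body is not shown on this page. A plausible recursive implementation is sketched below as a hedged stand-in; the real helper in AbstractHCatLoaderTest may differ.

import java.io.File;

public class DirUtil {
    // Deletes dir and everything under it; returns false on the first failure.
    static boolean removeDirectory(File dir) {
        File[] children = dir.listFiles(); // null when dir is a plain file
        if (children != null) {
            for (File child : children) {
                if (!removeDirectory(child)) {
                    return false;
                }
            }
        }
        return dir.delete(); // also covers the plain-file base case
    }
}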

Example 4 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class AbstractHCatLoaderTest method testProjectionsBasic.

@Test
public void testProjectionsBasic() throws IOException {
    PigServer server = createPigServer(false);
    // projections are handled by using generate, not "as" on the Load
    server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    server.registerQuery("Y2 = foreach Y1 generate a;");
    server.registerQuery("Y3 = foreach Y1 generate b,a;");
    Schema dumpedY2Schema = server.dumpSchema("Y2");
    Schema dumpedY3Schema = server.dumpSchema("Y3");
    List<FieldSchema> Y2fields = dumpedY2Schema.getFields();
    List<FieldSchema> Y3fields = dumpedY3Schema.getFields();
    assertEquals(1, Y2fields.size());
    assertEquals("a", Y2fields.get(0).alias.toLowerCase());
    assertEquals(DataType.INTEGER, Y2fields.get(0).type);
    assertEquals(2, Y3fields.size());
    assertEquals("b", Y3fields.get(0).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, Y3fields.get(0).type);
    assertEquals("a", Y3fields.get(1).alias.toLowerCase());
    assertEquals(DataType.INTEGER, Y3fields.get(1).type);
    int numTuplesRead = 0;
    Iterator<Tuple> Y2Iter = server.openIterator("Y2");
    while (Y2Iter.hasNext()) {
        Tuple t = Y2Iter.next();
        assertEquals(t.size(), 1);
        assertNotNull(t.get(0));
        assertTrue(t.get(0).getClass() == Integer.class);
        assertEquals(t.get(0), basicInputData.get(numTuplesRead).first);
        numTuplesRead++;
    }
    numTuplesRead = 0;
    Iterator<Tuple> Y3Iter = server.openIterator("Y3");
    while (Y3Iter.hasNext()) {
        Tuple t = Y3Iter.next();
        assertEquals(t.size(), 2);
        assertNotNull(t.get(0));
        assertTrue(t.get(0).getClass() == String.class);
        assertEquals(t.get(0), basicInputData.get(numTuplesRead).second);
        assertNotNull(t.get(1));
        assertTrue(t.get(1).getClass() == Integer.class);
        assertEquals(t.get(1), basicInputData.get(numTuplesRead).first);
        numTuplesRead++;
    }
    assertEquals(basicInputData.size(), numTuplesRead);
}
Also used : PigServer(org.apache.pig.PigServer) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) Tuple(org.apache.pig.data.Tuple) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest) Test(org.junit.Test)
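
As the comment in the test notes, projections are expressed with FOREACH ... GENERATE rather than an AS clause on the load. A hedged standalone sketch of that pattern follows; the table name 'basic_table' and the LOCAL exec type are illustrative assumptions.

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;

public class ProjectionSketch {
    public static void main(String[] args) throws Exception {
        PigServer server = new PigServer(ExecType.LOCAL);
        server.registerQuery("Y1 = load 'basic_table' using org.apache.hive.hcatalog.pig.HCatLoader();");
        // Reorder and drop columns in the GENERATE clause.
        server.registerQuery("Y3 = foreach Y1 generate b, a;");
        Schema schema = server.dumpSchema("Y3");
        for (FieldSchema f : schema.getFields()) {
            // DataType.findTypeName turns the byte constant into "int", "chararray", etc.
            System.out.println(f.alias + " : " + DataType.findTypeName(f.type));
        }
    }
}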

Example 5 with FieldSchema

use of org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema in project hive by apache.

the class HCatBaseStorer method getHCatFSFromPigFS.

/**
   * Here we process the HCat table schema as derived from the metastore;
   * it should therefore carry information about all fields and sub-fields,
   * but not about partition columns.
   */
private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema, Schema pigSchema, HCatSchema tableSchema) throws FrontendException, HCatException {
    if (hcatFieldSchema == null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("hcatFieldSchema is null for fSchema '" + fSchema.alias + "'");
        //throw new IllegalArgumentException("hcatFiledSchema is null; fSchema=" + fSchema + " " +
        //      "(pigSchema, tableSchema)=(" + pigSchema + "," + tableSchema + ")");
        }
    }
    byte type = fSchema.type;
    switch(type) {
        case DataType.CHARARRAY:
        case DataType.BIGCHARARRAY:
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, null);
        case DataType.INTEGER:
            if (hcatFieldSchema != null) {
                if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) {
                    throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
                }
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.intTypeInfo, null);
        case DataType.LONG:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.longTypeInfo, null);
        case DataType.FLOAT:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.floatTypeInfo, null);
        case DataType.DOUBLE:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.doubleTypeInfo, null);
        case DataType.BYTEARRAY:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.binaryTypeInfo, null);
        case DataType.BOOLEAN:
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.booleanTypeInfo, null);
        case DataType.DATETIME:
            // the concrete Hive type (e.g. timestamp) is taken from the target table's schema when available
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.timestampTypeInfo, null);
        case DataType.BIGDECIMAL:
            if (hcatFieldSchema != null && hcatFieldSchema.getTypeInfo() != null) {
                return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getTypeInfo(), null);
            }
            return new HCatFieldSchema(fSchema.alias, TypeInfoFactory.decimalTypeInfo, null);
        case DataType.BAG:
            Schema bagSchema = fSchema.schema;
            List<HCatFieldSchema> arrFields = new ArrayList<HCatFieldSchema>(1);
            FieldSchema field;
            // Find out whether we need to strip the enclosing tuple from the bag.
            if (removeTupleFromBag(hcatFieldSchema, fSchema)) {
                field = bagSchema.getField(0).schema.getField(0);
            } else {
                field = bagSchema.getField(0);
            }
            arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0), pigSchema, tableSchema));
            return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), "");
        case DataType.TUPLE:
            List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>();
            HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema();
            List<FieldSchema> fields = fSchema.schema.getFields();
            for (int i = 0; i < fields.size(); i++) {
                FieldSchema fieldSchema = fields.get(i);
                hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i), pigSchema, tableSchema));
            }
            return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), "");
        case DataType.MAP:
            {
                // Pig's schema contains no type information about a map's keys
                // and values, so if it's a new column assume <string,string>;
                // if it's an existing column, return whatever that column already contains.
                HCatFieldSchema valFS;
                List<HCatFieldSchema> valFSList = new ArrayList<HCatFieldSchema>(1);
                if (hcatFieldSchema != null) {
                    return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, hcatFieldSchema.getMapKeyTypeInfo(), hcatFieldSchema.getMapValueSchema(), "");
                }
                // Column not found in the target table, so it's a new column; its schema defaults to map<string,string>.
                valFS = new HCatFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, "");
                valFSList.add(valFS);
                return HCatFieldSchema.createMapTypeFieldSchema(fSchema.alias, TypeInfoFactory.stringTypeInfo, new HCatSchema(valFSList), "");
            }
        case DataType.BIGINTEGER:
            // falls through: BIGINTEGER has no Hive/HCat counterpart; listed for completeness
        default:
            throw new FrontendException("Unsupported type: " + type + "  in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE);
    }
}
Also used : HCatSchema(org.apache.hive.hcatalog.data.schema.HCatSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) ResourceSchema(org.apache.pig.ResourceSchema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) ArrayList(java.util.ArrayList) List(java.util.List) FrontendException(org.apache.pig.impl.logicalLayer.FrontendException)
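
The scalar branches above amount to a small lookup from Pig DataType constants to Hive TypeInfo objects. Here is a hedged sketch of just that mapping; the helper name scalarTypeInfo is invented for illustration, and it deliberately omits the table-schema-aware cases (the CHARARRAY/INTEGER overrides, DATETIME, BIGDECIMAL) handled above.

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.pig.data.DataType;

public class PigToHiveTypeSketch {
    // Maps a Pig scalar type to its default Hive TypeInfo, mirroring the
    // fall-back returns in getHCatFSFromPigFS when no table schema is present.
    static TypeInfo scalarTypeInfo(byte pigType) {
        switch (pigType) {
            case DataType.CHARARRAY: return TypeInfoFactory.stringTypeInfo;
            case DataType.INTEGER:   return TypeInfoFactory.intTypeInfo;
            case DataType.LONG:      return TypeInfoFactory.longTypeInfo;
            case DataType.FLOAT:     return TypeInfoFactory.floatTypeInfo;
            case DataType.DOUBLE:    return TypeInfoFactory.doubleTypeInfo;
            case DataType.BYTEARRAY: return TypeInfoFactory.binaryTypeInfo;
            case DataType.BOOLEAN:   return TypeInfoFactory.booleanTypeInfo;
            default:
                throw new IllegalArgumentException("no scalar Hive mapping for Pig type " + pigType);
        }
    }
}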

Aggregations

FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) 18
Schema (org.apache.pig.impl.logicalLayer.schema.Schema) 14
Test (org.junit.Test) 13
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema) 9
PigServer (org.apache.pig.PigServer) 7
ArrayList (java.util.ArrayList) 6
HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest) 5
Tuple (org.apache.pig.data.Tuple) 5
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException) 4
HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema) 3
IOException (java.io.IOException) 2
PreparedStatement (java.sql.PreparedStatement) 2
HBaseAdmin (org.apache.hadoop.hbase.client.HBaseAdmin) 2
CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) 2
File (java.io.File) 1
RandomAccessFile (java.io.RandomAccessFile) 1
Array (java.sql.Array) 1
List (java.util.List) 1
Configuration (org.apache.hadoop.conf.Configuration) 1
HTable (org.apache.hadoop.hbase.client.HTable) 1