Example 66 with PigServer

use of org.apache.pig.PigServer in project vespa by vespa-engine.

the class VespaQueryTest method setup.

private PigServer setup(String script, String endpoint) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Substitute the $ENDPOINT parameter referenced by the Pig script.
    Map<String, String> parameters = new HashMap<>();
    parameters.put("ENDPOINT", endpoint);
    // Local execution; with batch mode on, registerScript only queues the
    // script's statements until executeBatch() is called.
    PigServer ps = new PigServer(ExecType.LOCAL, conf);
    ps.setBatchOn();
    ps.registerScript(script, parameters);
    return ps;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) PigServer(org.apache.pig.PigServer)
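
Because the helper turns batch mode on, registerScript only queues the script; nothing executes until the caller runs executeBatch(). A minimal, hedged sketch of how the returned PigServer might be driven (the script path and endpoint below are hypothetical placeholders; ExecJob is org.apache.pig.backend.executionengine.ExecJob, assertEquals is JUnit's):

private void runQueryScript() throws Exception {
    // Hypothetical script path and endpoint, for illustration only.
    PigServer ps = setup("src/test/pig/query.pig", "localhost:8080");
    // executeBatch() runs the queued statements and returns one ExecJob per STORE.
    List<ExecJob> jobs = ps.executeBatch();
    for (ExecJob job : jobs) {
        assertEquals(ExecJob.JOB_STATUS.COMPLETED, job.getStatus());
    }
    ps.shutdown();
}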

Example 67 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testReadDataBasic.

@Test
public void testReadDataBasic() throws IOException {
    PigServer server = createPigServer(false);
    server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Iterator<Tuple> XIter = server.openIterator("X");
    int numTuplesRead = 0;
    while (XIter.hasNext()) {
        Tuple t = XIter.next();
        assertEquals(2, t.size());
        assertNotNull(t.get(0));
        assertNotNull(t.get(1));
        assertTrue(t.get(0).getClass() == Integer.class);
        assertTrue(t.get(1).getClass() == String.class);
        assertEquals(t.get(0), basicInputData.get(numTuplesRead).first);
        assertEquals(t.get(1), basicInputData.get(numTuplesRead).second);
        numTuplesRead++;
    }
    assertEquals(basicInputData.size(), numTuplesRead);
}
Also used : PigServer(org.apache.pig.PigServer) Tuple(org.apache.pig.data.Tuple) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)
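
The openIterator pattern above depends on the HCatalog tables created in setUpTest. The same pattern can also be exercised against Pig's in-memory mock storage (the Storage.Data type that appears under Aggregations below); a hedged, self-contained sketch, not code from the project:

@Test
public void testOpenIteratorWithMockStorage() throws Exception {
    // Local PigServer backed by in-memory data instead of an HCatalog table.
    PigServer server = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(server);
    data.set("in", "a:int,b:chararray", Storage.tuple(1, "S1S"), Storage.tuple(2, "S2S"));
    server.registerQuery("X = load 'in' using mock.Storage();");
    Iterator<Tuple> it = server.openIterator("X");
    int numTuplesRead = 0;
    while (it.hasNext()) {
        assertEquals(2, it.next().size());
        numTuplesRead++;
    }
    assertEquals(2, numTuplesRead);
}
Also used : PigServer(org.apache.pig.PigServer) ExecType(org.apache.pig.ExecType) Storage(org.apache.pig.builtin.mock.Storage) Data(org.apache.pig.builtin.mock.Storage.Data) Tuple(org.apache.pig.data.Tuple) Iterator(java.util.Iterator) Test(org.junit.Test)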

Example 68 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testSchemaLoadBasic.

@Test
public void testSchemaLoadBasic() throws IOException {
    PigServer server = createPigServer(false);
    // test that schema was loaded correctly
    server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedXSchema = server.dumpSchema("X");
    List<FieldSchema> Xfields = dumpedXSchema.getFields();
    assertEquals(2, Xfields.size());
    assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a"));
    assertTrue(Xfields.get(0).type == DataType.INTEGER);
    assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b"));
    assertTrue(Xfields.get(1).type == DataType.CHARARRAY);
}
Also used : PigServer(org.apache.pig.PigServer) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)
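
The same check can be written without relying on field positions, since org.apache.pig.impl.logicalLayer.schema.Schema also allows lookup by alias. A hedged variant of the test above, reusing the createPigServer and BASIC_TABLE helpers from these snippets:

@Test
public void testSchemaFieldLookupByAlias() throws IOException {
    PigServer server = createPigServer(false);
    server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedXSchema = server.dumpSchema("X");
    // getField(alias) resolves a column by name instead of by position.
    assertEquals(DataType.INTEGER, dumpedXSchema.getField("a").type);
    assertEquals(DataType.CHARARRAY, dumpedXSchema.getField("b").type);
}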

Example 69 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method testSchemaLoadComplex.

@Test
public void testSchemaLoadComplex() throws IOException {
    PigServer server = createPigServer(false);
    // test that schema was loaded correctly
    server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedKSchema = server.dumpSchema("K");
    List<FieldSchema> Kfields = dumpedKSchema.getFields();
    assertEquals(6, Kfields.size());
    assertEquals(DataType.CHARARRAY, Kfields.get(0).type);
    assertEquals("name", Kfields.get(0).alias.toLowerCase());
    assertEquals(DataType.INTEGER, Kfields.get(1).type);
    assertEquals("studentid", Kfields.get(1).alias.toLowerCase());
    assertEquals(DataType.TUPLE, Kfields.get(2).type);
    assertEquals("contact", Kfields.get(2).alias.toLowerCase());
    {
        assertNotNull(Kfields.get(2).schema);
        assertTrue(Kfields.get(2).schema.getFields().size() == 2);
        assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY);
        assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno"));
        assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY);
        assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email"));
    }
    assertEquals(DataType.BAG, Kfields.get(3).type);
    assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase());
    {
        assertNotNull(Kfields.get(3).schema);
        assertEquals(1, Kfields.get(3).schema.getFields().size());
        assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type);
        assertNotNull(Kfields.get(3).schema.getFields().get(0).schema);
        assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size());
        assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type);
    // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase());
    // commented out, because the name becomes "innerfield" by default - we call it "course" in pig,
    // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine
    }
    assertEquals(DataType.MAP, Kfields.get(4).type);
    assertEquals("current_grades", Kfields.get(4).alias.toLowerCase());
    assertEquals(DataType.BAG, Kfields.get(5).type);
    assertEquals("phnos", Kfields.get(5).alias.toLowerCase());
    {
        assertNotNull(Kfields.get(5).schema);
        assertEquals(1, Kfields.get(5).schema.getFields().size());
        assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type);
        assertNotNull(Kfields.get(5).schema.getFields().get(0).schema);
        assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2);
        assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type);
        assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase());
        assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type);
        assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase());
    }
}
Also used : PigServer(org.apache.pig.PigServer) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) FieldSchema(org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema) HCatFieldSchema(org.apache.hive.hcatalog.data.schema.HCatFieldSchema) Test(org.junit.Test) HCatBaseTest(org.apache.hive.hcatalog.mapreduce.HCatBaseTest)

Example 70 with PigServer

use of org.apache.pig.PigServer in project hive by apache.

the class AbstractHCatLoaderTest method setUpTest.

@Before
public void setUpTest() throws Exception {
    createTableDefaultDB(BASIC_TABLE, "a int, b string");
    createTableDefaultDB(COMPLEX_TABLE, "name string, studentid int, " + "contact struct<phno:string,email:string>, " + "currently_registered_courses array<string>, " + "current_grades map<string,string>, " + "phnos array<struct<phno:string,type:string>>");
    createTableDefaultDB(PARTITIONED_TABLE, "a int, b string", "bkt string");
    createTableDefaultDB(SPECIFIC_SIZE_TABLE, "a int, b string");
    createTable(SPECIFIC_DATABASE, SPECIFIC_SIZE_TABLE_2, "a int, b string");
    createTableDefaultDB(PARTITIONED_DATE_TABLE, "b string", "dt date");
    AllTypesTable.setupAllTypesTable(driver);
    int LOOP_SIZE = 3;
    String[] input = new String[LOOP_SIZE * LOOP_SIZE];
    basicInputData = new HashMap<Integer, Pair<Integer, String>>();
    int k = 0;
    for (int i = 1; i <= LOOP_SIZE; i++) {
        String si = i + "";
        for (int j = 1; j <= LOOP_SIZE; j++) {
            String sj = "S" + j + "S";
            input[k] = si + "\t" + sj;
            basicInputData.put(k, new Pair<Integer, String>(i, sj));
            k++;
        }
    }
    HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input);
    HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, new String[] { "Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)}\t[PHARMACOLOGY#A-,PSYCHIATRY#B+]\t{(415-253-6367,cell),(408-253-6367,landline)}", "Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)}\t[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D]\t{(415-253-6367,cell),(408-253-6367,landline)}" });
    HcatTestUtils.createTestDataFile(DATE_FILE_NAME, new String[] { "2016-07-14 08:10:15\tHenry Jekyll", "2016-07-15 11:54:55\tEdward Hyde" });
    PigServer server = createPigServer(false);
    server.setBatchOn();
    int i = 0;
    // The second argument to registerQuery is the statement's line number within the batch script.
    server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++i);
    server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
    server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
    server.registerQuery("store A into '" + SPECIFIC_DATABASE + "." + SPECIFIC_SIZE_TABLE_2 + "' " + "using org.apache.hive" + ".hcatalog.pig.HCatStorer();", ++i);
    server.registerQuery("B = foreach A generate a,b;", ++i);
    server.registerQuery("B2 = filter B by a < 2;", ++i);
    server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');", ++i);
    server.registerQuery("C = foreach A generate a,b;", ++i);
    server.registerQuery("C2 = filter C by a >= 2;", ++i);
    server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');", ++i);
    server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});", ++i);
    server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
    server.registerQuery("E = load '" + DATE_FILE_NAME + "' as (dt:chararray, b:chararray);", ++i);
    server.registerQuery("F = foreach E generate ToDate(dt, 'yyyy-MM-dd HH:mm:ss') as dt, b;", ++i);
    server.registerQuery("store F into '" + PARTITIONED_DATE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
    server.executeBatch();
}
Also used : PigServer(org.apache.pig.PigServer) Pair(org.apache.hive.hcatalog.data.Pair) Before(org.junit.Before)

Aggregations

PigServer (org.apache.pig.PigServer): 115
Tuple (org.apache.pig.data.Tuple): 74
ArrayList (java.util.ArrayList): 70
Test (org.junit.Test): 59
HCatBaseTest (org.apache.hive.hcatalog.mapreduce.HCatBaseTest): 37
Data (org.apache.pig.builtin.mock.Storage.Data): 15
File (java.io.File): 14
Schema (org.apache.pig.impl.logicalLayer.schema.Schema): 14
FieldSchema (org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema): 9
Properties (java.util.Properties): 8
Vector (java.util.Vector): 8
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 6
Path (org.apache.hadoop.fs.Path): 4
FileWriter (java.io.FileWriter): 3
List (java.util.List): 3
Map (java.util.Map): 3
Admin (org.apache.hadoop.hbase.client.Admin): 3
Connection (org.apache.hadoop.hbase.client.Connection): 3
Pair (org.apache.hive.hcatalog.data.Pair): 3
ExecJob (org.apache.pig.backend.executionengine.ExecJob): 3