Use of org.apache.pig.PigServer in project vespa by vespa-engine.
The class VespaQueryTest, method setup.
/**
 * Creates a {@link PigServer} with {@code ExecType.LOCAL} and a fresh
 * {@code HdfsConfiguration}, switches it to batch mode and registers the given script.
 *
 * @param script   the script handed to {@code PigServer.registerScript}
 * @param endpoint value bound to the {@code ENDPOINT} script parameter
 * @return the configured server with the script registered
 * @throws Exception if creating the server or registering the script fails
 */
private PigServer setup(String script, String endpoint) throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL, new HdfsConfiguration());
    pigServer.setBatchOn();

    // The only parameter substituted into the script is ENDPOINT.
    Map<String, String> scriptParams = new HashMap<>();
    scriptParams.put("ENDPOINT", endpoint);
    pigServer.registerScript(script, scriptParams);

    return pigServer;
}
Use of org.apache.pig.PigServer in project hive by apache.
The class AbstractHCatLoaderTest, method testReadDataBasic.
/**
 * Loads {@code BASIC_TABLE} through HCatLoader and compares every returned tuple with
 * the expected rows held in {@code basicInputData}.
 *
 * Each tuple must contain exactly two non-null fields, an {@code Integer} followed by a
 * {@code String}, equal to the expected pair stored under the tuple's read position.
 * The number of tuples read must equal the number of expected rows.
 *
 * @throws IOException if the Pig server fails to register or evaluate the query
 */
@Test
public void testReadDataBasic() throws IOException {
    PigServer server = createPigServer(false);
    server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Iterator<Tuple> rows = server.openIterator("X");
    int numTuplesRead = 0;
    while (rows.hasNext()) {
        Tuple t = rows.next();
        assertEquals(2, t.size());
        assertNotNull(t.get(0));
        assertNotNull(t.get(1));
        // assertEquals instead of assertTrue(a == b): a failure reports the offending class.
        assertEquals(Integer.class, t.get(0).getClass());
        assertEquals(String.class, t.get(1).getClass());
        // Expected value first, actual second, so failure messages label the values correctly.
        assertEquals(basicInputData.get(numTuplesRead).first, t.get(0));
        assertEquals(basicInputData.get(numTuplesRead).second, t.get(1));
        numTuplesRead++;
    }
    assertEquals(basicInputData.size(), numTuplesRead);
}
Use of org.apache.pig.PigServer in project hive by apache.
The class AbstractHCatLoaderTest, method testSchemaLoadBasic.
/**
 * Loads {@code BASIC_TABLE} through HCatLoader and verifies the schema Pig reports for it:
 * two fields, {@code a} of type INTEGER and {@code b} of type CHARARRAY
 * (aliases compared case-insensitively).
 *
 * @throws IOException if the Pig server fails to register the query or dump the schema
 */
@Test
public void testSchemaLoadBasic() throws IOException {
    PigServer server = createPigServer(false);
    // test that schema was loaded correctly
    server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedXSchema = server.dumpSchema("X");
    List<FieldSchema> fields = dumpedXSchema.getFields();
    assertEquals(2, fields.size());

    // Type checks use assertEquals(expected, actual) so a failure shows the actual type code.
    FieldSchema first = fields.get(0);
    assertTrue("unexpected alias: " + first.alias, first.alias.equalsIgnoreCase("a"));
    assertEquals(DataType.INTEGER, first.type);

    FieldSchema second = fields.get(1);
    assertTrue("unexpected alias: " + second.alias, second.alias.equalsIgnoreCase("b"));
    assertEquals(DataType.CHARARRAY, second.type);
}
Use of org.apache.pig.PigServer in project hive by apache.
The class AbstractHCatLoaderTest, method testSchemaLoadComplex.
/**
 * Loads {@code COMPLEX_TABLE} through HCatLoader and verifies the schema Pig reports for it.
 *
 * Expected top-level fields, in order:
 * <ol>
 *   <li>{@code name} - CHARARRAY</li>
 *   <li>{@code studentid} - INTEGER</li>
 *   <li>{@code contact} - TUPLE of (phno CHARARRAY, email CHARARRAY)</li>
 *   <li>{@code currently_registered_courses} - BAG of a TUPLE with one CHARARRAY field</li>
 *   <li>{@code current_grades} - MAP</li>
 *   <li>{@code phnos} - BAG of a TUPLE of (phno CHARARRAY, type CHARARRAY)</li>
 * </ol>
 *
 * @throws IOException if the Pig server fails to register the query or dump the schema
 */
@Test
public void testSchemaLoadComplex() throws IOException {
    PigServer server = createPigServer(false);
    // test that schema was loaded correctly
    server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
    Schema dumpedKSchema = server.dumpSchema("K");
    List<FieldSchema> fields = dumpedKSchema.getFields();
    assertEquals(6, fields.size());

    assertEquals(DataType.CHARARRAY, fields.get(0).type);
    assertEquals("name", fields.get(0).alias.toLowerCase());

    assertEquals(DataType.INTEGER, fields.get(1).type);
    assertEquals("studentid", fields.get(1).alias.toLowerCase());

    // contact: tuple(phno, email)
    FieldSchema contact = fields.get(2);
    assertEquals(DataType.TUPLE, contact.type);
    assertEquals("contact", contact.alias.toLowerCase());
    assertNotNull(contact.schema);
    List<FieldSchema> contactFields = contact.schema.getFields();
    assertEquals(2, contactFields.size());
    assertEquals(DataType.CHARARRAY, contactFields.get(0).type);
    assertTrue(contactFields.get(0).alias.equalsIgnoreCase("phno"));
    assertEquals(DataType.CHARARRAY, contactFields.get(1).type);
    assertTrue(contactFields.get(1).alias.equalsIgnoreCase("email"));

    // currently_registered_courses: bag{tuple(chararray)}
    FieldSchema courses = fields.get(3);
    assertEquals(DataType.BAG, courses.type);
    assertEquals("currently_registered_courses", courses.alias.toLowerCase());
    assertNotNull(courses.schema);
    assertEquals(1, courses.schema.getFields().size());
    FieldSchema courseTuple = courses.schema.getFields().get(0);
    assertEquals(DataType.TUPLE, courseTuple.type);
    assertNotNull(courseTuple.schema);
    assertEquals(1, courseTuple.schema.getFields().size());
    assertEquals(DataType.CHARARRAY, courseTuple.schema.getFields().get(0).type);
    // The inner field's alias is intentionally not asserted: it is called "course" in pig,
    // but in the metadata it is anonymous, so the name is autogenerated ("innerfield" by
    // default), which is fine.

    // current_grades: map
    assertEquals(DataType.MAP, fields.get(4).type);
    assertEquals("current_grades", fields.get(4).alias.toLowerCase());

    // phnos: bag{tuple(phno, type)}
    FieldSchema phnos = fields.get(5);
    assertEquals(DataType.BAG, phnos.type);
    assertEquals("phnos", phnos.alias.toLowerCase());
    assertNotNull(phnos.schema);
    assertEquals(1, phnos.schema.getFields().size());
    FieldSchema phnoTuple = phnos.schema.getFields().get(0);
    assertEquals(DataType.TUPLE, phnoTuple.type);
    assertNotNull(phnoTuple.schema);
    List<FieldSchema> phnoFields = phnoTuple.schema.getFields();
    assertEquals(2, phnoFields.size());
    assertEquals(DataType.CHARARRAY, phnoFields.get(0).type);
    assertEquals("phno", phnoFields.get(0).alias.toLowerCase());
    assertEquals(DataType.CHARARRAY, phnoFields.get(1).type);
    assertEquals("type", phnoFields.get(1).alias.toLowerCase());
}
Use of org.apache.pig.PigServer in project hive by apache.
The class AbstractHCatLoaderTest, method setUpTest.
/**
 * Prepares the tables and data used by the loader tests.
 *
 * Steps, in order:
 * <ol>
 *   <li>Creates the basic, complex, partitioned, specific-size and partitioned-date tables,
 *       then calls {@code AllTypesTable.setupAllTypesTable}.</li>
 *   <li>Generates 3 x 3 rows of the form {@code i<TAB>SjS}, recording each row in
 *       {@code basicInputData} under its zero-based position.</li>
 *   <li>Writes the basic, complex and date input files.</li>
 *   <li>Registers, in batch mode, Pig statements that load those files and store them into
 *       the tables via HCatStorer (rows with {@code a < 2} go to partition {@code bkt=0},
 *       the rest to {@code bkt=1}), then executes the batch.</li>
 * </ol>
 *
 * @throws Exception if table creation, file creation or the Pig batch fails
 */
@Before
public void setUpTest() throws Exception {
    createTableDefaultDB(BASIC_TABLE, "a int, b string");
    createTableDefaultDB(COMPLEX_TABLE, "name string, studentid int, " + "contact struct<phno:string,email:string>, " + "currently_registered_courses array<string>, " + "current_grades map<string,string>, " + "phnos array<struct<phno:string,type:string>>");
    createTableDefaultDB(PARTITIONED_TABLE, "a int, b string", "bkt string");
    createTableDefaultDB(SPECIFIC_SIZE_TABLE, "a int, b string");
    createTable(SPECIFIC_DATABASE, SPECIFIC_SIZE_TABLE_2, "a int, b string");
    createTableDefaultDB(PARTITIONED_DATE_TABLE, "b string", "dt date");
    AllTypesTable.setupAllTypesTable(driver);

    // Build the basic input: every (outer, inner) combination, one tab-separated line each.
    final int loopSize = 3;
    String[] basicLines = new String[loopSize * loopSize];
    basicInputData = new HashMap<Integer, Pair<Integer, String>>();
    int row = 0;
    for (int outer = 1; outer <= loopSize; outer++) {
        for (int inner = 1; inner <= loopSize; inner++) {
            String label = "S" + inner + "S";
            basicLines[row] = outer + "\t" + label;
            basicInputData.put(row, new Pair<Integer, String>(outer, label));
            row++;
        }
    }

    String[] complexLines = new String[] { "Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)}\t[PHARMACOLOGY#A-,PSYCHIATRY#B+]\t{(415-253-6367,cell),(408-253-6367,landline)}", "Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)}\t[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D]\t{(415-253-6367,cell),(408-253-6367,landline)}" };
    String[] dateLines = new String[] { "2016-07-14 08:10:15\tHenry Jekyll", "2016-07-15 11:54:55\tEdward Hyde" };
    HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, basicLines);
    HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, complexLines);
    HcatTestUtils.createTestDataFile(DATE_FILE_NAME, dateLines);

    // Queue all statements in batch mode; each one is registered with an increasing line number.
    PigServer pig = createPigServer(false);
    pig.setBatchOn();
    int lineNo = 0;

    // Basic data into the unpartitioned tables.
    pig.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);", ++lineNo);
    pig.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++lineNo);
    pig.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++lineNo);
    pig.registerQuery("store A into '" + SPECIFIC_DATABASE + "." + SPECIFIC_SIZE_TABLE_2 + "' " + "using org.apache.hive" + ".hcatalog.pig.HCatStorer();", ++lineNo);

    // Basic data split across the two partitions of the partitioned table.
    pig.registerQuery("B = foreach A generate a,b;", ++lineNo);
    pig.registerQuery("B2 = filter B by a < 2;", ++lineNo);
    pig.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');", ++lineNo);
    pig.registerQuery("C = foreach A generate a,b;", ++lineNo);
    pig.registerQuery("C2 = filter C by a >= 2;", ++lineNo);
    pig.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');", ++lineNo);

    // Complex-typed data.
    pig.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});", ++lineNo);
    pig.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++lineNo);

    // Date data: the timestamp string is converted with ToDate before storing.
    pig.registerQuery("E = load '" + DATE_FILE_NAME + "' as (dt:chararray, b:chararray);", ++lineNo);
    pig.registerQuery("F = foreach E generate ToDate(dt, 'yyyy-MM-dd HH:mm:ss') as dt, b;", ++lineNo);
    pig.registerQuery("store F into '" + PARTITIONED_DATE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++lineNo);

    pig.executeBatch();
}
Aggregations