
Example 6 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestPigHCatUtil, method testGetBagSubSchema:

@Test
public void testGetBagSubSchema() throws Exception {
    // Define the expected schema.
    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple").setDescription("The tuple in the bag").setType(DataType.TUPLE);
    ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
    innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY);
    bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
    ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas);
    // Get the actual converted schema.
    HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList(new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null)));
    HCatFieldSchema hCatFieldSchema = new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null);
    ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
    Assert.assertEquals(expected.toString(), actual.toString());
}
Also used: ResourceSchema (org.apache.pig.ResourceSchema), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema), Test (org.junit.Test)
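
The conversion in this test follows Pig's bag convention: the HCat array's element schema is wrapped in a single-field tuple named "innertuple", whose lone field "innerfield" carries the element type. A minimal sketch reusing the hCatFieldSchema built in the test above:

    ResourceSchema bagSchema = PigHCatUtil.getBagSubSchema(hCatFieldSchema);
    // The bag's only field is the wrapper tuple.
    ResourceFieldSchema tupleField = bagSchema.getFields()[0];
    System.out.println(tupleField.getName()); // innertuple
    // The tuple's single field carries the array's element type (chararray here).
    System.out.println(tupleField.getSchema().getFields()[0].getName()); // innerfield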

Example 7 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class StoreComplex, method main:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    String[] otherArgs = new String[args.length];
    int j = 0;
    for (int i = 0; i < args.length; i++) {
        if (args[i].equals("-libjars")) {
            // GenericOptionsParser does not seem to strip -libjars, so handle it here.
            conf.set("tmpjars", args[i + 1]);
            // Skip the next argument: it is the -libjars value consumed above.
            i++;
        } else {
            otherArgs[j++] = args[i];
        }
    }
    if (j != 1) {
        usage();
    }
    String serverUri = otherArgs[0];
    String tableName = COMPLEX_TABLE_NAME;
    String dbName = "default";
    Map<String, String> outputPartitionKvps = new HashMap<String, String>();
    String outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME;
    // Randomly pass null instead of the empty map to exercise both code paths.
    if (new Random().nextInt(2) == 0) {
        System.err.println("INFO: output partition keys set to null for writing");
        outputPartitionKvps = null;
    }
    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
    if (principalID != null) {
        conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
    }
    Job job = new Job(conf, "storecomplex");
    // initialize HCatInputFormat
    HCatInputFormat.setInput(job, dbName, tableName);
    // initialize HCatOutputFormat
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, outputPartitionKvps));
    HCatSchema s = HCatInputFormat.getTableSchema(job);
    HCatOutputFormat.setSchema(job, s);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    job.setJarByClass(StoreComplex.class);
    job.setMapperClass(ComplexMapper.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), Random (java.util.Random), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), HashMap (java.util.HashMap), Job (org.apache.hadoop.mapreduce.Job), GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser)
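
The job above wires ComplexMapper between HCatInputFormat and HCatOutputFormat, but the mapper body is not shown. A plausible identity mapper matching the declared key and value classes might look like this (a sketch only; the real ComplexMapper in the Hive tests may differ):

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hive.hcatalog.data.DefaultHCatRecord;
    import org.apache.hive.hcatalog.data.HCatRecord;

    // Sketch: pass each record through unchanged so the job copies the
    // complex-typed table into the output table.
    public class ComplexMapper extends Mapper<WritableComparable<?>, HCatRecord, IntWritable, DefaultHCatRecord> {
        @Override
        protected void map(WritableComparable<?> key, HCatRecord value, Context context)
                throws IOException, InterruptedException {
            context.write(new IntWritable(0), new DefaultHCatRecord(value.getAll()));
        }
    }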

Example 8 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestPassProperties, method getSchema:

private HCatSchema getSchema() throws HCatException {
    HCatSchema schema = new HCatSchema(new ArrayList<HCatFieldSchema>());
    schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, ""));
    schema.append(new HCatFieldSchema("a1", HCatFieldSchema.Type.STRING, ""));
    schema.append(new HCatFieldSchema("a2", HCatFieldSchema.Type.STRING, ""));
    return schema;
}
Also used: HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
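
A schema built this way is typically handed to HCatOutputFormat before writing, as in Examples 7 and 9. A minimal sketch (the job and table names here are illustrative):

    Job job = new Job(new Configuration(), "pass-properties");
    HCatOutputFormat.setOutput(job, OutputJobInfo.create("default", "out_table", null));
    // Declare the three columns (a0 int, a1 string, a2 string) to the writer.
    HCatOutputFormat.setSchema(job, getSchema());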

Example 9 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class TestHCatPartitionPublish, method runMRCreateFail:

void runMRCreateFail(String dbName, String tableName, Map<String, String> partitionValues, List<HCatFieldSchema> columns) throws Exception {
    Job job = new Job(mrConf, "hcat mapreduce write fail test");
    job.setJarByClass(this.getClass());
    job.setMapperClass(TestHCatPartitionPublish.MapFail.class);
    // input/output settings
    job.setInputFormatClass(TextInputFormat.class);
    Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput");
    // The write count does not matter; the map fails on its first call.
    createInputFile(path, 5);
    TextInputFormat.setInputPaths(job, path);
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    HCatOutputFormat.setSchema(job, new HCatSchema(columns));
    boolean success = job.waitForCompletion(true);
    Assert.assertFalse(success);
}
Also used: Path (org.apache.hadoop.fs.Path), HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), Job (org.apache.hadoop.mapreduce.Job)
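
MapFail itself is not shown; any mapper that throws on its first record triggers the expected failure. A hypothetical stand-in consistent with the TextInputFormat input and the declared map output classes:

    import java.io.IOException;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hive.hcatalog.data.DefaultHCatRecord;

    public class MapFail extends Mapper<LongWritable, Text, BytesWritable, DefaultHCatRecord> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException {
            // Fail immediately so the job aborts and no partition is published.
            throw new IOException("deliberate failure for the publish test");
        }
    }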

Example 10 with HCatSchema

Use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.

From the class HCatBaseInputFormat, method getTableSchema:

/**
 * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call
 * on the specified job context. This information is available only after HCatInputFormat.setInput
 * has been called for a JobContext.
 * @param conf the Configuration object
 * @return the table schema
 * @throws IOException if HCatInputFormat.setInput has not been called
 *                     for the current context
 */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
    InputJobInfo inputJobInfo = getJobInfo(conf);
    HCatSchema allCols = new HCatSchema(new LinkedList<HCatFieldSchema>());
    for (HCatFieldSchema field : inputJobInfo.getTableInfo().getDataColumns().getFields()) {
        allCols.append(field);
    }
    for (HCatFieldSchema field : inputJobInfo.getTableInfo().getPartitionColumns().getFields()) {
        allCols.append(field);
    }
    return allCols;
}
Also used: HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema), HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema)
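
A short usage sketch combining this accessor with the setInput pattern from Example 7 (the table name is illustrative):

    Job job = new Job(new Configuration(), "schema-probe");
    HCatInputFormat.setInput(job, "default", "my_table");
    // Data columns come first, then partition columns, per the two loops above.
    HCatSchema schema = HCatBaseInputFormat.getTableSchema(job.getConfiguration());
    for (HCatFieldSchema field : schema.getFields()) {
        System.out.println(field.getName() + " : " + field.getTypeString());
    }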

Aggregations

HCatSchema (org.apache.hive.hcatalog.data.schema.HCatSchema): 45 uses
HCatFieldSchema (org.apache.hive.hcatalog.data.schema.HCatFieldSchema): 21 uses
Job (org.apache.hadoop.mapreduce.Job): 17 uses
ArrayList (java.util.ArrayList): 14 uses
Configuration (org.apache.hadoop.conf.Configuration): 13 uses
HashMap (java.util.HashMap): 10 uses
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 10 uses
IOException (java.io.IOException): 8 uses
HCatException (org.apache.hive.hcatalog.common.HCatException): 8 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 6 uses
Test (org.junit.Test): 6 uses
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 5 uses
Properties (java.util.Properties): 4 uses
Path (org.apache.hadoop.fs.Path): 4 uses
ResourceSchema (org.apache.pig.ResourceSchema): 4 uses
FrontendException (org.apache.pig.impl.logicalLayer.FrontendException): 4 uses
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3 uses
HCatRecord (org.apache.hive.hcatalog.data.HCatRecord): 3 uses
PigException (org.apache.pig.PigException): 3 uses
Map (java.util.Map): 2 uses