use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
the class WriteJson method run.
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  args = new GenericOptionsParser(conf, args).getRemainingArgs();
  String serverUri = args[0]; // parsed but unused in this example
  String inputTableName = args[1];
  String outputTableName = args[2];
  String dbName = null;
  String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
  if (principalID != null) {
    conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
  }
  Job job = new Job(conf, "WriteJson");
  // initialize HCatInputFormat with the source table
  HCatInputFormat.setInput(job, dbName, inputTableName);
  job.setInputFormatClass(HCatInputFormat.class);
  job.setJarByClass(WriteJson.class);
  job.setMapperClass(Map.class);
  job.setOutputKeyClass(WritableComparable.class);
  job.setOutputValueClass(DefaultHCatRecord.class);
  // map-only job: records are passed through unchanged
  job.setNumReduceTasks(0);
  // initialize HCatOutputFormat and reuse the input table's schema for writing
  HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
  HCatSchema s = HCatInputFormat.getTableSchema(job);
  System.err.println("INFO: output schema explicitly set for writing: " + s);
  HCatOutputFormat.setSchema(job, s);
  job.setOutputFormatClass(HCatOutputFormat.class);
  return (job.waitForCompletion(true) ? 0 : 1);
}
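The job above references an inner Map class that the snippet does not show. A minimal pass-through mapper consistent with the configured key/value classes might look like this (a sketch, not the exact class from the Hive examples):

import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hive.hcatalog.data.HCatRecord;

// Sketch of the referenced Map class, assuming a pass-through map-only job;
// the actual mapper in the Hive examples may differ.
public static class Map extends
    Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord> {
  @Override
  protected void map(WritableComparable key, HCatRecord value, Context context)
      throws IOException, InterruptedException {
    // Emit each record unchanged; HCatOutputFormat serializes it to the output table.
    context.write(key, value);
  }
}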
use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
the class TestHCatInputFormatMethods method testGetPartitionAndDataColumns.
@Test
public void testGetPartitionAndDataColumns() throws Exception {
  Configuration conf = new Configuration();
  Job myJob = new Job(conf, "hcatTest");
  HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");
  // data columns: a string, b int
  HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
  Assert.assertNotNull(cols.getFields());
  Assert.assertEquals(2, cols.getFields().size());
  Assert.assertEquals("a", cols.getFields().get(0).getName());
  Assert.assertEquals("b", cols.getFields().get(1).getName());
  Assert.assertEquals(HCatFieldSchema.Type.STRING, cols.getFields().get(0).getType());
  Assert.assertEquals(HCatFieldSchema.Type.INT, cols.getFields().get(1).getType());
  // partition columns: x string, y string
  HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
  Assert.assertNotNull(pcols.getFields());
  Assert.assertEquals(2, pcols.getFields().size());
  Assert.assertEquals("x", pcols.getFields().get(0).getName());
  Assert.assertEquals("y", pcols.getFields().get(1).getName());
  Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(0).getType());
  Assert.assertEquals(HCatFieldSchema.Type.STRING, pcols.getFields().get(1).getType());
}
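The assertions imply a fixture table with data columns (a string, b int) and partition keys (x string, y string). A hypothetical setup, assuming a driver field of type org.apache.hadoop.hive.ql.Driver (the real test harness may create the table differently):

// Hypothetical fixture DDL consistent with the assertions above;
// the actual test setup in Hive may differ.
driver.run("create table testHCIFMethods (a string, b int) partitioned by (x string, y string)");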
use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
the class TestLazyHCatRecord method testGetWithName.
@Test
public void testGetWithName() throws Exception {
  TypeInfo ti = getTypeInfo();
  HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti));
  // resolve the struct sub-schema so fields can be fetched by name
  HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti).get(0).getStructSubSchema();
  Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue());
  Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue());
  Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0);
  Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema));
}
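The helpers getTypeInfo(), getHCatRecord(), and getObjectInspector() are defined elsewhere in the test. A plausible getTypeInfo() consistent with the field names asserted above is sketched here (assumed, not copied from the test):

// Assumed helper: builds a struct TypeInfo whose field names match the
// lookups above ("an_int", "a_long", "a_double", "a_string").
private TypeInfo getTypeInfo() {
  List<String> names = Arrays.asList("an_int", "a_long", "a_double", "a_string");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo,
      TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo);
  return TypeInfoFactory.getStructTypeInfo(names, types);
}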
use of org.apache.hive.hcatalog.data.schema.HCatSchema in project flink by apache.
the class HCatInputFormatBase method getFields.
/**
 * Specifies the fields which are returned by the InputFormat and their order.
 *
 * @param fields The fields and their order which are returned by the InputFormat.
 * @return This InputFormat with specified return fields.
 * @throws java.io.IOException if the projected schema cannot be serialized into the configuration.
 */
public HCatInputFormatBase<T> getFields(String... fields) throws IOException {
  // build the projected output schema, preserving the requested field order
  ArrayList<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(fields.length);
  for (String field : fields) {
    fieldSchemas.add(this.outputSchema.get(field));
  }
  this.outputSchema = new HCatSchema(fieldSchemas);
  // store the updated output schema in the Hadoop configuration
  configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
  return this;
}
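A hedged usage sketch, assuming Flink's Java HCatInputFormat subclass and placeholder database/table names:

// Hypothetical call site: project the input to columns "a" and "b", in that order.
HCatInputFormat<HCatRecord> input = new HCatInputFormat<HCatRecord>("default", "mytable");
input.getFields("a", "b");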
use of org.apache.hive.hcatalog.data.schema.HCatSchema in project hive by apache.
the class FosterStorageHandler method configureInputJobProperties.
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
  try {
    Map<String, String> tableProperties = tableDesc.getJobProperties();
    String jobInfoProperty = tableProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
    if (jobInfoProperty != null) {
      InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobInfoProperty);
      HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
      HCatSchema dataColumns = tableInfo.getDataColumns();
      List<HCatFieldSchema> dataFields = dataColumns.getFields();
      // column names are comma-separated, type strings colon-separated
      StringBuilder columnNamesSb = new StringBuilder();
      StringBuilder typeNamesSb = new StringBuilder();
      for (HCatFieldSchema dataField : dataFields) {
        if (columnNamesSb.length() > 0) {
          columnNamesSb.append(",");
          typeNamesSb.append(":");
        }
        columnNamesSb.append(dataField.getName());
        typeNamesSb.append(dataField.getTypeString());
      }
      jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesSb.toString());
      jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString());
      boolean isTransactionalTable = AcidUtils.isTablePropertyTransactional(tableProperties);
      AcidUtils.AcidOperationalProperties acidOperationalProperties =
          AcidUtils.getAcidOperationalProperties(tableProperties);
      AcidUtils.setAcidOperationalProperties(jobProperties, isTransactionalTable, acidOperationalProperties);
    }
  } catch (IOException e) {
    throw new IllegalStateException("Failed to set input job properties", e);
  }
}
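To make the separator convention concrete, a table with columns (id int, name string) would yield the following job property values (illustrative, derived from the loop above):

// Illustrative only, for columns (id int, name string):
// jobProperties.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS)       -> "id,name"
// jobProperties.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) -> "int:string"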