use of org.apache.orc.mapred.OrcStruct in project druid by druid-io.
the class OrcHadoopInputRowParserTest method testDate1900.
@Test
public void testDate1900() throws IOException {
  /*
    TestOrcFile.testDate1900.orc
    struct<time:timestamp,date:date>
    {1900-05-05 12:34:56.1, 1900-12-25}
  */
  HadoopDruidIndexerConfig config = loadHadoopDruidIndexerConfig("example/testDate1900_hadoop_job.json");
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  OrcStruct data = getFirstRow(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
  List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
  Assert.assertEquals(1, rows.get(0).getDimensions().size());
  Assert.assertEquals("1900-12-25T00:00:00.000Z", rows.get(0).getDimension("date").get(0));
  Assert.assertEquals(DateTimes.of("1900-05-05T12:34:56.1Z"), rows.get(0).getTimestamp());
}
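For reference, a file with this struct<time:timestamp,date:date> schema can be produced with ORC's core writer API. The sketch below is not the code that generated TestOrcFile.testDate1900.orc; it is a minimal, hypothetical example (the output path is made up) showing how timestamp and date columns are populated through a VectorizedRowBatch, with date values stored as days since the epoch.

import java.sql.Timestamp;
import java.time.LocalDate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;

public class WriteDate1900Example {
  public static void main(String[] args) throws Exception {
    TypeDescription schema = TypeDescription.fromString("struct<time:timestamp,date:date>");
    Writer writer = OrcFile.createWriter(
        new Path("/tmp/testDate1900.orc"),  // hypothetical output path
        OrcFile.writerOptions(new Configuration()).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch();
    TimestampColumnVector time = (TimestampColumnVector) batch.cols[0];
    LongColumnVector date = (LongColumnVector) batch.cols[1];  // date columns hold days since epoch
    int row = batch.size++;
    time.set(row, Timestamp.valueOf("1900-05-05 12:34:56.1"));
    date.vector[row] = LocalDate.of(1900, 12, 25).toEpochDay();
    writer.addRowBatch(batch);
    writer.close();
  }
}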
use of org.apache.orc.mapred.OrcStruct in project druid by druid-io.
the class OrcHadoopInputRowParserTest method testTest2.
@Test
public void testTest2() throws IOException {
  HadoopDruidIndexerConfig config = loadHadoopDruidIndexerConfig("example/test_2_hadoop_job.json");
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  /*
    test_2.orc
    struct<timestamp:string,col1:string,col2:array<string>,col3:float,col4:bigint,col5:decimal,col6:array<string>,col7:map<string,string>>
    {2016-01-01, bar, [dat1, dat2, dat3], 1.1, 2, 3.5, [], {subcol7=subval7}}
  */
  OrcStruct data = getFirstRow(job, ((StaticPathSpec) config.getPathSpec()).getPaths());
  List<InputRow> rows = (List<InputRow>) config.getParser().parseBatch(data);
  Assert.assertEquals(7, rows.get(0).getDimensions().size());
  Assert.assertEquals("bar", rows.get(0).getDimension("col1").get(0));
  Assert.assertEquals("dat1", rows.get(0).getDimension("col2").get(0));
  Assert.assertEquals("dat2", rows.get(0).getDimension("col2").get(1));
  Assert.assertEquals("dat3", rows.get(0).getDimension("col2").get(2));
  Assert.assertEquals(1.1f, rows.get(0).getRaw("col3"));
  Assert.assertEquals(2L, rows.get(0).getRaw("col4"));
  Assert.assertEquals(3.5d, rows.get(0).getRaw("col5"));
  Assert.assertEquals(ImmutableList.of(), rows.get(0).getRaw("col6"));
  Assert.assertEquals("subval7", rows.get(0).getRaw("col7-subcol7"));
}
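The row above mixes primitive, list, and map columns. A minimal sketch of how such a row can be assembled in memory with the org.apache.orc.mapred writable types (the schema is trimmed to three columns for brevity; this illustrates the types, it is not the code that produced test_2.orc):

import org.apache.hadoop.io.Text;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcList;
import org.apache.orc.mapred.OrcMap;
import org.apache.orc.mapred.OrcStruct;

TypeDescription schema = TypeDescription.fromString(
    "struct<timestamp:string,col2:array<string>,col7:map<string,string>>");  // trimmed for illustration
OrcStruct row = (OrcStruct) OrcStruct.createValue(schema);
row.setFieldValue("timestamp", new Text("2016-01-01"));
OrcList<Text> col2 = new OrcList<>(schema.getChildren().get(1));  // array<string> child type
col2.add(new Text("dat1"));
col2.add(new Text("dat2"));
col2.add(new Text("dat3"));
row.setFieldValue("col2", col2);
OrcMap<Text, Text> col7 = new OrcMap<>(schema.getChildren().get(2));  // map<string,string> child type
col7.put(new Text("subcol7"), new Text("subval7"));
row.setFieldValue("col7", col7);

Note that the "col7-subcol7" dimension name in the final assertion comes from Druid flattening the map entry into a top-level field; the exact flattening behavior is driven by the parse spec in test_2_hadoop_job.json.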
use of org.apache.orc.mapred.OrcStruct in project druid by druid-io.
the class OrcReader method intermediateRowIterator.
@Override
protected CloseableIterator<OrcStruct> intermediateRowIterator() throws IOException {
  final Closer closer = Closer.create();

  // We fetch here to cache a copy locally. However, this might need to be changed if we want to
  // split an orc file into several InputSplits in the future.
  final byte[] buffer = new byte[InputEntity.DEFAULT_FETCH_BUFFER_SIZE];
  final CleanableFile file = closer.register(source.fetch(temporaryDirectory, buffer));
  final Path path = new Path(file.file().toURI());

  final ClassLoader currentClassLoader = Thread.currentThread().getContextClassLoader();
  final Reader reader;
  try {
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    reader = closer.register(OrcFile.createReader(path, OrcFile.readerOptions(conf)));
  } finally {
    Thread.currentThread().setContextClassLoader(currentClassLoader);
  }

  // The line below reads the schema for all columns. This could be improved in the future
  // by projecting only the columns users actually want.
  final TypeDescription schema = reader.getSchema();
  final RecordReader batchReader = reader.rows(reader.options());
  final OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(batchReader, schema);
  closer.register(recordReader::close);

  return new CloseableIterator<OrcStruct>() {
    final NullWritable key = recordReader.createKey();
    OrcStruct value = null;

    @Override
    public boolean hasNext() {
      if (value == null) {
        try {
          // The OrcStruct returned by next() can be kept in memory for a while.
          // Here, we create a new instance of OrcStruct before calling RecordReader.next(),
          // so that we avoid sharing the same "value" reference across rows.
          value = recordReader.createValue();
          if (!recordReader.next(key, value)) {
            value = null;
          }
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      return value != null;
    }

    @Override
    public OrcStruct next() {
      if (value == null) {
        throw new NoSuchElementException();
      }
      final OrcStruct currentValue = value;
      value = null;
      return currentValue;
    }

    @Override
    public void close() throws IOException {
      closer.close();
    }
  };
}
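Outside of Druid's CloseableIterator plumbing, the same read path can be exercised directly. The standalone sketch below (the method name and the eager materialization into a List are illustrative) reads every row of an ORC file with the same OrcFile/OrcMapredRecordReader calls used above, creating a fresh OrcStruct per row for the same aliasing reason noted in the comment:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcMapredRecordReader;
import org.apache.orc.mapred.OrcStruct;

public class ReadOrcExample {
  static List<OrcStruct> readAll(Path path) throws IOException {
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration()));
    TypeDescription schema = reader.getSchema();  // full schema, no column projection
    OrcMapredRecordReader<OrcStruct> recordReader =
        new OrcMapredRecordReader<>(reader.rows(reader.options()), schema);
    List<OrcStruct> rows = new ArrayList<>();
    try {
      NullWritable key = recordReader.createKey();
      OrcStruct value = recordReader.createValue();
      while (recordReader.next(key, value)) {
        rows.add(value);
        value = recordReader.createValue();  // fresh instance so rows don't share state
      }
    } finally {
      recordReader.close();
      reader.close();
    }
    return rows;
  }
}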
use of org.apache.orc.mapred.OrcStruct in project druid by druid-io.
the class OrcStructConverterTest method testConvertRootFieldWithUnknownFieldNameReturningNull.
@Test
public void testConvertRootFieldWithUnknownFieldNameReturningNull() {
  final Map<String, TypeDescription> types = new HashMap<>();
  types.put("int", TypeDescription.createInt());
  final TypeDescription schema = createRootSchema(types);
  final OrcStruct orcStruct = new OrcStruct(schema);
  orcStruct.setFieldValue("int", new IntWritable(1024));
  final OrcStructConverter converter = new OrcStructConverter(false);
  assertNullValue(converter, orcStruct, "unknownField");
}
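For contrast, a known field should convert to its Java value. A hedged sketch of the complementary happy-path assertion that could be appended to the test above, assuming convertRootField(OrcStruct, String) is the accessor that the assertNullValue helper wraps (the exact method name and boxing of the result are assumptions):

// Hypothetical happy-path counterpart to assertNullValue above; assumes the
// converter exposes convertRootField(OrcStruct, String) returning the converted value.
final Object converted = converter.convertRootField(orcStruct, "int");
Assert.assertEquals(1024, ((Number) converted).intValue());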
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcCompactionTaskTest method basicTestWithRecompactionAndBasicSchemaEvolution.
@Test
public void basicTestWithRecompactionAndBasicSchemaEvolution() throws Exception {
  File basePath = Files.createTempDir();
  basePath.deleteOnExit();
  String minutelyPath = "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20";
  String hourlyPath = "Identity/MemberAccount/hourly/2017/04/03/10/";
  File jobDir = new File(basePath, minutelyPath);
  Assert.assertTrue(jobDir.mkdirs());

  // Write some basic ORC files.
  createTestingData(jobDir);

  // Write an additional file with an evolved schema (new column "k").
  TypeDescription evolvedSchema = TypeDescription.fromString("struct<i:int,j:int,k:int>");
  OrcStruct orcStruct_4 = (OrcStruct) OrcStruct.createValue(evolvedSchema);
  orcStruct_4.setFieldValue("i", new IntWritable(5));
  orcStruct_4.setFieldValue("j", new IntWritable(6));
  orcStruct_4.setFieldValue("k", new IntWritable(7));
  File file_2 = new File(jobDir, "file_2.1." + extensionName);
  writeOrcRecordsInFile(new Path(file_2.getAbsolutePath()), evolvedSchema, ImmutableList.of(orcStruct_4));
  // Make this file the newest.
  file_2.setLastModified(Long.MAX_VALUE);

  // Verify execution. Overwrite the job configurator factory key.
  EmbeddedGobblin embeddedGobblin = createEmbeddedGobblinCompactionJob("basic", basePath.getAbsolutePath())
      .setConfiguration(CompactionJobConfigurator.COMPACTION_JOB_CONFIGURATOR_FACTORY_CLASS_KEY,
          TestCompactionOrcJobConfigurator.Factory.class.getName())
      .setConfiguration(COMPACTION_OUTPUT_EXTENSION, extensionName)
      .setConfiguration(COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET, "Identity.*:0.1");
  JobExecutionResult execution = embeddedGobblin.run();
  Assert.assertTrue(execution.isSuccessful());

  // Result verification: the three original records plus the evolved-schema record
  // should be compacted into a single output file.
  File outputDir = new File(basePath, hourlyPath);
  FileSystem fs = FileSystem.getLocal(new Configuration());
  List<FileStatus> statuses = new ArrayList<>();
  reloadFolder(statuses, outputDir, fs);
  Assert.assertTrue(statuses.size() == 1);
  List<OrcStruct> result = readOrcFile(statuses.get(0).getPath());
  Assert.assertEquals(result.size(), 4);
  Assert.assertEquals(result.get(0).getFieldValue("i"), new IntWritable(1));
  Assert.assertEquals(result.get(0).getFieldValue("j"), new IntWritable(2));
  Assert.assertNull(result.get(0).getFieldValue("k"));
  Assert.assertEquals(result.get(1).getFieldValue("i"), new IntWritable(2));
  Assert.assertEquals(result.get(1).getFieldValue("j"), new IntWritable(3));
  Assert.assertNull(result.get(1).getFieldValue("k"));
  Assert.assertEquals(result.get(2).getFieldValue("i"), new IntWritable(4));
  Assert.assertEquals(result.get(2).getFieldValue("j"), new IntWritable(5));
  Assert.assertNull(result.get(2).getFieldValue("k"));
  Assert.assertEquals(result.get(3).getFieldValue("i"), new IntWritable(5));
  Assert.assertEquals(result.get(3).getFieldValue("j"), new IntWritable(6));
  Assert.assertEquals(result.get(3).getFieldValue("k"), new IntWritable(7));

  // Add a new .orc file into the directory and verify that re-compaction is triggered.
  File file_late = new File(jobDir, "file_late.1." + extensionName);
  OrcStruct orcStruct_5 = (OrcStruct) OrcStruct.createValue(evolvedSchema);
  orcStruct_5.setFieldValue("i", new IntWritable(10));
  orcStruct_5.setFieldValue("j", new IntWritable(11));
  orcStruct_5.setFieldValue("k", new IntWritable(12));
  writeOrcRecordsInFile(new Path(file_late.getAbsolutePath()), evolvedSchema, ImmutableList.of(orcStruct_5));
  execution = embeddedGobblin.run();
  Assert.assertTrue(execution.isSuccessful());
  reloadFolder(statuses, outputDir, fs);
  result = readOrcFile(statuses.get(0).getPath());
  // The previous execution produced 4 records; re-compaction should add 1 late record.
  Assert.assertEquals(result.size(), 4 + 1);
}
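The writeOrcRecordsInFile helper used above is not shown in this excerpt. One plausible implementation, sketched here under the assumption that it simply wraps ORC's mapred record writer (the real Gobblin helper may differ):

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.orc.OrcFile;
import org.apache.orc.TypeDescription;
import org.apache.orc.Writer;
import org.apache.orc.mapred.OrcMapredRecordWriter;
import org.apache.orc.mapred.OrcStruct;

static void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> records)
    throws IOException {
  Writer writer = OrcFile.createWriter(
      path, OrcFile.writerOptions(new Configuration()).setSchema(schema));
  OrcMapredRecordWriter<OrcStruct> recordWriter = new OrcMapredRecordWriter<>(writer);
  for (OrcStruct record : records) {
    recordWriter.write(NullWritable.get(), record);
  }
  recordWriter.close(null);  // mapred RecordWriter.close takes a Reporter; it is unused here
}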