Use of org.apache.hadoop.io.BytesWritable in project metron by apache.
In class PartitionHDFSWriter, method handle.
public void handle(long ts, byte[] value) throws IOException {
  turnoverIfNecessary(ts);
  BytesWritable bw = new BytesWritable(value);
  try {
    writer.append(new LongWritable(ts), bw);
  } catch (ArrayIndexOutOfBoundsException aioobe) {
    LOG.warn("This appears to be HDFS-7765 (https://issues.apache.org/jira/browse/HDFS-7765), "
        + "which is an issue with syncing and not problematic: {}", aioobe.getMessage(), aioobe);
  }
  numWritten++;
  if (numWritten % config.getSyncEvery() == 0) {
    syncHandler.sync(outputStream);
  }
}
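For context, a minimal sketch of how a SequenceFile.Writer compatible with handle() could be created, keyed by LongWritable timestamps with BytesWritable packet payloads. The class name, output path, and flush call below are illustrative assumptions, not the actual setup inside PartitionHDFSWriter.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;

public class PcapWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/pcap-example.seq"); // illustrative path, not Metron's layout
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(LongWritable.class),
        SequenceFile.Writer.valueClass(BytesWritable.class))) {
      // Append one record: the key is a timestamp, the value the raw packet bytes.
      writer.append(new LongWritable(System.currentTimeMillis()),
          new BytesWritable(new byte[] { 0x01, 0x02, 0x03 }));
      writer.hsync(); // flush to HDFS, comparable in intent to syncHandler.sync(outputStream) above
    }
  }
}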
Use of org.apache.hadoop.io.BytesWritable in project metron by apache.
In class PcapHelperTest, method readSamplePackets.
public static List<byte[]> readSamplePackets(String pcapLoc) throws IOException {
  List<byte[]> ret = new ArrayList<>();
  try (SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(),
      SequenceFile.Reader.file(new Path(pcapLoc)))) {
    IntWritable key = new IntWritable();
    BytesWritable value = new BytesWritable();
    while (reader.next(key, value)) {
      byte[] pcapWithHeader = value.copyBytes();
      ret.add(pcapWithHeader);
    }
  }
  return ret;
}
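Note why readSamplePackets uses copyBytes() rather than getBytes(): BytesWritable keeps a backing array that can be longer than the logical length, so getBytes() may include stale padding, while copyBytes() returns exactly getLength() bytes. A small illustration (the class name and values are arbitrary):

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSizing {
  public static void main(String[] args) {
    BytesWritable bw = new BytesWritable(new byte[] { 1, 2, 3, 4 });
    bw.setSize(2); // logical length is now 2, but the backing array still holds 4 bytes
    System.out.println(bw.getLength());                  // 2
    System.out.println(bw.getBytes().length);            // 4: the full backing array, longer than the logical size
    System.out.println(Arrays.toString(bw.copyBytes())); // [1, 2]: exact copy of the valid bytes
  }
}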
Use of org.apache.hadoop.io.BytesWritable in project nutch by apache.
In class CrawlDbReader, method processStatJob.
public void processStatJob(String crawlDb, Configuration config, boolean sort)
    throws IOException, InterruptedException, ClassNotFoundException {
  double[] quantiles = { .01, .05, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .95, .99 };
  if (config.get("db.stats.score.quantiles") != null) {
    List<Double> qs = new ArrayList<>();
    for (String s : config.getStrings("db.stats.score.quantiles")) {
      try {
        double d = Double.parseDouble(s);
        if (d >= 0.0 && d <= 1.0) {
          qs.add(d);
        } else {
          LOG.warn("Skipping quantile {} not in range [0.0, 1.0] in db.stats.score.quantiles", s);
        }
      } catch (NumberFormatException e) {
        LOG.warn("Skipping bad floating point number {} in db.stats.score.quantiles: {}", s, e.getMessage());
      }
      quantiles = new double[qs.size()];
      int i = 0;
      for (Double q : qs) {
        quantiles[i++] = q;
      }
      Arrays.sort(quantiles);
    }
  }
  if (LOG.isInfoEnabled()) {
    LOG.info("CrawlDb statistics start: " + crawlDb);
  }
  TreeMap<String, Writable> stats = processStatJobHelper(crawlDb, config, sort);
  if (LOG.isInfoEnabled()) {
    LOG.info("Statistics for CrawlDb: " + crawlDb);
    LongWritable totalCnt = ((LongWritable) stats.get("T"));
    stats.remove("T");
    LOG.info("TOTAL urls:\t" + totalCnt.get());
    for (Map.Entry<String, Writable> entry : stats.entrySet()) {
      String k = entry.getKey();
      long value = 0;
      double fvalue = 0.0;
      byte[] bytesValue = null;
      Writable val = entry.getValue();
      if (val instanceof LongWritable) {
        value = ((LongWritable) val).get();
      } else if (val instanceof FloatWritable) {
        fvalue = ((FloatWritable) val).get();
      } else if (val instanceof BytesWritable) {
        bytesValue = ((BytesWritable) val).getBytes();
      }
      if (k.equals("scn")) {
        LOG.info("min score:\t" + fvalue);
      } else if (k.equals("scx")) {
        LOG.info("max score:\t" + fvalue);
      } else if (k.equals("sct")) {
        LOG.info("avg score:\t" + (fvalue / totalCnt.get()));
      } else if (k.equals("scNaN")) {
        LOG.info("score == NaN:\t" + value);
      } else if (k.equals("ftn")) {
        LOG.info("earliest fetch time:\t" + new Date(1000 * 60 * value));
      } else if (k.equals("ftx")) {
        LOG.info("latest fetch time:\t" + new Date(1000 * 60 * value));
      } else if (k.equals("ftt")) {
        LOG.info("avg of fetch times:\t" + new Date(1000 * 60 * (value / totalCnt.get())));
      } else if (k.equals("fin")) {
        LOG.info("shortest fetch interval:\t{}", TimingUtil.secondsToDaysHMS(value));
      } else if (k.equals("fix")) {
        LOG.info("longest fetch interval:\t{}", TimingUtil.secondsToDaysHMS(value));
      } else if (k.equals("fit")) {
        LOG.info("avg fetch interval:\t{}", TimingUtil.secondsToDaysHMS(value / totalCnt.get()));
      } else if (k.startsWith("status")) {
        String[] st = k.split(" ");
        int code = Integer.parseInt(st[1]);
        if (st.length > 2)
          LOG.info(" " + st[2] + " :\t" + val);
        else
          LOG.info(st[0] + " " + code + " (" + CrawlDatum.getStatusName((byte) code) + "):\t" + val);
      } else if (k.equals("scd")) {
        MergingDigest tdigest = MergingDigest.fromBytes(ByteBuffer.wrap(bytesValue));
        for (double q : quantiles) {
          LOG.info("score quantile {}:\t{}", q, tdigest.quantile(q));
        }
      } else {
        LOG.info(k + ":\t" + val);
      }
    }
  }
  if (LOG.isInfoEnabled()) {
    LOG.info("CrawlDb statistics: done");
  }
}
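To make the quantile handling above concrete, here is a small standalone sketch of the same parsing logic. The class name is hypothetical and the property value "0.9,0.5,0.99,1.5,abc" is an arbitrary example, not a Nutch default.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;

public class QuantileConfigSketch {
  public static void main(String[] args) {
    Configuration config = new Configuration();
    // Comma-separated list; Configuration.getStrings() splits it into individual tokens.
    config.set("db.stats.score.quantiles", "0.9,0.5,0.99,1.5,abc");

    List<Double> qs = new ArrayList<>();
    for (String s : config.getStrings("db.stats.score.quantiles")) {
      try {
        double d = Double.parseDouble(s);
        if (d >= 0.0 && d <= 1.0) {
          qs.add(d);            // keep valid quantiles
        }                       // 1.5 is out of range and skipped (the real code logs a warning)
      } catch (NumberFormatException e) {
        // "abc" is not a number and is skipped
      }
    }
    double[] quantiles = qs.stream().mapToDouble(Double::doubleValue).toArray();
    Arrays.sort(quantiles);
    System.out.println(Arrays.toString(quantiles)); // [0.5, 0.9, 0.99]
  }
}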
Use of org.apache.hadoop.io.BytesWritable in project nutch by apache.
In class CrawlDbReader, method processStatJobHelper.
private TreeMap<String, Writable> processStatJobHelper(String crawlDb, Configuration config, boolean sort)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());
  Job job = NutchJob.getInstance(config);
  config = job.getConfiguration();
  job.setJobName("stats " + crawlDb);
  config.setBoolean("db.reader.stats.sort", sort);
  FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(CrawlDbStatMapper.class);
  job.setCombinerClass(CrawlDbStatReducer.class);
  job.setReducerClass(CrawlDbStatReducer.class);
  FileOutputFormat.setOutputPath(job, tmpFolder);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NutchWritable.class);
  // https://issues.apache.org/jira/browse/NUTCH-1029
  config.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  try {
    int complete = job.waitForCompletion(true) ? 0 : 1;
  } catch (InterruptedException | ClassNotFoundException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  // reading the result
  FileSystem fileSystem = tmpFolder.getFileSystem(config);
  SequenceFile.Reader[] readers = SegmentReaderUtil.getReaders(tmpFolder, config);
  Text key = new Text();
  NutchWritable value = new NutchWritable();
  TreeMap<String, Writable> stats = new TreeMap<>();
  for (int i = 0; i < readers.length; i++) {
    SequenceFile.Reader reader = readers[i];
    while (reader.next(key, value)) {
      String k = key.toString();
      Writable val = stats.get(k);
      if (val == null) {
        stats.put(k, value.get());
        continue;
      }
      if (k.equals("sc")) {
        float min = Float.MAX_VALUE;
        float max = Float.MIN_VALUE;
        if (stats.containsKey("scn")) {
          min = ((FloatWritable) stats.get("scn")).get();
        } else {
          min = ((FloatWritable) stats.get("sc")).get();
        }
        if (stats.containsKey("scx")) {
          max = ((FloatWritable) stats.get("scx")).get();
        } else {
          max = ((FloatWritable) stats.get("sc")).get();
        }
        float fvalue = ((FloatWritable) value.get()).get();
        if (min > fvalue) {
          min = fvalue;
        }
        if (max < fvalue) {
          max = fvalue;
        }
        stats.put("scn", new FloatWritable(min));
        stats.put("scx", new FloatWritable(max));
      } else if (k.equals("ft") || k.equals("fi")) {
        long min = Long.MAX_VALUE;
        long max = Long.MIN_VALUE;
        String minKey = k + "n";
        String maxKey = k + "x";
        if (stats.containsKey(minKey)) {
          min = ((LongWritable) stats.get(minKey)).get();
        } else if (stats.containsKey(k)) {
          min = ((LongWritable) stats.get(k)).get();
        }
        if (stats.containsKey(maxKey)) {
          max = ((LongWritable) stats.get(maxKey)).get();
        } else if (stats.containsKey(k)) {
          max = ((LongWritable) stats.get(k)).get();
        }
        long lvalue = ((LongWritable) value.get()).get();
        if (min > lvalue) {
          min = lvalue;
        }
        if (max < lvalue) {
          max = lvalue;
        }
        stats.put(k + "n", new LongWritable(min));
        stats.put(k + "x", new LongWritable(max));
      } else if (k.equals("sct")) {
        FloatWritable fvalue = (FloatWritable) value.get();
        ((FloatWritable) val).set(((FloatWritable) val).get() + fvalue.get());
      } else if (k.equals("scd")) {
        MergingDigest tdigest = null;
        MergingDigest tdig = MergingDigest.fromBytes(ByteBuffer.wrap(((BytesWritable) value.get()).getBytes()));
        if (val instanceof BytesWritable) {
          tdigest = MergingDigest.fromBytes(ByteBuffer.wrap(((BytesWritable) val).getBytes()));
          tdigest.add(tdig);
        } else {
          tdigest = tdig;
        }
        ByteBuffer tdigestBytes = ByteBuffer.allocate(tdigest.smallByteSize());
        tdigest.asSmallBytes(tdigestBytes);
        stats.put(k, new BytesWritable(tdigestBytes.array()));
      } else {
        LongWritable lvalue = (LongWritable) value.get();
        ((LongWritable) val).set(((LongWritable) val).get() + lvalue.get());
      }
    }
    reader.close();
  }
  // remove score, fetch interval, and fetch time
  // (used for min/max calculation)
  stats.remove("sc");
  stats.remove("fi");
  stats.remove("ft");
  // removing the tmp folder
  fileSystem.delete(tmpFolder, true);
  return stats;
}
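The "scd" branch above merges score distributions that travel between tasks as serialized t-digests wrapped in BytesWritable. A minimal standalone sketch of the same serialize, deserialize, and merge round trip follows; the class name, the compression value 100, and the sample scores are assumptions for illustration only.

import java.nio.ByteBuffer;
import org.apache.hadoop.io.BytesWritable;
import com.tdunning.math.stats.MergingDigest;

public class TDigestMergeSketch {
  public static void main(String[] args) {
    // Build two digests from hypothetical per-partition score samples.
    MergingDigest a = new MergingDigest(100);
    MergingDigest b = new MergingDigest(100);
    for (double score : new double[] { 0.1, 0.4, 0.7 }) a.add(score);
    for (double score : new double[] { 0.2, 0.9 }) b.add(score);

    // Serialize each digest the way the stats records carry it: as BytesWritable.
    BytesWritable wa = toWritable(a);
    BytesWritable wb = toWritable(b);

    // Deserialize and merge, mirroring the "scd" branch above.
    MergingDigest merged = MergingDigest.fromBytes(ByteBuffer.wrap(wa.getBytes()));
    merged.add(MergingDigest.fromBytes(ByteBuffer.wrap(wb.getBytes())));
    System.out.println("median score estimate: " + merged.quantile(0.5));
  }

  private static BytesWritable toWritable(MergingDigest digest) {
    ByteBuffer buf = ByteBuffer.allocate(digest.smallByteSize());
    digest.asSmallBytes(buf);
    return new BytesWritable(buf.array());
  }
}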
Use of org.apache.hadoop.io.BytesWritable in project hive by apache.
In class TestDynamicSerDe, method testStructsinStructs.
/**
 * Tests serialization and deserialization of a struct nested inside another struct.
 */
public void testStructsinStructs() throws Throwable {
  try {
    Properties schema = new Properties();
    // schema.setProperty(serdeConstants.SERIALIZATION_FORMAT,
    // org.apache.thrift.protocol.TJSONProtocol.class.getName());
    schema.setProperty(serdeConstants.SERIALIZATION_FORMAT,
        org.apache.thrift.protocol.TBinaryProtocol.class.getName());
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "test");
    schema.setProperty(serdeConstants.SERIALIZATION_DDL,
        "struct inner { i32 field1, string field2 },struct test {inner foo, i32 hello, list<string> bye, map<string,i32> another}");
    schema.setProperty(serdeConstants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
    //
    // construct object of above type
    //
    // construct the inner struct
    ArrayList<Object> innerStruct = new ArrayList<Object>();
    innerStruct.add(Integer.valueOf(22));
    innerStruct.add("hello world");
    // construct outer struct
    ArrayList<String> bye = new ArrayList<String>();
    bye.add("firstString");
    bye.add("secondString");
    HashMap<String, Integer> another = new HashMap<String, Integer>();
    another.put("firstKey", 1);
    another.put("secondKey", 2);
    ArrayList<Object> struct = new ArrayList<Object>();
    struct.add(innerStruct);
    struct.add(Integer.valueOf(234));
    struct.add(bye);
    struct.add(another);
    DynamicSerDe serde = new DynamicSerDe();
    serde.initialize(new Configuration(), schema);
    ObjectInspector oi = serde.getObjectInspector();
    // Try to serialize
    BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
    // Try to deserialize
    Object o = serde.deserialize(bytes);
    List<?> olist = (List<?>) o;
    assertEquals(4, olist.size());
    assertEquals(innerStruct, olist.get(0));
    assertEquals(Integer.valueOf(234), olist.get(1));
    assertEquals(bye, olist.get(2));
    assertEquals(another, olist.get(3));
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
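The core pattern in the test is that DynamicSerDe serializes a row (a list of field values) into a Thrift-encoded BytesWritable and deserializes it back. A minimal sketch of that round trip with a flat struct follows; the class name, table name "demo", and the DDL string are illustrative assumptions, while the property keys and serde calls are the same ones used in the test above.

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class DynamicSerDeRoundTripSketch {
  public static void main(String[] args) throws Exception {
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.SERIALIZATION_FORMAT,
        org.apache.thrift.protocol.TBinaryProtocol.class.getName());
    schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, "demo");
    schema.setProperty(serdeConstants.SERIALIZATION_DDL, "struct demo { i32 id, string name }");
    schema.setProperty(serdeConstants.SERIALIZATION_LIB, DynamicSerDe.class.getName());

    DynamicSerDe serde = new DynamicSerDe();
    serde.initialize(new Configuration(), schema);
    ObjectInspector oi = serde.getObjectInspector();

    // A row is a list of field values matching the DDL above.
    ArrayList<Object> row = new ArrayList<Object>();
    row.add(Integer.valueOf(7));
    row.add("seven");

    BytesWritable bytes = (BytesWritable) serde.serialize(row, oi); // Thrift-encoded bytes
    List<?> roundTripped = (List<?>) serde.deserialize(bytes);      // back to a field list
    System.out.println(roundTripped);
  }
}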