Use of org.apache.hadoop.io.VersionMismatchException in project Nutch by Apache.
The class ParseData, method readFields: exactly one on-disk version is accepted, and any other version byte aborts deserialization with a VersionMismatchException.
public final void readFields(DataInput in) throws IOException {
  version = in.readByte();
  // incompatible change from UTF8 (version < 5) to Text
  if (version != VERSION)
    throw new VersionMismatchException(VERSION, version);
  status = ParseStatus.read(in);
  // read title
  title = Text.readString(in);
  // read outlinks
  int numOutlinks = in.readInt();
  outlinks = new Outlink[numOutlinks];
  for (int i = 0; i < numOutlinks; i++) {
    outlinks[i] = Outlink.read(in);
  }
  if (version < 3) {
    // read metadata as explicit key/value pairs (pre-version-3 layout)
    int propertyCount = in.readInt();
    contentMeta.clear();
    for (int i = 0; i < propertyCount; i++) {
      contentMeta.add(Text.readString(in), Text.readString(in));
    }
  } else {
    contentMeta.clear();
    contentMeta.readFields(in);
  }
  if (version > 3) {
    parseMeta.clear();
    parseMeta.readFields(in);
  }
}
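ParseData is the strictest of the three readers: the version byte must equal the current VERSION exactly, so a record written by any other release fails fast instead of being misparsed. A minimal sketch of the same guard, assuming a hypothetical VersionedRecord class with a single title field (the field and the VERSION value are illustrative, not Nutch code):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.VersionMismatchException;

public class VersionedRecord {
  // Hypothetical format version; bumped on every incompatible layout change.
  private static final byte VERSION = 5;

  private String title;

  public void write(DataOutput out) throws IOException {
    out.writeByte(VERSION); // the version byte always comes first
    Text.writeString(out, title);
  }

  public void readFields(DataInput in) throws IOException {
    byte version = in.readByte();
    // Strict check, as in ParseData: anything but the current version fails.
    if (version != VERSION)
      throw new VersionMismatchException(VERSION, version);
    title = Text.readString(in);
  }
}

VersionMismatchException extends IOException, so callers that already handle I/O failures get the version error for free; its message reports both the expected and the found version.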
Use of org.apache.hadoop.io.VersionMismatchException in project Nutch by Apache.
The class Content, method readFields: the leading int selects between the current versioned layout (negative values) and a legacy compressed layout (non-negative values); an unknown version raises a VersionMismatchException.
public final void readFields(DataInput in) throws IOException {
  metadata.clear();
  int sizeOrVersion = in.readInt();
  if (sizeOrVersion < 0) {
    // negative: the int is a format version
    version = sizeOrVersion;
    switch (version) {
    case VERSION:
      url = Text.readString(in);
      base = Text.readString(in);
      content = new byte[in.readInt()];
      in.readFully(content);
      contentType = Text.readString(in);
      metadata.readFields(in);
      break;
    default:
      throw new VersionMismatchException((byte) VERSION, (byte) version);
    }
  } else {
    // non-negative: the int is the size of a legacy compressed record
    byte[] compressed = new byte[sizeOrVersion];
    in.readFully(compressed, 0, compressed.length);
    ByteArrayInputStream deflated = new ByteArrayInputStream(compressed);
    DataInput inflater = new DataInputStream(new InflaterInputStream(deflated));
    readFieldsCompressed(inflater);
  }
}
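The leading int does double duty: negative values are format versions of the current layout, while non-negative values give the byte length of an older record that was stored zlib-compressed and is handed off to readFieldsCompressed. A round-trip sketch of that legacy compressed path, using only java.io and java.util.zip (the payload and class name are made up):

import java.io.*;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

public class CompressedRecordDemo {
  public static void main(String[] args) throws IOException {
    // Write side: a length prefix followed by zlib-compressed bytes.
    byte[] payload = "hello".getBytes("UTF-8");
    ByteArrayOutputStream compressedBuf = new ByteArrayOutputStream();
    DeflaterOutputStream deflaterOut = new DeflaterOutputStream(compressedBuf);
    deflaterOut.write(payload);
    deflaterOut.finish();
    byte[] compressed = compressedBuf.toByteArray();

    ByteArrayOutputStream record = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(record);
    out.writeInt(compressed.length); // non-negative, so it reads as a size
    out.write(compressed);

    // Read side: mirrors the else-branch of Content.readFields above.
    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(record.toByteArray()));
    int sizeOrVersion = in.readInt();
    byte[] stored = new byte[sizeOrVersion];
    in.readFully(stored, 0, stored.length);
    DataInput inflater = new DataInputStream(
        new InflaterInputStream(new ByteArrayInputStream(stored)));
    byte[] restored = new byte[payload.length];
    inflater.readFully(restored);
    System.out.println(new String(restored, "UTF-8")); // prints "hello"
  }
}

The sign trick works because a size can never be negative, so the two encodings can share the same leading int without ambiguity.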
Use of org.apache.hadoop.io.VersionMismatchException in project Nutch by Apache.
The class CrawlDatum, method readFields: all versions up to CUR_VERSION remain readable, with old field encodings and status codes translated on the fly; only a version newer than CUR_VERSION raises a VersionMismatchException.
public void readFields(DataInput in) throws IOException {
  // read and check version
  byte version = in.readByte();
  if (version > CUR_VERSION)
    throw new VersionMismatchException(CUR_VERSION, version);
  status = in.readByte();
  fetchTime = in.readLong();
  retries = in.readByte();
  if (version > 5) {
    fetchInterval = in.readInt();
  } else
    // before version 6 the interval was stored as a float
    fetchInterval = Math.round(in.readFloat());
  score = in.readFloat();
  if (version > 2) {
    modifiedTime = in.readLong();
    int cnt = in.readByte();
    if (cnt > 0) {
      signature = new byte[cnt];
      in.readFully(signature);
    } else
      signature = null;
  }
  if (version > 3) {
    boolean hasMetadata = false;
    if (version < 7) {
      // versions 4-6: copy entries out of the old MapWritable layout
      org.apache.hadoop.io.MapWritable oldMetaData = new org.apache.hadoop.io.MapWritable();
      if (in.readBoolean()) {
        hasMetadata = true;
        metaData = new org.apache.hadoop.io.MapWritable();
        oldMetaData.readFields(in);
      }
      for (Writable key : oldMetaData.keySet()) {
        metaData.put(key, oldMetaData.get(key));
      }
    } else {
      if (in.readBoolean()) {
        hasMetadata = true;
        metaData = new org.apache.hadoop.io.MapWritable();
        metaData.readFields(in);
      }
    }
    if (!hasMetadata)
      metaData = null;
  }
  // translate status codes written before version 5
  if (version < 5) {
    if (oldToNew.containsKey(status))
      status = oldToNew.get(status);
    else
      status = STATUS_DB_UNFETCHED;
  }
}
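Where ParseData rejects old versions outright, CrawlDatum upgrades records in place; the final block remaps status bytes written before version 5 through an oldToNew table, demoting unknown codes to STATUS_DB_UNFETCHED rather than failing. A sketch of that translation-table pattern (the byte values and map contents are illustrative, not Nutch's real status codes):

import java.util.HashMap;
import java.util.Map;

public class StatusTranslator {
  static final byte STATUS_DB_UNFETCHED = 0x01; // illustrative value

  // Old status byte -> new status byte; built once, consulted per record.
  static final Map<Byte, Byte> oldToNew = new HashMap<>();
  static {
    oldToNew.put((byte) 0x02, (byte) 0x21); // illustrative mappings
    oldToNew.put((byte) 0x03, (byte) 0x22);
  }

  static byte translate(byte status, byte version) {
    if (version >= 5)
      return status; // already in the new code space
    // Fall back to "unfetched" for any code the table does not know.
    return oldToNew.getOrDefault(status, STATUS_DB_UNFETCHED);
  }
}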